1136 files changed, 79438 insertions, 7494 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 31cfe2c2a2af..c7edba18a6f0 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -199,7 +199,7 @@ config DRM_TTM
 config DRM_TTM_KUNIT_TEST
         tristate "KUnit tests for TTM" if !KUNIT_ALL_TESTS
         default n
-        depends on DRM && KUNIT && MMU
+        depends on DRM && KUNIT && MMU && (UML || COMPILE_TEST)
         select DRM_TTM
         select DRM_EXPORT_FOR_TESTS if m
         select DRM_KUNIT_TEST_HELPERS
@@ -207,7 +207,8 @@ config DRM_TTM_KUNIT_TEST
         help
           Enables unit tests for TTM, a GPU memory manager subsystem used
           to manage memory buffers. This option is mostly useful for kernel
-          developers.
+          developers. It depends on (UML || COMPILE_TEST) since no other driver
+          which uses TTM can be loaded while running the tests.
 
           If in doubt, say "N".
 
@@ -276,6 +277,8 @@ source "drivers/gpu/drm/nouveau/Kconfig"
 
 source "drivers/gpu/drm/i915/Kconfig"
 
+source "drivers/gpu/drm/xe/Kconfig"
+
 source "drivers/gpu/drm/kmb/Kconfig"
 
 config DRM_VGEM
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 8ac6f4b9546e..104b42df2e95 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -134,6 +134,7 @@ obj-$(CONFIG_DRM_RADEON)+= radeon/
 obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/
 obj-$(CONFIG_DRM_AMDGPU)+= amd/amdxcp/
 obj-$(CONFIG_DRM_I915)	+= i915/
+obj-$(CONFIG_DRM_XE)	+= xe/
 obj-$(CONFIG_DRM_KMB_DISPLAY)  += kmb/
 obj-$(CONFIG_DRM_MGAG200) += mgag200/
 obj-$(CONFIG_DRM_V3D)  += v3d/
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 2afecc55090f..260e32ef7bae 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
 	amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
 	amdgpu_fw_attestation.o amdgpu_securedisplay.o \
 	amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
-	amdgpu_ring_mux.o amdgpu_xcp.o
+	amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
index 02f4c6f9d4f6..576067d66bb9 100644
--- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
@@ -330,6 +330,7 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
 {
 	struct list_head *reset_device_list = reset_context->reset_device_list;
 	struct amdgpu_device *tmp_adev = NULL;
+	struct amdgpu_ras *con;
 	int r;
 
 	if (reset_device_list == NULL)
@@ -355,7 +356,30 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
 		 */
 		amdgpu_register_gpu_instance(tmp_adev);
 
-		/* Resume RAS */
+		/* Resume RAS, ecc_irq */
+		con = amdgpu_ras_get_context(tmp_adev);
+		if (!amdgpu_sriov_vf(tmp_adev) && con) {
+			if (tmp_adev->sdma.ras &&
+				tmp_adev->sdma.ras->ras_block.ras_late_init) {
+				r = tmp_adev->sdma.ras->ras_block.ras_late_init(tmp_adev,
+						&tmp_adev->sdma.ras->ras_block.ras_comm);
+				if (r) {
+					dev_err(tmp_adev->dev, "SDMA failed to execute ras_late_init! ret:%d\n", r);
+					goto end;
+				}
+			}
+
+			if (tmp_adev->gfx.ras &&
+				tmp_adev->gfx.ras->ras_block.ras_late_init) {
+				r = tmp_adev->gfx.ras->ras_block.ras_late_init(tmp_adev,
+						&tmp_adev->gfx.ras->ras_block.ras_comm);
+				if (r) {
+					dev_err(tmp_adev->dev, "GFX failed to execute ras_late_init! ret:%d\n", r);
+					goto end;
+				}
+			}
+		}
+
 		amdgpu_ras_resume(tmp_adev);
 
 		/* Update PSP FW topology after reset */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9d92ca157677..79827a6dcd7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -109,6 +109,8 @@
 #include "amdgpu_mca.h"
 #include "amdgpu_ras.h"
 #include "amdgpu_xcp.h"
+#include "amdgpu_seq64.h"
+#include "amdgpu_reg_state.h"
 
 #define MAX_GPU_INSTANCE		64
 
@@ -198,6 +200,7 @@ extern uint amdgpu_dc_debug_mask;
 extern uint amdgpu_dc_visual_confirm;
 extern uint amdgpu_dm_abm_level;
 extern int amdgpu_backlight;
+extern int amdgpu_damage_clips;
 extern struct amdgpu_mgpu_info mgpu_info;
 extern int amdgpu_ras_enable;
 extern uint amdgpu_ras_mask;
@@ -250,6 +253,8 @@ extern int amdgpu_seamless;
 extern int amdgpu_user_partt_mode;
 extern int amdgpu_agp;
 
+extern int amdgpu_wbrf;
+
 #define AMDGPU_VM_MAX_NUM_CTX			4096
 #define AMDGPU_SG_THRESHOLD			(256*1024*1024)
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS	        3000
@@ -468,6 +473,7 @@ struct amdgpu_fpriv {
 	struct amdgpu_vm	vm;
 	struct amdgpu_bo_va	*prt_va;
 	struct amdgpu_bo_va	*csa_va;
+	struct amdgpu_bo_va	*seq64_va;
 	struct mutex		bo_list_lock;
 	struct idr		bo_list_handles;
 	struct amdgpu_ctx_mgr	ctx_mgr;
@@ -506,6 +512,31 @@ struct amdgpu_allowed_register_entry {
 	bool grbm_indexed;
 };
 
+/**
+ * enum amd_reset_method - Methods for resetting AMD GPU devices
+ *
+ * @AMD_RESET_METHOD_NONE: The device will not be reset.
+ * @AMD_RESET_METHOD_LEGACY: Method reserved for SI, CIK and VI ASICs.
+ * @AMD_RESET_METHOD_MODE0: Reset the entire ASIC. Not currently available for
+ *                          any device.
+ * @AMD_RESET_METHOD_MODE1: Resets all IP blocks on the ASIC (SDMA, GFX, VCN, etc.)
+ *                          individually. Suitable only for some discrete GPUs, not
+ *                          available for all ASICs.
+ * @AMD_RESET_METHOD_MODE2: Resets a lesser level of IPs compared to MODE1. Which IPs
+ *                          are reset depends on the ASIC. Notably doesn't reset IPs
+ *                          shared with the CPU on APUs or the memory controllers (so
+ *                          VRAM is not lost). Not available on all ASICs.
+ * @AMD_RESET_METHOD_BACO: BACO (Bus Alive, Chip Off) method powers off and on the card
+ *                         but without powering off the PCI bus. Suitable only for
+ *                         discrete GPUs.
+ * @AMD_RESET_METHOD_PCI: Does a full bus reset using core Linux subsystem PCI reset
+ *                        and does a secondary bus reset or FLR, depending on what the
+ *                        underlying hardware supports.
+ *
+ * Methods available for AMD GPU driver for resetting the device. Not all
+ * methods are suitable for every device. User can override the method using
+ * module parameter `reset_method`.
+ */
 enum amd_reset_method {
 	AMD_RESET_METHOD_NONE = -1,
 	AMD_RESET_METHOD_LEGACY = 0,
@@ -585,6 +616,10 @@ struct amdgpu_asic_funcs {
 				  const struct amdgpu_video_codecs **codecs);
 	/* encode "> 32bits" smn addressing */
 	u64 (*encode_ext_smn_addressing)(int ext_id);
+
+	ssize_t (*get_reg_state)(struct amdgpu_device *adev,
+				 enum amdgpu_reg_state reg_state, void *buf,
+				 size_t max_size);
 };
 
 /*
@@ -757,6 +792,7 @@ struct amdgpu_mqd_prop {
 	uint64_t eop_gpu_addr;
 	uint32_t hqd_pipe_priority;
 	uint32_t hqd_queue_priority;
+	bool allow_tunneling;
 	bool hqd_active;
 };
 
@@ -986,6 +1022,9 @@ struct amdgpu_device {
 	/* GDS */
 	struct amdgpu_gds		gds;
 
+	/* for userq and VM fences */
+	struct amdgpu_seq64		seq64;
+
 	/* KFD */
 	struct amdgpu_kfd_dev		kfd;
 
@@ -1040,6 +1079,8 @@ struct amdgpu_device {
 	bool				in_s3;
 	bool				in_s4;
 	bool				in_s0ix;
+	/* indicate amdgpu suspension status */
+	bool				suspend_complete;
 
 	enum pp_mp1_state               mp1_state;
 	struct amdgpu_doorbell_index doorbell_index;
@@ -1106,6 +1147,7 @@ struct amdgpu_device {
 	bool                            debug_vm;
 	bool                            debug_largebar;
 	bool                            debug_disable_soft_recovery;
+	bool                            debug_use_vram_fw_buf;
 };
 
 static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
@@ -1508,9 +1550,11 @@ static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev,
 #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
 bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev);
 bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev);
+void amdgpu_choose_low_power_state(struct amdgpu_device *adev);
 #else
 static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; }
 static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; }
+static inline void amdgpu_choose_low_power_state(struct amdgpu_device *adev) { }
 #endif
 
 #if defined(CONFIG_DRM_AMD_DC)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 2deebece810e..7099ff9cf8c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -1519,4 +1519,22 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev)
 #endif /* CONFIG_AMD_PMC */
 }
 
+/**
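+ * amdgpu_choose_low_power_state - choose the target low power state
+ * @adev: amdgpu_device pointer
+ *
+ * Set adev->in_s0ix if S0ix is active, else adev->in_s3 if S3 is active.
+ * Leaves both flags untouched while adev->in_runpm is set.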
+ */
+void amdgpu_choose_low_power_state(struct amdgpu_device *adev)
+{
+	if (adev->in_runpm)
+		return;
+
+	if (amdgpu_acpi_is_s0ix_active(adev))
+		adev->in_s0ix = true;
+	else if (amdgpu_acpi_is_s3_active(adev))
+		adev->in_s3 = true;
+}
+
 #endif /* CONFIG_SUSPEND */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index b8412202a1b0..41db030ddc4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -138,6 +138,30 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work)
 	amdgpu_device_gpu_recover(adev, NULL, &reset_context);
 }
 
+static const struct drm_client_funcs kfd_client_funcs = {
+	.unregister	= drm_client_release,
+};
+
+int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev)
+{
+	int ret;
+
+	if (!adev->kfd.init_complete)
+		return 0;
+
+	ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd",
+			      &kfd_client_funcs);
+	if (ret) {
+		dev_err(adev->dev, "Failed to init DRM client: %d\n",
+			ret);
+		return ret;
+	}
+
+	drm_client_register(&adev->kfd.client);
+
+	return 0;
+}
+
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
 	int i;
@@ -547,7 +571,7 @@ int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst,
 	struct amdgpu_device *adev = dst, *peer_adev;
 	int num_links;
 
-	if (adev->asic_type != CHIP_ALDEBARAN)
+	if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2))
 		return 0;
 
 	if (src)
@@ -684,10 +708,8 @@ err:
 void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle)
 {
 	enum amd_powergating_state state = idle ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE;
-	/* Temporary workaround to fix issues observed in some
-	 * compute applications when GFXOFF is enabled on GFX11.
-	 */
-	if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11) {
+	if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11 &&
+	    ((adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK) <= 64)) {
 		pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled");
 		amdgpu_gfx_off_ctrl(adev, idle);
 	} else if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 9) &&
@@ -710,35 +732,6 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
 	return false;
 }
 
-int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
-				     uint16_t vmid)
-{
-	if (adev->family == AMDGPU_FAMILY_AI) {
-		int i;
-
-		for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
-			amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
-	} else {
-		amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0), 0);
-	}
-
-	return 0;
-}
-
-int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
-				      uint16_t pasid,
-				      enum TLB_FLUSH_TYPE flush_type,
-				      uint32_t inst)
-{
-	bool all_hub = false;
-
-	if (adev->family == AMDGPU_FAMILY_AI ||
-	    adev->family == AMDGPU_FAMILY_RV)
-		all_hub = true;
-
-	return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub, inst);
-}
-
 bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
 {
 	return adev->have_atomics_support;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index dac983da961d..27c61c535e29 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -33,6 +33,7 @@
 #include <linux/mmu_notifier.h>
 #include <linux/memremap.h>
 #include <kgd_kfd_interface.h>
+#include <drm/drm_client.h>
 #include "amdgpu_sync.h"
 #include "amdgpu_vm.h"
 #include "amdgpu_xcp.h"
@@ -83,6 +84,7 @@ struct kgd_mem {
 
 	struct amdgpu_sync sync;
 
+	uint32_t gem_handle;
 	bool aql_queue;
 	bool is_imported;
 };
@@ -105,6 +107,9 @@ struct amdgpu_kfd_dev {
 
 	/* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
 	struct dev_pagemap pgmap;
+
+	/* Client for KFD BO GEM handle allocations */
+	struct drm_client_dev client;
 };
 
 enum kgd_engine_type {
@@ -162,11 +167,6 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
 				uint32_t *ib_cmd, uint32_t ib_len);
 void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle);
 bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev);
-int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
-				uint16_t vmid);
-int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
-				uint16_t pasid, enum TLB_FLUSH_TYPE flush_type,
-				uint32_t inst);
 
 bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
 
@@ -182,6 +182,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
 struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
 				struct mm_struct *mm,
 				struct svm_range_bo *svm_bo);
+
+int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev);
 #if defined(CONFIG_DEBUG_FS)
 int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data);
 #endif
@@ -301,7 +303,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct amdgpu_device *adev,
 					  struct kgd_mem *mem, void *drm_priv);
 int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
 		struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv);
-void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv);
+int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv);
 int amdgpu_amdkfd_gpuvm_sync_memory(
 		struct amdgpu_device *adev, struct kgd_mem *mem, bool intr);
 int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
@@ -311,14 +313,13 @@ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem);
 int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo);
 
 int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
-					    struct dma_fence **ef);
+					    struct dma_fence __rcu **ef);
 int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
 					      struct kfd_vm_fault_info *info);
-int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
-				      struct dma_buf *dmabuf,
-				      uint64_t va, void *drm_priv,
-				      struct kgd_mem **mem, uint64_t *size,
-				      uint64_t *mmap_offset);
+int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd,
+					 uint64_t va, void *drm_priv,
+					 struct kgd_mem **mem, uint64_t *size,
+					 uint64_t *mmap_offset);
 int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_mem *mem,
 				      struct dma_buf **dmabuf);
 void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 10d56979fe3b..3a3f3ce09f00 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -200,7 +200,7 @@ int kgd_arcturus_hqd_sdma_dump(struct amdgpu_device *adev,
 #undef HQD_N_REGS
 #define HQD_N_REGS (19+6+7+10)
 
-	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+	*dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
 	if (*dump == NULL)
 		return -ENOMEM;
 
@@ -290,7 +290,7 @@ static int suspend_resume_compute_scheduler(struct amdgpu_device *adev, bool sus
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
 
-		if (!(ring && drm_sched_wqueue_ready(&ring->sched)))
+		if (!amdgpu_ring_sched_ready(ring))
 			continue;
 
 		/* stop secheduler and drain ring. */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
index 469785d33791..1ef758ac5076 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
@@ -90,7 +90,7 @@ struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
 		return NULL;
 
 	fence = container_of(f, struct amdgpu_amdkfd_fence, base);
-	if (fence && f->ops == &amdkfd_fence_ops)
+	if (f->ops == &amdkfd_fence_ops)
 		return fence;
 
 	return NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
index f6598b9e4faa..a5c7259cf2a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -141,7 +141,7 @@ static int kgd_gfx_v9_4_3_hqd_sdma_dump(struct amdgpu_device *adev,
 		(*dump)[i++][1] = RREG32(addr);         \
 	} while (0)
 
-	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+	*dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
 	if (*dump == NULL)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 6bf448ab3dff..ca4a6b82817f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -214,7 +214,7 @@ static int kgd_hqd_dump(struct amdgpu_device *adev,
 		(*dump)[i++][1] = RREG32(addr);		\
 	} while (0)
 
-	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+	*dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
 	if (*dump == NULL)
 		return -ENOMEM;
 
@@ -301,7 +301,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
 #undef HQD_N_REGS
 #define HQD_N_REGS (19+4)
 
-	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+	*dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
 	if (*dump == NULL)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index cd06e4a6d1da..0f3e2944edd7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -238,7 +238,7 @@ static int kgd_hqd_dump(struct amdgpu_device *adev,
 		(*dump)[i++][1] = RREG32(addr);		\
 	} while (0)
 
-	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+	*dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
 	if (*dump == NULL)
 		return -ENOMEM;
 
@@ -324,7 +324,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
 #undef HQD_N_REGS
 #define HQD_N_REGS (19+4+2+3+7)
 
-	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+	*dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
 	if (*dump == NULL)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 00fbc0f44c92..5a35a8ca8922 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -363,7 +363,7 @@ int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev,
 		(*dump)[i++][1] = RREG32(addr);		\
 	} while (0)
 
-	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+	*dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
 	if (*dump == NULL)
 		return -ENOMEM;
 
@@ -460,7 +460,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
 #undef HQD_N_REGS
 #define HQD_N_REGS (19+6+7+10)
 
-	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+	*dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
 	if (*dump == NULL)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 0bd3c4a6267a..231fd927dcfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -25,6 +25,7 @@
 #include <linux/pagemap.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/task.h>
+#include <linux/fdtable.h>
 #include <drm/ttm/ttm_tt.h>
 
 #include <drm/drm_exec.h>
@@ -806,13 +807,22 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
 static int kfd_mem_export_dmabuf(struct kgd_mem *mem)
 {
 	if (!mem->dmabuf) {
-		struct dma_buf *ret = amdgpu_gem_prime_export(
-			&mem->bo->tbo.base,
+		struct amdgpu_device *bo_adev;
+		struct dma_buf *dmabuf;
+		int r, fd;
+
+		bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
+		r = drm_gem_prime_handle_to_fd(&bo_adev->ddev, bo_adev->kfd.client.file,
+					       mem->gem_handle,
 			mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
-				DRM_RDWR : 0);
-		if (IS_ERR(ret))
-			return PTR_ERR(ret);
-		mem->dmabuf = ret;
+					       DRM_RDWR : 0, &fd);
+		if (r)
+			return r;
+		dmabuf = dma_buf_get(fd);
+		close_fd(fd);
+		if (WARN_ON_ONCE(IS_ERR(dmabuf)))
+			return PTR_ERR(dmabuf);
+		mem->dmabuf = dmabuf;
 	}
 
 	return 0;
@@ -1384,7 +1394,6 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
 				  amdgpu_amdkfd_restore_userptr_worker);
 
 		*process_info = info;
-		*ef = dma_fence_get(&info->eviction_fence->base);
 	}
 
 	vm->process_info = *process_info;
@@ -1415,6 +1424,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
 	list_add_tail(&vm->vm_list_node,
 			&(vm->process_info->vm_list_head));
 	vm->process_info->n_vms++;
+
+	*ef = dma_fence_get(&vm->process_info->eviction_fence->base);
 	mutex_unlock(&vm->process_info->lock);
 
 	return 0;
@@ -1426,10 +1437,7 @@ validate_pd_fail:
 reserve_pd_fail:
 	vm->process_info = NULL;
 	if (info) {
-		/* Two fence references: one in info and one in *ef */
 		dma_fence_put(&info->eviction_fence->base);
-		dma_fence_put(*ef);
-		*ef = NULL;
 		*process_info = NULL;
 		put_pid(info->pid);
 create_evict_fence_fail:
@@ -1623,7 +1631,8 @@ int amdgpu_amdkfd_criu_resume(void *p)
 		goto out_unlock;
 	}
 	WRITE_ONCE(pinfo->block_mmu_notifications, false);
-	schedule_delayed_work(&pinfo->restore_userptr_work, 0);
+	queue_delayed_work(system_freezable_wq,
+			   &pinfo->restore_userptr_work, 0);
 
 out_unlock:
 	mutex_unlock(&pinfo->lock);
@@ -1779,6 +1788,9 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		pr_debug("Failed to allow vma node access. ret %d\n", ret);
 		goto err_node_allow;
 	}
+	ret = drm_gem_handle_create(adev->kfd.client.file, gobj, &(*mem)->gem_handle);
+	if (ret)
+		goto err_gem_handle_create;
 	bo = gem_to_amdgpu_bo(gobj);
 	if (bo_type == ttm_bo_type_sg) {
 		bo->tbo.sg = sg;
@@ -1830,6 +1842,8 @@ allocate_init_user_pages_failed:
 err_pin_bo:
 err_validate_bo:
 	remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
+	drm_gem_handle_delete(adev->kfd.client.file, (*mem)->gem_handle);
+err_gem_handle_create:
 	drm_vma_node_revoke(&gobj->vma_node, drm_priv);
 err_node_allow:
 	/* Don't unreserve system mem limit twice */
@@ -1942,8 +1956,11 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 
 	/* Free the BO*/
 	drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
-	if (mem->dmabuf)
+	drm_gem_handle_delete(adev->kfd.client.file, mem->gem_handle);
+	if (mem->dmabuf) {
 		dma_buf_put(mem->dmabuf);
+		mem->dmabuf = NULL;
+	}
 	mutex_destroy(&mem->lock);
 
 	/* If this releases the last reference, it will end up calling
@@ -2068,21 +2085,35 @@ out:
 	return ret;
 }
 
-void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv)
+int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv)
 {
 	struct kfd_mem_attachment *entry;
 	struct amdgpu_vm *vm;
+	int ret;
 
 	vm = drm_priv_to_vm(drm_priv);
 
 	mutex_lock(&mem->lock);
 
+	ret = amdgpu_bo_reserve(mem->bo, true);
+	if (ret)
+		goto out;
+
 	list_for_each_entry(entry, &mem->attachments, list) {
-		if (entry->bo_va->base.vm == vm)
-			kfd_mem_dmaunmap_attachment(mem, entry);
+		if (entry->bo_va->base.vm != vm)
+			continue;
+		if (entry->bo_va->base.bo->tbo.ttm &&
+		    !entry->bo_va->base.bo->tbo.ttm->sg)
+			continue;
+
+		kfd_mem_dmaunmap_attachment(mem, entry);
 	}
 
+	amdgpu_bo_unreserve(mem->bo);
+out:
 	mutex_unlock(&mem->lock);
+
+	return ret;
 }
 
 int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
@@ -2295,34 +2326,26 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
 	return 0;
 }
 
-int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
-				      struct dma_buf *dma_buf,
-				      uint64_t va, void *drm_priv,
-				      struct kgd_mem **mem, uint64_t *size,
-				      uint64_t *mmap_offset)
+static int import_obj_create(struct amdgpu_device *adev,
+			     struct dma_buf *dma_buf,
+			     struct drm_gem_object *obj,
+			     uint64_t va, void *drm_priv,
+			     struct kgd_mem **mem, uint64_t *size,
+			     uint64_t *mmap_offset)
 {
 	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
-	struct drm_gem_object *obj;
 	struct amdgpu_bo *bo;
 	int ret;
 
-	obj = amdgpu_gem_prime_import(adev_to_drm(adev), dma_buf);
-	if (IS_ERR(obj))
-		return PTR_ERR(obj);
-
 	bo = gem_to_amdgpu_bo(obj);
 	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
-				    AMDGPU_GEM_DOMAIN_GTT))) {
+				    AMDGPU_GEM_DOMAIN_GTT)))
 		/* Only VRAM and GTT BOs are supported */
-		ret = -EINVAL;
-		goto err_put_obj;
-	}
+		return -EINVAL;
 
 	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
-	if (!*mem) {
-		ret = -ENOMEM;
-		goto err_put_obj;
-	}
+	if (!*mem)
+		return -ENOMEM;
 
 	ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
 	if (ret)
@@ -2372,8 +2395,41 @@ err_remove_mem:
 	drm_vma_node_revoke(&obj->vma_node, drm_priv);
 err_free_mem:
 	kfree(*mem);
+	return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd,
+					 uint64_t va, void *drm_priv,
+					 struct kgd_mem **mem, uint64_t *size,
+					 uint64_t *mmap_offset)
+{
+	struct drm_gem_object *obj;
+	uint32_t handle;
+	int ret;
+
+	ret = drm_gem_prime_fd_to_handle(&adev->ddev, adev->kfd.client.file, fd,
+					 &handle);
+	if (ret)
+		return ret;
+	obj = drm_gem_object_lookup(adev->kfd.client.file, handle);
+	if (!obj) {
+		ret = -EINVAL;
+		goto err_release_handle;
+	}
+
+	ret = import_obj_create(adev, obj->dma_buf, obj, va, drm_priv, mem, size,
+				mmap_offset);
+	if (ret)
+		goto err_put_obj;
+
+	(*mem)->gem_handle = handle;
+
+	return 0;
+
 err_put_obj:
 	drm_gem_object_put(obj);
+err_release_handle:
+	drm_gem_handle_delete(adev->kfd.client.file, handle);
 	return ret;
 }
 
@@ -2426,7 +2482,8 @@ int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
 				       KFD_QUEUE_EVICTION_TRIGGER_USERPTR);
 		if (r)
 			pr_err("Failed to quiesce KFD\n");
-		schedule_delayed_work(&process_info->restore_userptr_work,
+		queue_delayed_work(system_freezable_wq,
+			&process_info->restore_userptr_work,
 			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
 	}
 	mutex_unlock(&process_info->notifier_lock);
@@ -2749,7 +2806,8 @@ unlock_out:
 
 	/* If validation failed, reschedule another attempt */
 	if (evicted_bos) {
-		schedule_delayed_work(&process_info->restore_userptr_work,
+		queue_delayed_work(system_freezable_wq,
+			&process_info->restore_userptr_work,
 			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
 
 		kfd_smi_event_queue_restore_rescheduled(mm);
@@ -2758,6 +2816,23 @@ unlock_out:
 	put_task_struct(usertask);
 }
 
+static void replace_eviction_fence(struct dma_fence __rcu **ef,
+				   struct dma_fence *new_ef)
+{
+	struct dma_fence *old_ef = rcu_replace_pointer(*ef, new_ef, true
+		/* protected by process_info->lock */);
+
+	/* If we're replacing an unsignaled eviction fence, that fence will
+	 * never be signaled, and if anyone is still waiting on that fence,
+	 * they will hang forever. This should never happen. We should only
+	 * replace the fence in restore_work that only gets scheduled after
+	 * eviction work signaled the fence.
+	 */
+	WARN_ONCE(!dma_fence_is_signaled(old_ef),
+		  "Replacing unsignaled eviction fence");
+	dma_fence_put(old_ef);
+}
+
 /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
  *   KFD process identified by process_info
  *
@@ -2776,12 +2851,11 @@ unlock_out:
  * 7.  Add fence to all PD and PT BOs.
  * 8.  Unreserve all BOs
  */
-int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
+int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu **ef)
 {
 	struct amdkfd_process_info *process_info = info;
 	struct amdgpu_vm *peer_vm;
 	struct kgd_mem *mem;
-	struct amdgpu_amdkfd_fence *new_fence;
 	struct list_head duplicate_save;
 	struct amdgpu_sync sync_obj;
 	unsigned long failed_size = 0;
@@ -2825,12 +2899,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 	if (ret)
 		goto validate_map_fail;
 
-	ret = process_sync_pds_resv(process_info, &sync_obj);
-	if (ret) {
-		pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
-		goto validate_map_fail;
-	}
-
 	/* Validate BOs and map them to GPUVM (update VM page tables). */
 	list_for_each_entry(mem, &process_info->kfd_bo_list,
 			    validate_list) {
@@ -2881,6 +2949,19 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 	if (failed_size)
 		pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
 
+	/* Update mappings not managed by KFD */
+	list_for_each_entry(peer_vm, &process_info->vm_list_head,
+			vm_list_node) {
+		struct amdgpu_device *adev = amdgpu_ttm_adev(
+			peer_vm->root.bo->tbo.bdev);
+
+		ret = amdgpu_vm_handle_moved(adev, peer_vm, &exec.ticket);
+		if (ret) {
+			pr_debug("Memory eviction: handle moved failed. Try again\n");
+			goto validate_map_fail;
+		}
+	}
+
 	/* Update page directories */
 	ret = process_update_pds(process_info, &sync_obj);
 	if (ret) {
@@ -2888,25 +2969,47 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 		goto validate_map_fail;
 	}
 
+	/* Sync with fences on all the page tables. They implicitly depend on any
+	 * move fences from amdgpu_vm_handle_moved above.
+	 */
+	ret = process_sync_pds_resv(process_info, &sync_obj);
+	if (ret) {
+		pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
+		goto validate_map_fail;
+	}
+
 	/* Wait for validate and PT updates to finish */
 	amdgpu_sync_wait(&sync_obj, false);
 
-	/* Release old eviction fence and create new one, because fence only
-	 * goes from unsignaled to signaled, fence cannot be reused.
-	 * Use context and mm from the old fence.
+	/* The old eviction fence may be unsignaled if restore happens
+	 * after a GPU reset or suspend/resume. Keep the old fence in that
+	 * case. Otherwise release the old eviction fence and create new
+	 * one, because fence only goes from unsignaled to signaled once
+	 * and cannot be reused. Use context and mm from the old fence.
+	 *
+	 * If an old eviction fence signals after this check, that's OK.
+	 * Anyone signaling an eviction fence must stop the queues first
+	 * and schedule another restore worker.
 	 */
-	new_fence = amdgpu_amdkfd_fence_create(
+	if (dma_fence_is_signaled(&process_info->eviction_fence->base)) {
+		struct amdgpu_amdkfd_fence *new_fence =
+			amdgpu_amdkfd_fence_create(
 				process_info->eviction_fence->base.context,
 				process_info->eviction_fence->mm,
 				NULL);
-	if (!new_fence) {
-		pr_err("Failed to create eviction fence\n");
-		ret = -ENOMEM;
-		goto validate_map_fail;
+
+		if (!new_fence) {
+			pr_err("Failed to create eviction fence\n");
+			ret = -ENOMEM;
+			goto validate_map_fail;
+		}
+		dma_fence_put(&process_info->eviction_fence->base);
+		process_info->eviction_fence = new_fence;
+		replace_eviction_fence(ef, dma_fence_get(&new_fence->base));
+	} else {
+		WARN_ONCE(*ef != &process_info->eviction_fence->base,
+			  "KFD eviction fence doesn't match KGD process_info");
 	}
-	dma_fence_put(&process_info->eviction_fence->base);
-	process_info->eviction_fence = new_fence;
-	*ef = dma_fence_get(&new_fence->base);
 
 	/* Attach new eviction fence to all BOs except pinned ones */
 	list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 7473a42f7d45..9caba10315a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -103,7 +103,7 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector)
 	struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
 	struct amdgpu_connector_atom_dig *dig_connector;
 	int bpc = 8;
-	unsigned mode_clock, max_tmds_clock;
+	unsigned int mode_clock, max_tmds_clock;
 
 	switch (connector->connector_type) {
 	case DRM_MODE_CONNECTOR_DVII:
@@ -255,6 +255,7 @@ struct edid *amdgpu_connector_edid(struct drm_connector *connector)
 		return amdgpu_connector->edid;
 	} else if (edid_blob) {
 		struct edid *edid = kmemdup(edid_blob->data, edid_blob->length, GFP_KERNEL);
+
 		if (edid)
 			amdgpu_connector->edid = edid;
 	}
@@ -581,6 +582,7 @@ static int amdgpu_connector_set_property(struct drm_connector *connector,
 			amdgpu_encoder = to_amdgpu_encoder(connector->encoder);
 		} else {
 			const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private;
+
 			amdgpu_encoder = to_amdgpu_encoder(connector_funcs->best_encoder(connector));
 		}
 
@@ -797,6 +799,7 @@ static int amdgpu_connector_set_lcd_property(struct drm_connector *connector,
 		amdgpu_encoder = to_amdgpu_encoder(connector->encoder);
 	else {
 		const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private;
+
 		amdgpu_encoder = to_amdgpu_encoder(connector_funcs->best_encoder(connector));
 	}
 
@@ -979,6 +982,41 @@ amdgpu_connector_check_hpd_status_unchanged(struct drm_connector *connector)
 	return false;
 }
 
+static void amdgpu_connector_shared_ddc(enum drm_connector_status *status,
+					struct drm_connector *connector,
+					struct amdgpu_connector *amdgpu_connector)
+{
+	struct drm_device *dev = connector->dev;
+	struct amdgpu_device *adev = drm_to_adev(dev);
+	struct drm_connector_list_iter iter;
+	struct drm_connector *other;
+
+	/* Only a "connected" result on a shared DDC line needs rechecking. */
+	if (!amdgpu_connector->shared_ddc || *status != connector_status_connected)
+		return;
+
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(other, &iter) {
+		struct amdgpu_connector *peer;
+
+		if (other == connector)
+			continue;
+
+		peer = to_amdgpu_connector(other);
+		if (!peer->shared_ddc ||
+		    peer->ddc_bus->rec.i2c_id != amdgpu_connector->ddc_bus->rec.i2c_id)
+			continue;
+
+		/* non-VGA peer (both digital): hpd is our only option */
+		if (other->connector_type != DRM_MODE_CONNECTOR_VGA &&
+		    !amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) {
+			amdgpu_connector_free_edid(connector);
+			*status = connector_status_disconnected;
+		}
+	}
+	drm_connector_list_iter_end(&iter);
+}
+
 /*
  * DVI is complicated
  * Do a DDC probe, if DDC probe passes, get the full EDID so
@@ -1065,32 +1103,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
 			 * DDC line.  The latter is more complex because with DVI<->HDMI adapters
 			 * you don't really know what's connected to which port as both are digital.
 			 */
-			if (amdgpu_connector->shared_ddc && (ret == connector_status_connected)) {
-				struct drm_connector *list_connector;
-				struct drm_connector_list_iter iter;
-				struct amdgpu_connector *list_amdgpu_connector;
-
-				drm_connector_list_iter_begin(dev, &iter);
-				drm_for_each_connector_iter(list_connector,
-							    &iter) {
-					if (connector == list_connector)
-						continue;
-					list_amdgpu_connector = to_amdgpu_connector(list_connector);
-					if (list_amdgpu_connector->shared_ddc &&
-					    (list_amdgpu_connector->ddc_bus->rec.i2c_id ==
-					     amdgpu_connector->ddc_bus->rec.i2c_id)) {
-						/* cases where both connectors are digital */
-						if (list_connector->connector_type != DRM_MODE_CONNECTOR_VGA) {
-							/* hpd is our only option in this case */
-							if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) {
-								amdgpu_connector_free_edid(connector);
-								ret = connector_status_disconnected;
-							}
-						}
-					}
-				}
-				drm_connector_list_iter_end(&iter);
-			}
+			amdgpu_connector_shared_ddc(&ret, connector, amdgpu_connector);
 		}
 	}
 
@@ -1192,6 +1205,7 @@ amdgpu_connector_dvi_encoder(struct drm_connector *connector)
 static void amdgpu_connector_dvi_force(struct drm_connector *connector)
 {
 	struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
 	if (connector->force == DRM_FORCE_ON)
 		amdgpu_connector->use_digital = false;
 	if (connector->force == DRM_FORCE_ON_DIGITAL)
@@ -1426,6 +1440,7 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
 				ret = connector_status_connected;
 			else if (amdgpu_connector->dac_load_detect) { /* try load detection */
 				const struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
+
 				ret = encoder_funcs->detect(encoder, connector);
 			}
 		}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 440e9a6786fc..6adeddfb3d56 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -870,9 +870,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		struct amdgpu_bo *bo = e->bo;
 		int i;
 
-		e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
-					sizeof(struct page *),
-					GFP_KERNEL | __GFP_ZERO);
+		e->user_pages = kvcalloc(bo->tbo.ttm->num_pages,
+					 sizeof(struct page *),
+					 GFP_KERNEL);
 		if (!e->user_pages) {
 			DRM_ERROR("kvmalloc_array failure\n");
 			r = -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index c1efa13bccbb..1afbb2e932c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -540,7 +540,11 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
 	while (size) {
 		uint32_t value;
 
-		value = RREG32_PCIE(*pos);
+		if (upper_32_bits(*pos))
+			value = RREG32_PCIE_EXT(*pos);
+		else
+			value = RREG32_PCIE(*pos);
+
 		r = put_user(value, (uint32_t *)buf);
 		if (r)
 			goto out;
@@ -600,7 +604,10 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
 		if (r)
 			goto out;
 
-		WREG32_PCIE(*pos, value);
+		if (upper_32_bits(*pos))
+			WREG32_PCIE_EXT(*pos, value);
+		else
+			WREG32_PCIE(*pos, value);
 
 		result += 4;
 		buf += 4;
@@ -638,6 +645,9 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 
+	if (!adev->didt_rreg)
+		return -EOPNOTSUPP;
+
 	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
 	if (r < 0) {
 		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -694,6 +704,9 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 
+	if (!adev->didt_wreg)
+		return -EOPNOTSUPP;
+
 	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
 	if (r < 0) {
 		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -749,7 +762,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
 	int r;
 
 	if (!adev->smc_rreg)
-		return -EPERM;
+		return -EOPNOTSUPP;
 
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
@@ -808,7 +821,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
 	int r;
 
 	if (!adev->smc_wreg)
-		return -EPERM;
+		return -EOPNOTSUPP;
 
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
@@ -1665,7 +1678,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
 		struct amdgpu_ring *ring = adev->rings[i];
 
-		if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+		if (!amdgpu_ring_sched_ready(ring))
 			continue;
 		drm_sched_wqueue_stop(&ring->sched);
 	}
@@ -1681,7 +1694,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
 		struct amdgpu_ring *ring = adev->rings[i];
 
-		if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+		if (!amdgpu_ring_sched_ready(ring))
 			continue;
 		drm_sched_wqueue_start(&ring->sched);
 	}
@@ -1903,8 +1916,8 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
 
 	ring = adev->rings[val];
 
-	if (!ring || !ring->funcs->preempt_ib ||
-	    !drm_sched_wqueue_ready(&ring->sched))
+	if (!amdgpu_ring_sched_ready(ring) ||
+	    !ring->funcs->preempt_ib)
 		return -EINVAL;
 
 	/* the last preemption failed */
@@ -2141,6 +2154,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
 	amdgpu_debugfs_firmware_init(adev);
 	amdgpu_ta_if_debugfs_init(adev);
 
+	amdgpu_debugfs_mes_event_log_init(adev);
+
 #if defined(CONFIG_DRM_AMD_DC)
 	if (adev->dc_enabled)
 		dtn_debugfs_init(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
index 371a6f0deb29..0425432d8659 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
@@ -32,3 +32,5 @@ void amdgpu_debugfs_fini(struct amdgpu_device *adev);
 void amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
 void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
 void amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev);
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 2a6684a38714..94bdb5fa6ebc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -162,6 +162,65 @@ static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
 static DEVICE_ATTR(pcie_replay_count, 0444,
 		amdgpu_device_get_pcie_replay_count, NULL);
 
+/*
+ * Read handler for the reg_state binary attribute.
+ *
+ * The read offset (@ppos) selects which register-state block is fetched into
+ * @buf via amdgpu_asic_get_reg_state(); only the AMDGPU_SYS_REG_STATE_*
+ * offsets handled below are accepted, any other offset returns -EINVAL.
+ * Otherwise the value from amdgpu_asic_get_reg_state() is returned as is.
+ */
+static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
+					  struct bin_attribute *attr, char *buf,
+					  loff_t ppos, size_t count)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = drm_to_adev(ddev);
+
+	switch (ppos) {
+	case AMDGPU_SYS_REG_STATE_XGMI:
+		return amdgpu_asic_get_reg_state(adev, AMDGPU_REG_STATE_TYPE_XGMI,
+						 buf, count);
+	case AMDGPU_SYS_REG_STATE_WAFL:
+		return amdgpu_asic_get_reg_state(adev, AMDGPU_REG_STATE_TYPE_WAFL,
+						 buf, count);
+	case AMDGPU_SYS_REG_STATE_PCIE:
+		return amdgpu_asic_get_reg_state(adev, AMDGPU_REG_STATE_TYPE_PCIE,
+						 buf, count);
+	case AMDGPU_SYS_REG_STATE_USR:
+		return amdgpu_asic_get_reg_state(adev, AMDGPU_REG_STATE_TYPE_USR,
+						 buf, count);
+	case AMDGPU_SYS_REG_STATE_USR_1:
+		return amdgpu_asic_get_reg_state(adev, AMDGPU_REG_STATE_TYPE_USR_1,
+						 buf, count);
+	default:
+		return -EINVAL;
+	}
+}
+
+BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
+	 AMDGPU_SYS_REG_STATE_END);
+
+/*
+ * Expose the reg_state binary attribute on the device's kobject. ASICs that
+ * do not report register state are skipped and 0 is returned.
+ */
+int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
+{
+	if (!amdgpu_asic_get_reg_state_supported(adev))
+		return 0;
+
+	return sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
+}
+
+/* Remove the reg_state bin file; no-op when reg state is unsupported. */
+void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
+{
+	if (amdgpu_asic_get_reg_state_supported(adev))
+		sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
+}
+
 /**
  * DOC: board_info
  *
@@ -1485,6 +1544,7 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
 				return true;
 
 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
+			release_firmware(adev->pm.fw);
 			if (fw_ver < 0x00160e00)
 				return true;
 		}
@@ -1540,7 +1600,7 @@ bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
 	if (adev->mman.keep_stolen_vga_memory)
 		return false;
 
-	return adev->ip_versions[DCE_HWIP][0] >= IP_VERSION(3, 0, 0);
+	return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
 }
 
 /*
@@ -1551,11 +1611,15 @@ bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
  * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
  * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
  */
-static bool amdgpu_device_pcie_dynamic_switching_supported(void)
+static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
 {
 #if IS_ENABLED(CONFIG_X86)
 	struct cpuinfo_x86 *c = &cpu_data(0);
 
+	/* eGPU change speeds based on USB4 fabric conditions */
+	if (dev_is_removable(adev->dev))
+		return true;
+
 	if (c->x86_vendor == X86_VENDOR_INTEL)
 		return false;
 #endif
@@ -2188,15 +2252,8 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
 
 	adev->firmware.gpu_info_fw = NULL;
 
-	if (adev->mman.discovery_bin) {
-		/*
-		 * FIXME: The bounding box is still needed by Navi12, so
-		 * temporarily read it from gpu_info firmware. Should be dropped
-		 * when DAL no longer needs it.
-		 */
-		if (adev->asic_type != CHIP_NAVI12)
-			return 0;
-	}
+	if (adev->mman.discovery_bin)
+		return 0;
 
 	switch (adev->asic_type) {
 	default:
@@ -2395,7 +2452,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
 	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
 		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
-	if (!amdgpu_device_pcie_dynamic_switching_supported())
+	if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
 		adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
 
 	total = true;
@@ -2676,6 +2733,12 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 					goto init_failed;
 				}
 			}
+
+			r = amdgpu_seq64_init(adev);
+			if (r) {
+				DRM_ERROR("allocate seq64 failed %d\n", r);
+				goto init_failed;
+			}
 		}
 	}
 
@@ -3138,6 +3201,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
 			amdgpu_device_wb_fini(adev);
 			amdgpu_device_mem_scratch_fini(adev);
 			amdgpu_ib_pool_fini(adev);
+			amdgpu_seq64_fini(adev);
 		}
 
 		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
@@ -3791,10 +3855,6 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
 		adev->gfx.mcbp = true;
 	else if (amdgpu_mcbp == 0)
 		adev->gfx.mcbp = false;
-	else if ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 0, 0)) &&
-		 (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) &&
-		 adev->gfx.num_gfx_rings)
-		adev->gfx.mcbp = true;
 
 	if (amdgpu_sriov_vf(adev))
 		adev->gfx.mcbp = true;
@@ -4061,23 +4121,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 				}
 			}
 		} else {
-			switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
-			case IP_VERSION(13, 0, 0):
-			case IP_VERSION(13, 0, 7):
-			case IP_VERSION(13, 0, 10):
-				r = psp_gpu_reset(adev);
-				break;
-			default:
-				tmp = amdgpu_reset_method;
-				/* It should do a default reset when loading or reloading the driver,
-				 * regardless of the module parameter reset_method.
-				 */
-				amdgpu_reset_method = AMD_RESET_METHOD_NONE;
-				r = amdgpu_asic_reset(adev);
-				amdgpu_reset_method = tmp;
-				break;
-			}
-
+			tmp = amdgpu_reset_method;
+			/* It should do a default reset when loading or reloading the driver,
+			 * regardless of the module parameter reset_method.
+			 */
+			amdgpu_reset_method = AMD_RESET_METHOD_NONE;
+			r = amdgpu_asic_reset(adev);
+			amdgpu_reset_method = tmp;
 			if (r) {
 				dev_err(adev->dev, "asic reset on init failed\n");
 				goto failed;
@@ -4222,6 +4272,7 @@ fence_driver_init:
 			"Could not create amdgpu board attributes\n");
 
 	amdgpu_fru_sysfs_init(adev);
+	amdgpu_reg_state_sysfs_init(adev);
 
 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
 		r = amdgpu_pmu_init(adev);
@@ -4344,6 +4395,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
 	sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
 	amdgpu_fru_sysfs_fini(adev);
 
+	amdgpu_reg_state_sysfs_fini(adev);
+
 	/* disable ras feature must before hw fini */
 	amdgpu_ras_pre_fini(adev);
 
@@ -4461,13 +4514,15 @@ int amdgpu_device_prepare(struct drm_device *dev)
 	struct amdgpu_device *adev = drm_to_adev(dev);
 	int i, r;
 
+	amdgpu_choose_low_power_state(adev);
+
 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
 		return 0;
 
 	/* Evict the majority of BOs before starting suspend sequence */
 	r = amdgpu_device_evict_resources(adev);
 	if (r)
-		return r;
+		goto unprepare;
 
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.valid)
@@ -4476,10 +4531,15 @@ int amdgpu_device_prepare(struct drm_device *dev)
 			continue;
 		r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
 		if (r)
-			return r;
+			goto unprepare;
 	}
 
 	return 0;
+
+unprepare:
+	adev->in_s0ix = adev->in_s3 = false;
+
+	return r;
 }
 
 /**
@@ -4516,12 +4576,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
 
 	cancel_delayed_work_sync(&adev->delayed_init_work);
-	flush_delayed_work(&adev->gfx.gfx_off_delay_work);
 
 	amdgpu_ras_suspend(adev);
 
-	amdgpu_ttm_set_buffer_funcs_status(adev, false);
-
 	amdgpu_device_ip_suspend_phase1(adev);
 
 	if (!adev->in_s0ix)
@@ -4531,6 +4588,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 	if (r)
 		return r;
 
+	amdgpu_ttm_set_buffer_funcs_status(adev, false);
+
 	amdgpu_fence_driver_hw_fini(adev);
 
 	amdgpu_device_ip_suspend_phase2(adev);
@@ -4538,6 +4597,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 	if (amdgpu_sriov_vf(adev))
 		amdgpu_virt_release_full_gpu(adev, false);
 
+	r = amdgpu_dpm_notify_rlc_state(adev, false);
+	if (r)
+		return r;
+
 	return 0;
 }
 
@@ -4964,7 +5027,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
 
-		if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+		if (!amdgpu_ring_sched_ready(ring))
 			continue;
 
 		spin_lock(&ring->sched.job_list_lock);
@@ -5103,7 +5166,7 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
 
-		if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+		if (!amdgpu_ring_sched_ready(ring))
 			continue;
 
 		/* Clear job fence from fence drv to avoid force_completion
@@ -5179,7 +5242,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
 	struct amdgpu_device *tmp_adev = NULL;
 	bool need_full_reset, skip_hw_reset, vram_lost = false;
 	int r = 0;
-	bool gpu_reset_for_dev_remove = 0;
 
 	/* Try reset handler method first */
 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
@@ -5199,10 +5261,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
 	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
 
-	gpu_reset_for_dev_remove =
-		test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
-			test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
-
 	/*
 	 * ASIC reset has to be done on all XGMI hive nodes ASAP
 	 * to allow proper links negotiation in FW (within 1 sec)
@@ -5245,18 +5303,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
 		amdgpu_ras_intr_cleared();
 	}
 
-	/* Since the mode1 reset affects base ip blocks, the
-	 * phase1 ip blocks need to be resumed. Otherwise there
-	 * will be a BIOS signature error and the psp bootloader
-	 * can't load kdb on the next amdgpu install.
-	 */
-	if (gpu_reset_for_dev_remove) {
-		list_for_each_entry(tmp_adev, device_list_handle, reset_list)
-			amdgpu_device_ip_resume_phase1(tmp_adev);
-
-		goto end;
-	}
-
 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
 		if (need_full_reset) {
 			/* post card */
@@ -5493,11 +5539,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	int i, r = 0;
 	bool need_emergency_restart = false;
 	bool audio_suspended = false;
-	bool gpu_reset_for_dev_remove = false;
-
-	gpu_reset_for_dev_remove =
-			test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
-				test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
 
 	/*
 	 * Special case: RAS triggered and full reset isn't supported
@@ -5535,7 +5576,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
 			list_add_tail(&tmp_adev->reset_list, &device_list);
-			if (gpu_reset_for_dev_remove && adev->shutdown)
+			if (adev->shutdown)
 				tmp_adev->shutdown = true;
 		}
 		if (!list_is_first(&adev->reset_list, &device_list))
@@ -5592,7 +5633,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 			struct amdgpu_ring *ring = tmp_adev->rings[i];
 
-			if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+			if (!amdgpu_ring_sched_ready(ring))
 				continue;
 
 			drm_sched_stop(&ring->sched, job ? &job->base : NULL);
@@ -5620,10 +5661,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 
 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
-		if (gpu_reset_for_dev_remove) {
-			/* Workaroud for ASICs need to disable SMC first */
-			amdgpu_device_smu_fini_early(tmp_adev);
-		}
 		r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
 		/*TODO Should we stop ?*/
 		if (r) {
@@ -5655,9 +5692,6 @@ retry:	/* Rest of adevs pre asic reset from XGMI hive. */
 		r = amdgpu_do_asic_reset(device_list_handle, reset_context);
 		if (r && r == -EAGAIN)
 			goto retry;
-
-		if (!r && gpu_reset_for_dev_remove)
-			goto recover_end;
 	}
 
 skip_hw_reset:
@@ -5668,7 +5702,7 @@ skip_hw_reset:
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 			struct amdgpu_ring *ring = tmp_adev->rings[i];
 
-			if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+			if (!amdgpu_ring_sched_ready(ring))
 				continue;
 
 			drm_sched_start(&ring->sched, true);
@@ -5713,7 +5747,6 @@ skip_sched_resume:
 		amdgpu_ras_set_error_query_ready(tmp_adev, true);
 	}
 
-recover_end:
 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
 					    reset_list);
 	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
@@ -5731,6 +5764,39 @@ recover_end:
 }
 
 /**
+ * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
+ *
+ * @adev: amdgpu_device pointer
+ * @speed: pointer to the speed of the link
+ * @width: pointer to the width of the link
+ *
+ * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
+ * first physical partner to an AMD dGPU.
+ * This will exclude any virtual switches and links.
+ */
+static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
+					    enum pci_bus_speed *speed,
+					    enum pcie_link_width *width)
+{
+	struct pci_dev *up;
+
+	if (!speed || !width)
+		return;
+
+	*speed = PCI_SPEED_UNKNOWN;
+	*width = PCIE_LNK_WIDTH_UNKNOWN;
+
+	/* bridges carrying the ATI vendor ID are treated as internal to the dGPU */
+	for (up = pci_upstream_bridge(adev->pdev); up; up = pci_upstream_bridge(up)) {
+		if (up->vendor != PCI_VENDOR_ID_ATI) {
+			*speed = pcie_get_speed_cap(up);
+			*width = pcie_get_width_cap(up);
+			return;
+		}
+	}
+}
+
+/**
  * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
  *
  * @adev: amdgpu_device pointer
@@ -5763,8 +5829,8 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
 		return;
 
-	pcie_bandwidth_available(adev->pdev, NULL,
-				 &platform_speed_cap, &platform_link_width);
+	amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
+					&platform_link_width);
 
 	if (adev->pm.pcie_gen_mask == 0) {
 		/* asic caps */
@@ -5991,7 +6057,7 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 			struct amdgpu_ring *ring = adev->rings[i];
 
-			if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+			if (!amdgpu_ring_sched_ready(ring))
 				continue;
 
 			drm_sched_stop(&ring->sched, NULL);
@@ -6119,7 +6185,7 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
 
-		if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+		if (!amdgpu_ring_sched_ready(ring))
 			continue;
 
 		drm_sched_start(&ring->sched, true);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 0431eafa86b5..c7d60dd0fb97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -1963,8 +1963,6 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
 		break;
 	case IP_VERSION(9, 4, 3):
-		if (!amdgpu_exp_hw_support)
-			return -EINVAL;
 		amdgpu_device_ip_block_add(adev, &gfx_v9_4_3_ip_block);
 		break;
 	case IP_VERSION(10, 1, 10):
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 0cacd0b9f8be..b8fbe97efe1d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -340,14 +340,11 @@ int amdgpu_display_crtc_set_config(struct drm_mode_set *set,
 		adev->have_disp_power_ref = true;
 		return ret;
 	}
-	/* if we have no active crtcs, then drop the power ref
-	 * we got before
+	/* if we have no active crtcs, then go to
+	 * drop the power ref we got before
 	 */
-	if (!active && adev->have_disp_power_ref) {
-		pm_runtime_put_autosuspend(dev->dev);
+	if (!active && adev->have_disp_power_ref)
 		adev->have_disp_power_ref = false;
-	}
-
 out:
 	/* drop the power reference we got coming in here */
 	pm_runtime_put_autosuspend(dev->dev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index e7e87a3b2601..decbbe3d4f06 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -42,6 +42,7 @@
 #include <linux/dma-fence-array.h>
 #include <linux/pci-p2pdma.h>
 #include <linux/pm_runtime.h>
+#include "amdgpu_trace.h"
 
 /**
  * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
@@ -63,6 +64,7 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
 		attach->peer2peer = false;
 
 	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+	trace_amdgpu_runpm_reference_dumps(1, __func__);
 	if (r < 0)
 		goto out;
 
@@ -70,6 +72,7 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
 
 out:
 	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+	trace_amdgpu_runpm_reference_dumps(0, __func__);
 	return r;
 }
 
@@ -90,6 +93,7 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
 
 	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
 	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+	trace_amdgpu_runpm_reference_dumps(0, __func__);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 8f24cabe2155..586f4d03039d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -115,9 +115,10 @@
  *   3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support
  * - 3.55.0 - Add AMDGPU_INFO_GPUVM_FAULT query
  * - 3.56.0 - Update IB start address and size alignment for decode and encode
+ * - 3.57.0 - Compute tunneling on GFX10+
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	56
+#define KMS_DRIVER_MINOR	57
 #define KMS_DRIVER_PATCHLEVEL	0
 
 /*
@@ -127,6 +128,7 @@ enum AMDGPU_DEBUG_MASK {
 	AMDGPU_DEBUG_VM = BIT(0),
 	AMDGPU_DEBUG_LARGEBAR = BIT(1),
 	AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY = BIT(2),
+	AMDGPU_DEBUG_USE_VRAM_FW_BUF = BIT(3),
 };
 
 unsigned int amdgpu_vram_limit = UINT_MAX;
@@ -208,6 +210,8 @@ int amdgpu_umsch_mm;
 int amdgpu_seamless = -1; /* auto */
 uint amdgpu_debug_mask;
 int amdgpu_agp = -1; /* auto */
+int amdgpu_wbrf = -1;
+int amdgpu_damage_clips = -1; /* auto */
 
 static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
 
@@ -857,6 +861,18 @@ MODULE_PARM_DESC(backlight, "Backlight control (0 = pwm, 1 = aux, -1 auto (defau
 module_param_named(backlight, amdgpu_backlight, bint, 0444);
 
 /**
+ * DOC: damageclips (int)
+ * Enable or disable damage clips support. If damage clips support is disabled,
+ * we will force full frame updates, irrespective of what user space sends to
+ * us.
+ *
+ * Defaults to -1 (where it is enabled unless a PSR-SU display is detected).
+ */
+MODULE_PARM_DESC(damageclips,
+		 "Damage clips support (0 = disable, 1 = enable, -1 auto (default))");
+module_param_named(damageclips, amdgpu_damage_clips, int, 0444);
+
+/**
  * DOC: tmz (int)
  * Trusted Memory Zone (TMZ) is a method to protect data being written
  * to or read from memory.
@@ -971,6 +987,22 @@ module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444);
 MODULE_PARM_DESC(agp, "AGP (-1 = auto (default), 0 = disable, 1 = enable)");
 module_param_named(agp, amdgpu_agp, int, 0444);
 
+/**
+ * DOC: wbrf (int)
+ * Enable Wifi RFI mitigation feature.
+ * Due to electrical and mechanical constraints, relatively high-powered
+ * harmonics of the (G-)DDR memory clocks may interfere with the local radio
+ * module frequency bands used by Wifi 6/6e/7. To mitigate the possible RFI,
+ * with this feature enabled, PMFW will use either "shadowed P-State" or "P-State"
+ * based on the active list of frequencies in use (to be avoided) as part of the
+ * initial setting or P-state transition. However, there may be a potential
+ * performance impact with this feature enabled.
+ * (0 = disabled, 1 = enabled, -1 = auto (default setting, will be enabled if supported))
+ */
+MODULE_PARM_DESC(wbrf,
+	"Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto (default))");
+module_param_named(wbrf, amdgpu_wbrf, int, 0444);
+
 /* These devices are not supported by amdgpu.
  * They are supported by the mach64, r128, radeon drivers
  */
@@ -2099,6 +2131,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev)
 		pr_info("debug: soft reset for GPU recovery disabled\n");
 		adev->debug_disable_soft_recovery = true;
 	}
+
+	if (amdgpu_debug_mask & AMDGPU_DEBUG_USE_VRAM_FW_BUF) {
+		pr_info("debug: place fw in vram for frontdoor loading\n");
+		adev->debug_use_vram_fw_buf = true;
+	}
 }
 
 static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags)
@@ -2210,6 +2247,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
 
 	pci_set_drvdata(pdev, ddev);
 
+	amdgpu_init_debug_options(adev);
+
 	ret = amdgpu_driver_load_kms(adev, flags);
 	if (ret)
 		goto err_pci;
@@ -2229,6 +2268,10 @@ retry_init:
 	if (ret)
 		goto err_pci;
 
+	ret = amdgpu_amdkfd_drm_client_create(adev);
+	if (ret)
+		goto err_pci;
+
 	/*
 	 * 1. don't init fbdev on hw without DCE
 	 * 2. don't init fbdev if there are no connectors
@@ -2263,6 +2306,8 @@ retry_init:
 		pm_runtime_mark_last_busy(ddev->dev);
 		pm_runtime_put_autosuspend(ddev->dev);
 
+		pci_wake_from_d3(pdev, TRUE);
+
 		/*
 		 * For runpm implemented via BACO, PMFW will handle the
 		 * timing for BACO in and out:
@@ -2288,8 +2333,6 @@ retry_init:
 			amdgpu_get_secondary_funcs(adev);
 	}
 
-	amdgpu_init_debug_options(adev);
-
 	return 0;
 
 err_pci:
@@ -2311,38 +2354,6 @@ amdgpu_pci_remove(struct pci_dev *pdev)
 		pm_runtime_forbid(dev->dev);
 	}
 
-	if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 2) &&
-	    !amdgpu_sriov_vf(adev)) {
-		bool need_to_reset_gpu = false;
-
-		if (adev->gmc.xgmi.num_physical_nodes > 1) {
-			struct amdgpu_hive_info *hive;
-
-			hive = amdgpu_get_xgmi_hive(adev);
-			if (hive->device_remove_count == 0)
-				need_to_reset_gpu = true;
-			hive->device_remove_count++;
-			amdgpu_put_xgmi_hive(hive);
-		} else {
-			need_to_reset_gpu = true;
-		}
-
-		/* Workaround for ASICs need to reset SMU.
-		 * Called only when the first device is removed.
-		 */
-		if (need_to_reset_gpu) {
-			struct amdgpu_reset_context reset_context;
-
-			adev->shutdown = true;
-			memset(&reset_context, 0, sizeof(reset_context));
-			reset_context.method = AMD_RESET_METHOD_NONE;
-			reset_context.reset_req_dev = adev;
-			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-			set_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context.flags);
-			amdgpu_device_gpu_recover(adev, NULL, &reset_context);
-		}
-	}
-
 	amdgpu_driver_unload_kms(dev);
 
 	/*
@@ -2478,6 +2489,7 @@ static int amdgpu_pmops_suspend(struct device *dev)
 	struct drm_device *drm_dev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = drm_to_adev(drm_dev);
 
+	adev->suspend_complete = false;
 	if (amdgpu_acpi_is_s0ix_active(adev))
 		adev->in_s0ix = true;
 	else if (amdgpu_acpi_is_s3_active(adev))
@@ -2492,6 +2504,7 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev)
 	struct drm_device *drm_dev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = drm_to_adev(drm_dev);
 
+	adev->suspend_complete = true;
 	if (amdgpu_acpi_should_gpu_reset(adev))
 		return amdgpu_asic_reset(adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index dc230212746a..70bff8cecfda 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -183,6 +183,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
 	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
 			       seq, flags | AMDGPU_FENCE_FLAG_INT);
 	pm_runtime_get_noresume(adev_to_drm(adev)->dev);
+	trace_amdgpu_runpm_reference_dumps(1, __func__);
 	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
 	if (unlikely(rcu_dereference_protected(*ptr, 1))) {
 		struct dma_fence *old;
@@ -310,6 +311,7 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
 		dma_fence_put(fence);
 		pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
 		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+		trace_amdgpu_runpm_reference_dumps(0, __func__);
 	} while (last_seq != seq);
 
 	return true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 73b8cca35bab..c623e23049d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -121,6 +121,7 @@ int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev)
 	struct amdgpu_bo_param bp;
 	dma_addr_t dma_addr;
 	struct page *p;
+	unsigned long x;
 	int ret;
 
 	if (adev->gart.bo != NULL)
@@ -130,6 +131,10 @@ int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev)
 	if (!p)
 		return -ENOMEM;
 
+	/* assign pages to this device */
+	for (x = 0; x < (1UL << order); x++)
+		p[x].mapping = adev->mman.bdev.dev_mapping;
+
 	/* If the hardware does not support UTCL2 snooping of the CPU caches
 	 * then set_memory_wc() could be used as a workaround to mark the pages
 	 * as write combine memory.
@@ -223,6 +228,7 @@ void amdgpu_gart_table_ram_free(struct amdgpu_device *adev)
 	unsigned int order = get_order(adev->gart.table_size);
 	struct sg_table *sg = adev->gart.bo->tbo.sg;
 	struct page *p;
+	unsigned long x;
 	int ret;
 
 	ret = amdgpu_bo_reserve(adev->gart.bo, false);
@@ -234,6 +240,8 @@ void amdgpu_gart_table_ram_free(struct amdgpu_device *adev)
 	sg_free_table(sg);
 	kfree(sg);
 	p = virt_to_page(adev->gart.ptr);
+	for (x = 0; x < (1UL << order); x++)
+		p[x].mapping = NULL;
 	__free_pages(p, order);
 
 	adev->gart.ptr = NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index b9674c57c436..6ddc8e3360e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -723,8 +723,15 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
 
 		if (adev->gfx.gfx_off_req_count == 0 &&
 		    !adev->gfx.gfx_off_state) {
-			schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
+			/* If going to s2idle, no need to wait */
+			if (adev->in_s0ix) {
+				if (!amdgpu_dpm_set_powergating_by_smu(adev,
+						AMD_IP_BLOCK_TYPE_GFX, true))
+					adev->gfx.gfx_off_state = true;
+			} else {
+				schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
 					      delay);
+			}
 		}
 	} else {
 		if (adev->gfx.gfx_off_req_count == 0) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 5f71414190e9..55784a9f26c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -181,6 +181,9 @@ uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 
+	if (!bo->ttm)
+		return AMDGPU_BO_INVALID_OFFSET;
+
 	if (bo->ttm->num_pages != 1 || bo->ttm->caching == ttm_cached)
 		return AMDGPU_BO_INVALID_OFFSET;
 
@@ -1042,21 +1045,28 @@ int amdgpu_gmc_vram_checking(struct amdgpu_device *adev)
 	 * seconds, so here, we just pick up three parts for emulation.
 	 */
 	ret = memcmp(vram_ptr, cptr, 10);
-	if (ret)
-		return ret;
+	if (ret) {
+		ret = -EIO;
+		goto release_buffer;
+	}
 
 	ret = memcmp(vram_ptr + (size / 2), cptr, 10);
-	if (ret)
-		return ret;
+	if (ret) {
+		ret = -EIO;
+		goto release_buffer;
+	}
 
 	ret = memcmp(vram_ptr + size - 10, cptr, 10);
-	if (ret)
-		return ret;
+	if (ret) {
+		ret = -EIO;
+		goto release_buffer;
+	}
 
+release_buffer:
 	amdgpu_bo_free_kernel(&vram_bo, &vram_gpu,
 			&vram_ptr);
 
-	return 0;
+	return ret;
 }
 
 static ssize_t current_memory_partition_show(
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
index 081267161d40..55b65fc04b65 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
@@ -190,8 +190,8 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
 		pr_debug("hmm range: start = 0x%lx, end = 0x%lx",
 			hmm_range->start, hmm_range->end);
 
-		/* Assuming 128MB takes maximum 1 second to fault page address */
-		timeout = max((hmm_range->end - hmm_range->start) >> 27, 1UL);
+		/* Assuming 64MB takes maximum 1 second to fault page address */
+		timeout = max((hmm_range->end - hmm_range->start) >> 26, 1UL);
 		timeout *= HMM_RANGE_DEFAULT_TIMEOUT;
 		timeout = jiffies + msecs_to_jiffies(timeout);
 
@@ -199,6 +199,7 @@ retry:
 		hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
 		r = hmm_range_fault(hmm_range);
 		if (unlikely(r)) {
+			schedule();
 			/*
 			 * FIXME: This timeout should encompass the retry from
 			 * mmu_interval_read_retry() as well.
@@ -212,7 +213,6 @@ retry:
 			break;
 		hmm_range->hmm_pfns += MAX_WALK_BYTE >> PAGE_SHIFT;
 		hmm_range->start = hmm_range->end;
-		schedule();
 	} while (hmm_range->end < end);
 
 	hmm_range->start = start;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
index 82608df43396..d79cb13e1aa8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
@@ -175,7 +175,6 @@ struct amdgpu_i2c_chan *amdgpu_i2c_create(struct drm_device *dev,
 
 	i2c->rec = *rec;
 	i2c->adapter.owner = THIS_MODULE;
-	i2c->adapter.class = I2C_CLASS_DDC;
 	i2c->adapter.dev.parent = dev->dev;
 	i2c->dev = dev;
 	i2c_set_adapdata(&i2c->adapter, i2c);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 583cf03950cd..bf4f48fe438d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1105,7 +1105,12 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 			if (amdgpu_dpm_read_sensor(adev,
 						   AMDGPU_PP_SENSOR_GPU_AVG_POWER,
 						   (void *)&ui32, &ui32_size)) {
-				return -EINVAL;
+				/* fall back to input power for backwards compat */
+				if (amdgpu_dpm_read_sensor(adev,
+							   AMDGPU_PP_SENSOR_GPU_INPUT_POWER,
+							   (void *)&ui32, &ui32_size)) {
+					return -EINVAL;
+				}
 			}
 			ui32 >>= 8;
 			break;
@@ -1428,6 +1433,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 		fpriv->csa_va = NULL;
 	}
 
+	amdgpu_seq64_unmap(adev, fpriv);
+
 	pasid = fpriv->vm.pasid;
 	pd = amdgpu_bo_ref(fpriv->vm.root.bo);
 	if (!WARN_ON(amdgpu_bo_reserve(pd, true))) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
index cf33eb219e25..59fafb8392e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -218,6 +218,7 @@ static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, st
 int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct ras_err_data *err_data)
 {
 	struct amdgpu_smuio_mcm_config_info mcm_info;
+	struct ras_err_addr err_addr = {0};
 	struct mca_bank_set mca_set;
 	struct mca_bank_node *node;
 	struct mca_bank_entry *entry;
@@ -246,10 +247,18 @@ int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_blo
 		mcm_info.socket_id = entry->info.socket_id;
 		mcm_info.die_id = entry->info.aid;
 
+		if (blk == AMDGPU_RAS_BLOCK__UMC) {
+			err_addr.err_status = entry->regs[MCA_REG_IDX_STATUS];
+			err_addr.err_ipid = entry->regs[MCA_REG_IDX_IPID];
+			err_addr.err_addr = entry->regs[MCA_REG_IDX_ADDR];
+		}
+
 		if (type == AMDGPU_MCA_ERROR_TYPE_UE)
-			amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, (uint64_t)count);
+			amdgpu_ras_error_statistic_ue_count(err_data,
+				&mcm_info, &err_addr, (uint64_t)count);
 		else
-			amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, (uint64_t)count);
+			amdgpu_ras_error_statistic_ce_count(err_data,
+				&mcm_info, &err_addr, (uint64_t)count);
 	}
 
 out_mca_release:
@@ -351,6 +360,9 @@ int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_err
 	const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
 	int count;
 
+	if (!mca_funcs || !mca_funcs->mca_get_mca_entry)
+		return -EOPNOTSUPP;
+
 	switch (type) {
 	case AMDGPU_MCA_ERROR_TYPE_UE:
 		count = mca_funcs->max_ue_count;
@@ -365,10 +377,7 @@ int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_err
 	if (idx >= count)
 		return -EINVAL;
 
-	if (mca_funcs && mca_funcs->mca_get_mca_entry)
-		return mca_funcs->mca_get_mca_entry(adev, type, idx, entry);
-
-	return -EOPNOTSUPP;
+	return mca_funcs->mca_get_mca_entry(adev, type, idx, entry);
 }
 
 #if defined(CONFIG_DEBUG_FS)
@@ -377,7 +386,7 @@ static int amdgpu_mca_smu_debug_mode_set(void *data, u64 val)
 	struct amdgpu_device *adev = (struct amdgpu_device *)data;
 	int ret;
 
-	ret = amdgpu_mca_smu_set_debug_mode(adev, val ? true : false);
+	ret = amdgpu_ras_set_mca_debug_mode(adev, val ? true : false);
 	if (ret)
 		return ret;
 
@@ -485,7 +494,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(mca_debug_mode_fops, NULL, amdgpu_mca_smu_debug_mode_se
 void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root)
 {
 #if defined(CONFIG_DEBUG_FS)
-	if (!root || adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 6))
+	if (!root || amdgpu_ip_version(adev, MP1_HWIP, 0) != IP_VERSION(13, 0, 6))
 		return;
 
 	debugfs_create_file("mca_debug_mode", 0200, root, adev, &mca_debug_mode_fops);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
index 2b488fcf2f95..b399f1b62887 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
@@ -46,6 +46,10 @@
 #define MCA_REG__STATUS__ERRORCODEEXT(x)	MCA_REG_FIELD(x, 21, 16)
 #define MCA_REG__STATUS__ERRORCODE(x)		MCA_REG_FIELD(x, 15, 0)
 
+#define MCA_REG__MISC0__ERRCNT(x)		MCA_REG_FIELD(x, 43, 32)
+
+#define MCA_REG__SYND__ERRORINFORMATION(x)	MCA_REG_FIELD(x, 17, 0)
+
 enum amdgpu_mca_ip {
 	AMDGPU_MCA_IP_UNKNOW = -1,
 	AMDGPU_MCA_IP_PSP = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index abd0b9763904..da48b6da0107 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -98,6 +98,26 @@ static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev)
 	return 0;
 }
 
+/* Allocate and zero the one-page GTT buffer that backs the MES event log. */
+static int amdgpu_mes_event_log_init(struct amdgpu_device *adev)
+{
+	int r;
+
+	r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+				    AMDGPU_GEM_DOMAIN_GTT,
+				    &adev->mes.event_log_gpu_obj,
+				    &adev->mes.event_log_gpu_addr,
+				    &adev->mes.event_log_cpu_addr);
+	if (r) {
+		dev_warn(adev->dev, "failed to create MES event log buffer (%d)\n", r);
+		return r;
+	}
+
+	memset(adev->mes.event_log_cpu_addr, 0, PAGE_SIZE);
+
+	return 0;
+}
+
 static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
 {
 	bitmap_free(adev->mes.doorbell_bitmap);
@@ -182,8 +202,14 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
 	if (r)
 		goto error;
 
+	r = amdgpu_mes_event_log_init(adev);
+	if (r)
+		goto error_doorbell;
+
 	return 0;
 
+error_doorbell:
+	amdgpu_mes_doorbell_free(adev);
 error:
 	amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
 	amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
@@ -199,6 +225,10 @@ error_ids:
 
 void amdgpu_mes_fini(struct amdgpu_device *adev)
 {
+	amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj,
+			      &adev->mes.event_log_gpu_addr,
+			      &adev->mes.event_log_cpu_addr);
+
 	amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
 	amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
 	amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
@@ -886,6 +916,11 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
 	op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
 	op_input.set_shader_debugger.process_context_addr = process_context_addr;
 	op_input.set_shader_debugger.flags.u32all = flags;
+
+	/* use amdgpu_mes_flush_shader_debugger() instead */
+	if (op_input.set_shader_debugger.flags.process_ctx_flush)
+		return -EINVAL;
+
 	op_input.set_shader_debugger.spi_gdbg_per_vmid_cntl = spi_gdbg_per_vmid_cntl;
 	memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl,
 			sizeof(op_input.set_shader_debugger.tcp_watch_cntl));
@@ -905,6 +940,32 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
 	return r;
 }
 
+/* Submit a SET_SHADER_DEBUGGER misc op carrying only the process_ctx_flush flag. */
+int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
+				     uint64_t process_context_addr)
+{
+	struct mes_misc_op_input op_input = {0};
+	int r;
+
+	if (!adev->mes.funcs->misc_op) {
+		DRM_ERROR("mes flush shader debugger is not supported!\n");
+		return -EINVAL;
+	}
+
+	/* op_input is zeroed above, so process_ctx_flush is the only flag set. */
+	op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
+	op_input.set_shader_debugger.process_context_addr = process_context_addr;
+	op_input.set_shader_debugger.flags.process_ctx_flush = true;
+
+	amdgpu_mes_lock(&adev->mes);
+	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+	if (r)
+		DRM_ERROR("failed to flush shader debugger\n");
+	amdgpu_mes_unlock(&adev->mes);
+
+	return r;
+}
+
 static void
 amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev,
 			       struct amdgpu_ring *ring,
@@ -1479,3 +1540,34 @@ out:
 	amdgpu_ucode_release(&adev->mes.fw[pipe]);
 	return r;
 }
+
+#if defined(CONFIG_DEBUG_FS)
+
+/* debugfs show callback: hex-dump the MES event log page into the seq_file. */
+static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused)
+{
+	struct amdgpu_device *adev = m->private;
+	const void *log = adev->mes.event_log_cpu_addr;
+
+	seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4, log, PAGE_SIZE, false);
+
+	return 0;
+}
+
+
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log);
+
+#endif
+
+/*
+ * Create the read-only "amdgpu_mes_event_log" file under the primary DRM
+ * minor's debugfs root; compiles to an empty body without CONFIG_DEBUG_FS.
+ */
+void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+	struct dentry *root = adev_to_drm(adev)->primary->debugfs_root;
+	debugfs_create_file("amdgpu_mes_event_log", 0444, root,
+			    adev, &amdgpu_debugfs_mes_event_log_fops);
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index a27b424ffe00..7d4f93fea937 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -133,6 +133,11 @@ struct amdgpu_mes {
 	uint32_t			num_mes_dbs;
 	unsigned long			*doorbell_bitmap;
 
+	/* MES event log buffer */
+	struct amdgpu_bo		*event_log_gpu_obj;
+	uint64_t                        event_log_gpu_addr;
+	void				*event_log_cpu_addr;
+
 	/* ip specific functions */
 	const struct amdgpu_mes_funcs   *funcs;
 };
@@ -291,9 +296,10 @@ struct mes_misc_op_input {
 			uint64_t process_context_addr;
 			union {
 				struct {
-					uint64_t single_memop : 1;
-					uint64_t single_alu_op : 1;
-					uint64_t reserved: 30;
+					uint32_t single_memop : 1;
+					uint32_t single_alu_op : 1;
+					uint32_t reserved: 29;
+					uint32_t process_ctx_flush: 1;
 				};
 				uint32_t u32all;
 			} flags;
@@ -369,7 +375,8 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
 				const uint32_t *tcp_watch_cntl,
 				uint32_t flags,
 				bool trap_en);
-
+int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
+				uint64_t process_context_addr);
 int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
 			int queue_type, int idx,
 			struct amdgpu_mes_ctx_data *ctx_data,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 32fe05c810c6..2e4911050cc5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -32,7 +32,6 @@
 
 #include <drm/display/drm_dp_helper.h>
 #include <drm/drm_crtc.h>
-#include <drm/drm_edid.h>
 #include <drm/drm_encoder.h>
 #include <drm/drm_fixed.h>
 #include <drm/drm_framebuffer.h>
@@ -51,6 +50,7 @@ struct amdgpu_device;
 struct amdgpu_encoder;
 struct amdgpu_router;
 struct amdgpu_hpd;
+struct edid;
 
 #define to_amdgpu_crtc(x) container_of(x, struct amdgpu_crtc, base)
 #define to_amdgpu_connector(x) container_of(x, struct amdgpu_connector, base)
@@ -343,6 +343,97 @@ struct amdgpu_mode_info {
 	int			disp_priority;
 	const struct amdgpu_display_funcs *funcs;
 	const enum drm_plane_type *plane_type;
+
+	/* Driver-private color mgmt props */
+
+	/* @plane_degamma_lut_property: Plane property to set a degamma LUT to
+	 * convert encoded values to light linear values before sampling or
+	 * blending.
+	 */
+	struct drm_property *plane_degamma_lut_property;
+	/* @plane_degamma_lut_size_property: Plane property to define the max
+	 * size of degamma LUT as supported by the driver (read-only).
+	 */
+	struct drm_property *plane_degamma_lut_size_property;
+	/**
+	 * @plane_degamma_tf_property: Plane pre-defined transfer function to
+	 * go from scanout/encoded values to linear values.
+	 */
+	struct drm_property *plane_degamma_tf_property;
+	/**
+	 * @plane_hdr_mult_property:
+	 */
+	struct drm_property *plane_hdr_mult_property;
+
+	struct drm_property *plane_ctm_property;
+	/**
+	 * @plane_shaper_lut_property: Plane property to set pre-blending shaper
+	 * LUT that converts color content before 3D LUT. If
+	 * plane_shaper_tf_property != Identity TF, AMD color module will
+	 * combine the user LUT values with pre-defined TF into the LUT
+	 * parameters to be programmed.
+	 */
+	struct drm_property *plane_shaper_lut_property;
+	/**
+	 * @plane_shaper_lut_size_property: Plane property for the size of
+	 * pre-blending shaper LUT as supported by the driver (read-only).
+	 */
+	struct drm_property *plane_shaper_lut_size_property;
+	/**
+	 * @plane_shaper_tf_property: Plane property to set a predefined
+	 * transfer function for pre-blending shaper (before applying 3D LUT)
+	 * with or without LUT. There is no shaper ROM, but we can use AMD
+	 * color modules to program LUT parameters from predefined TF (or
+	 * from a combination of pre-defined TF and the custom 1D LUT).
+	 */
+	struct drm_property *plane_shaper_tf_property;
+	/**
+	 * @plane_lut3d_property: Plane property for color transformation using
+	 * a 3D LUT (pre-blending), a three-dimensional array where each
+	 * element is an RGB triplet. Each dimension has the size of
+	 * lut3d_size. The array contains samples from the approximated
+	 * function. On AMD, values between samples are estimated by
+	 * tetrahedral interpolation. The array is accessed with three indices,
+	 * one for each input dimension (color channel), blue being the
+	 * outermost dimension, red the innermost.
+	 */
+	struct drm_property *plane_lut3d_property;
+	/**
+	 * @plane_lut3d_size_property: Plane property to define the max
+	 * size of 3D LUT as supported by the driver (read-only). The max size
+	 * is the max size of one dimension and, therefore, the max number of
+	 * entries for 3D LUT array is the 3D LUT size cubed.
+	 */
+	struct drm_property *plane_lut3d_size_property;
+	/**
+	 * @plane_blend_lut_property: Plane property for output gamma before
+	 * blending. Userspace sets a blend LUT to convert colors after 3D LUT
+	 * conversion. It works as a post-3DLUT 1D LUT. With shaper LUT, they
+	 * are sandwiching 3D LUT with two 1D LUT. If plane_blend_tf_property
+	 * != Identity TF, AMD color module will combine the user LUT values
+	 * with pre-defined TF into the LUT parameters to be programmed.
+	 */
+	struct drm_property *plane_blend_lut_property;
+	/**
+	 * @plane_blend_lut_size_property: Plane property to define the max
+	 * size of blend LUT as supported by the driver (read-only).
+	 */
+	struct drm_property *plane_blend_lut_size_property;
+	/**
+	 * @plane_blend_tf_property: Plane property to set a predefined
+	 * transfer function for pre-blending blend/out_gamma (after applying
+	 * 3D LUT) with or without LUT. There is no blend ROM, but we can use
+	 * AMD color modules to program LUT parameters from predefined TF (or
+	 * from a combination of pre-defined TF and the custom 1D LUT).
+	 */
+	struct drm_property *plane_blend_tf_property;
+	/* @regamma_tf_property: Transfer function for CRTC regamma
+	 * (post-blending). Possible values are defined by `enum
+	 * amdgpu_transfer_function`. There is no regamma ROM, but we can use
+	 * AMD color modules to program LUT parameters from predefined TF (or
+	 * from a combination of pre-defined TF and the custom 1D LUT).
+	 */
+	struct drm_property *regamma_tf_property;
 };
 
 #define AMDGPU_MAX_BL_LEVEL 0xFF
@@ -416,6 +507,10 @@ struct amdgpu_crtc {
 
 	int otg_inst;
 	struct drm_pending_vblank_event *event;
+
+	bool wb_pending;
+	bool wb_enabled;
+	struct drm_writeback_connector *wb_conn;
 };
 
 struct amdgpu_encoder_atom_dig {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index cef920a93924..425cebcc5cbf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1245,19 +1245,15 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
  * amdgpu_bo_move_notify - notification about a memory move
  * @bo: pointer to a buffer object
  * @evict: if this move is evicting the buffer from the graphics address space
- * @new_mem: new information of the bufer object
  *
  * Marks the corresponding &amdgpu_bo buffer object as invalid, also performs
  * bookkeeping.
  * TTM driver callback which is called when ttm moves a buffer.
  */
-void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
-			   bool evict,
-			   struct ttm_resource *new_mem)
+void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 	struct amdgpu_bo *abo;
-	struct ttm_resource *old_mem = bo->resource;
 
 	if (!amdgpu_bo_is_amdgpu_bo(bo))
 		return;
@@ -1274,13 +1270,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
 	/* remember the eviction */
 	if (evict)
 		atomic64_inc(&adev->num_evictions);
-
-	/* update statistics */
-	if (!new_mem)
-		return;
-
-	/* move_notify is called before move happens */
-	trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
 }
 
 void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
@@ -1343,6 +1332,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
 
 	abo = ttm_to_amdgpu_bo(bo);
 
+	WARN_ON(abo->vm_bo);
+
 	if (abo->kfd_bo)
 		amdgpu_amdkfd_release_notify(abo);
 
@@ -1527,10 +1518,14 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
 u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	uint64_t offset;
+	uint64_t offset = AMDGPU_BO_INVALID_OFFSET;
+
+	if (bo->tbo.resource->mem_type == TTM_PL_TT)
+		offset = amdgpu_gmc_agp_addr(&bo->tbo);
 
-	offset = (bo->tbo.resource->start << PAGE_SHIFT) +
-		 amdgpu_ttm_domain_start(adev, bo->tbo.resource->mem_type);
+	if (offset == AMDGPU_BO_INVALID_OFFSET)
+		offset = (bo->tbo.resource->start << PAGE_SHIFT) +
+			amdgpu_ttm_domain_start(adev, bo->tbo.resource->mem_type);
 
 	return amdgpu_gmc_sign_extend(offset);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index d28e21baef16..a3ea8a82db23 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -344,9 +344,7 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
 int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
 			   size_t buffer_size, uint32_t *metadata_size,
 			   uint64_t *flags);
-void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
-			   bool evict,
-			   struct ttm_resource *new_mem);
+void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict);
 void amdgpu_bo_release_notify(struct ttm_buffer_object *bo);
 vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
 void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index a21045d018f2..0328616473f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -466,7 +466,7 @@ static int psp_sw_init(void *handle)
 	}
 
 	ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
-				      amdgpu_sriov_vf(adev) ?
+				      (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ?
 				      AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
 				      &psp->fw_pri_bo,
 				      &psp->fw_pri_mc_addr,
@@ -1433,8 +1433,8 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,
 			 get_extended_data) ||
 			amdgpu_ip_version(psp->adev, MP0_HWIP, 0) ==
 				IP_VERSION(13, 0, 6);
-		bool ta_port_num_support = psp->xgmi_context.xgmi_ta_caps &
-						EXTEND_PEER_LINK_INFO_CMD_FLAG;
+		bool ta_port_num_support = amdgpu_sriov_vf(psp->adev) ? 0 :
+				psp->xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG;
 
 		/* popluate the shared output buffer rather than the cmd input buffer
 		 * with node_ids as the input for GET_PEER_LINKS command execution.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
index 468a67b302d4..ca5c86e5f7cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
@@ -362,7 +362,7 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size
 		}
 	}
 
-	if (copy_to_user((char *)buf, context->mem_context.shared_buf, shared_buf_len))
+	if (copy_to_user((char *)&buf[copy_pos], context->mem_context.shared_buf, shared_buf_len))
 		ret = -EFAULT;
 
 err_free_shared_buf:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index a3dc68e98910..31823a30dea2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -28,6 +28,7 @@
 #include <linux/reboot.h>
 #include <linux/syscalls.h>
 #include <linux/pm_runtime.h>
+#include <linux/list_sort.h>
 
 #include "amdgpu.h"
 #include "amdgpu_ras.h"
@@ -304,11 +305,13 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
 			return -EINVAL;
 
 		data->head.block = block_id;
-		/* only ue and ce errors are supported */
+		/* only ue, ce and poison errors are supported */
 		if (!memcmp("ue", err, 2))
 			data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
 		else if (!memcmp("ce", err, 2))
 			data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE;
+		else if (!memcmp("poison", err, 6))
+			data->head.type = AMDGPU_RAS_ERROR__POISON;
 		else
 			return -EINVAL;
 
@@ -430,9 +433,10 @@ static void amdgpu_ras_instance_mask_check(struct amdgpu_device *adev,
  * The block is one of: umc, sdma, gfx, etc.
  *	see ras_block_string[] for details
  *
- * The error type is one of: ue, ce, where,
+ * The error type is one of: ue, ce and poison where,
  *	ue is multi-uncorrectable
  *	ce is single-correctable
+ *	poison is poison
  *
  * The sub-block is a the sub-block index, pass 0 if there is no sub-block.
  * The address and value are hexadecimal numbers, leading 0x is optional.
@@ -1066,8 +1070,7 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev,
 			mcm_info = &err_info->mcm_info;
 			if (err_info->ce_count) {
 				dev_info(adev->dev, "socket: %d, die: %d, "
-					 "%lld new correctable hardware errors detected in %s block, "
-					 "no user action is needed\n",
+					 "%lld new correctable hardware errors detected in %s block\n",
 					 mcm_info->socket_id,
 					 mcm_info->die_id,
 					 err_info->ce_count,
@@ -1079,8 +1082,7 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev,
 			err_info = &err_node->err_info;
 			mcm_info = &err_info->mcm_info;
 			dev_info(adev->dev, "socket: %d, die: %d, "
-				 "%lld correctable hardware errors detected in total in %s block, "
-				 "no user action is needed\n",
+				 "%lld correctable hardware errors detected in total in %s block\n",
 				 mcm_info->socket_id, mcm_info->die_id, err_info->ce_count, blk_name);
 		}
 	}
@@ -1107,16 +1109,14 @@ static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev,
 			   adev->smuio.funcs->get_die_id) {
 			dev_info(adev->dev, "socket: %d, die: %d "
 				 "%ld correctable hardware errors "
-				 "detected in %s block, no user "
-				 "action is needed.\n",
+				 "detected in %s block\n",
 				 adev->smuio.funcs->get_socket_id(adev),
 				 adev->smuio.funcs->get_die_id(adev),
 				 ras_mgr->err_data.ce_count,
 				 blk_name);
 		} else {
 			dev_info(adev->dev, "%ld correctable hardware errors "
-				 "detected in %s block, no user "
-				 "action is needed.\n",
+				 "detected in %s block\n",
 				 ras_mgr->err_data.ce_count,
 				 blk_name);
 		}
@@ -1155,8 +1155,10 @@ static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, s
 		for_each_ras_error(err_node, err_data) {
 			err_info = &err_node->err_info;
 
-			amdgpu_ras_error_statistic_ce_count(&obj->err_data, &err_info->mcm_info, err_info->ce_count);
-			amdgpu_ras_error_statistic_ue_count(&obj->err_data, &err_info->mcm_info, err_info->ue_count);
+			amdgpu_ras_error_statistic_ce_count(&obj->err_data,
+					&err_info->mcm_info, NULL, err_info->ce_count);
+			amdgpu_ras_error_statistic_ue_count(&obj->err_data,
+					&err_info->mcm_info, NULL, err_info->ue_count);
 		}
 	} else {
 		/* for legacy asic path which doesn't has error source info */
@@ -1173,6 +1175,9 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev,
 	enum amdgpu_ras_block blk = info ? info->head.block : AMDGPU_RAS_BLOCK_COUNT;
 	struct amdgpu_ras_block_object *block_obj = NULL;
 
+	if (blk == AMDGPU_RAS_BLOCK_COUNT)
+		return -EINVAL;
+
 	if (error_query_mode == AMDGPU_RAS_INVALID_ERROR_QUERY)
 		return -EINVAL;
 
@@ -1914,7 +1919,7 @@ static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj
 				struct amdgpu_iv_entry *entry)
 {
 	dev_info(obj->adev->dev,
-		"Poison is created, no user action is needed.\n");
+		"Poison is created\n");
 }
 
 static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj,
@@ -2537,7 +2542,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
 		return 0;
 
 	data = &con->eh_data;
-	*data = kmalloc(sizeof(**data), GFP_KERNEL | __GFP_ZERO);
+	*data = kzalloc(sizeof(**data), GFP_KERNEL);
 	if (!*data) {
 		ret = -ENOMEM;
 		goto out;
@@ -2824,10 +2829,10 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 	if (con)
 		return 0;
 
-	con = kmalloc(sizeof(struct amdgpu_ras) +
+	con = kzalloc(sizeof(*con) +
 			sizeof(struct ras_manager) * AMDGPU_RAS_BLOCK_COUNT +
 			sizeof(struct ras_manager) * AMDGPU_RAS_MCA_BLOCK_COUNT,
-			GFP_KERNEL|__GFP_ZERO);
+			GFP_KERNEL);
 	if (!con)
 		return -ENOMEM;
 
@@ -2914,6 +2919,11 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 
 	amdgpu_ras_query_poison_mode(adev);
 
+	/* Packed socket_id to ras feature mask bits[31:29] */
+	if (adev->smuio.funcs &&
+	    adev->smuio.funcs->get_socket_id)
+		con->features |= ((adev->smuio.funcs->get_socket_id(adev)) << 29);
+
 	/* Get RAS schema for particular SOC */
 	con->schema = amdgpu_get_ras_schema(adev);
 
@@ -3132,6 +3142,8 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
 	if (amdgpu_sriov_vf(adev))
 		return 0;
 
+	amdgpu_ras_set_mca_debug_mode(adev, false);
+
 	list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
 		if (!node->ras_obj) {
 			dev_warn(adev->dev, "Warning: abnormal ras list node.\n");
@@ -3405,12 +3417,18 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
 	return 0;
 }
 
-void amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable)
+int amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	int ret = 0;
 
-	if (con)
-		con->is_mca_debug_mode = enable;
+	if (con) {
+		ret = amdgpu_mca_smu_set_debug_mode(adev, enable);
+		if (!ret)
+			con->is_mca_debug_mode = enable;
+	}
+
+	return ret;
 }
 
 bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev)
@@ -3665,8 +3683,24 @@ static struct ras_err_node *amdgpu_ras_error_node_new(void)
 	return err_node;
 }
 
+/* list_sort() comparator: order error nodes by socket id, then by die id */
+static int ras_err_info_cmp(void *priv, const struct list_head *a, const struct list_head *b)
+{
+	struct ras_err_node *nodea = container_of(a, struct ras_err_node, node);
+	struct ras_err_node *nodeb = container_of(b, struct ras_err_node, node);
+	struct amdgpu_smuio_mcm_config_info *infoa = &nodea->err_info.mcm_info;
+	struct amdgpu_smuio_mcm_config_info *infob = &nodeb->err_info.mcm_info;
+
+	if (unlikely(infoa->socket_id != infob->socket_id))
+		return infoa->socket_id - infob->socket_id;
+
+	/* same socket: fall back to the die id */
+	return infoa->die_id - infob->die_id;
+}
+
 static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_data,
-						      struct amdgpu_smuio_mcm_config_info *mcm_info)
+				struct amdgpu_smuio_mcm_config_info *mcm_info,
+				struct ras_err_addr *err_addr)
 {
 	struct ras_err_node *err_node;
 
@@ -3680,14 +3714,19 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d
 
 	memcpy(&err_node->err_info.mcm_info, mcm_info, sizeof(*mcm_info));
 
+	if (err_addr)
+		memcpy(&err_node->err_info.err_addr, err_addr, sizeof(*err_addr));
+
 	err_data->err_list_count++;
 	list_add_tail(&err_node->node, &err_data->err_node_list);
+	list_sort(NULL, &err_data->err_node_list, ras_err_info_cmp);
 
 	return &err_node->err_info;
 }
 
 int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
-					struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count)
+		struct amdgpu_smuio_mcm_config_info *mcm_info,
+		struct ras_err_addr *err_addr, u64 count)
 {
 	struct ras_err_info *err_info;
 
@@ -3697,7 +3736,7 @@ int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
 	if (!count)
 		return 0;
 
-	err_info = amdgpu_ras_error_get_info(err_data, mcm_info);
+	err_info = amdgpu_ras_error_get_info(err_data, mcm_info, err_addr);
 	if (!err_info)
 		return -EINVAL;
 
@@ -3708,7 +3747,8 @@ int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
 }
 
 int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data,
-					struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count)
+		struct amdgpu_smuio_mcm_config_info *mcm_info,
+		struct ras_err_addr *err_addr, u64 count)
 {
 	struct ras_err_info *err_info;
 
@@ -3718,7 +3758,7 @@ int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data,
 	if (!count)
 		return 0;
 
-	err_info = amdgpu_ras_error_get_info(err_data, mcm_info);
+	err_info = amdgpu_ras_error_get_info(err_data, mcm_info, err_addr);
 	if (!err_info)
 		return -EINVAL;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 19161916ac46..76fb85628716 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -452,10 +452,17 @@ struct ras_fs_data {
 	char debugfs_name[32];
 };
 
+struct ras_err_addr {
+	uint64_t err_status;
+	uint64_t err_ipid;
+	uint64_t err_addr;
+};
+
 struct ras_err_info {
 	struct amdgpu_smuio_mcm_config_info mcm_info;
 	u64 ce_count;
 	u64 ue_count;
+	struct ras_err_addr err_addr;
 };
 
 struct ras_err_node {
@@ -773,7 +780,7 @@ struct amdgpu_ras* amdgpu_ras_get_context(struct amdgpu_device *adev);
 
 int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_con);
 
-void amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable);
+int amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable);
 bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev);
 bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev,
 				     unsigned int *mode);
@@ -806,8 +813,10 @@ void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev,
 int amdgpu_ras_error_data_init(struct ras_err_data *err_data);
 void amdgpu_ras_error_data_fini(struct ras_err_data *err_data);
 int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data,
-					struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count);
+		struct amdgpu_smuio_mcm_config_info *mcm_info,
+		struct ras_err_addr *err_addr, u64 count);
 int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
-					struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count);
+		struct amdgpu_smuio_mcm_config_info *mcm_info,
+		struct ras_err_addr *err_addr, u64 count);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 65aa218380be..2fde93b00cab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -214,6 +214,12 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,
 			control->i2c_address = EEPROM_I2C_MADDR_0;
 		return true;
 	case IP_VERSION(13, 0, 0):
+		if (strnstr(atom_ctx->vbios_pn, "D707",
+			    sizeof(atom_ctx->vbios_pn)))
+			control->i2c_address = EEPROM_I2C_MADDR_0;
+		else
+			control->i2c_address = EEPROM_I2C_MADDR_4;
+		return true;
 	case IP_VERSION(13, 0, 6):
 	case IP_VERSION(13, 0, 10):
 		control->i2c_address = EEPROM_I2C_MADDR_4;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index b0335a1c5e90..19899f6b9b2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -32,7 +32,6 @@ enum AMDGPU_RESET_FLAGS {
 
 	AMDGPU_NEED_FULL_RESET = 0,
 	AMDGPU_SKIP_HW_RESET = 1,
-	AMDGPU_RESET_FOR_DEVICE_REMOVE = 2,
 };
 
 struct amdgpu_reset_context {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 6a80d3ec887e..5505d646f43a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -635,6 +635,7 @@ int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
 			      ring->name);
 
 	ring->sched.ready = !r;
+
 	return r;
 }
 
@@ -642,6 +643,10 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring,
 				    struct amdgpu_mqd_prop *prop)
 {
 	struct amdgpu_device *adev = ring->adev;
+	bool is_high_prio_compute = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
+				    amdgpu_gfx_is_high_priority_compute_queue(adev, ring);
+	bool is_high_prio_gfx = ring->funcs->type == AMDGPU_RING_TYPE_GFX &&
+				amdgpu_gfx_is_high_priority_graphics_queue(adev, ring);
 
 	memset(prop, 0, sizeof(*prop));
 
@@ -659,10 +664,8 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring,
 	 */
 	prop->hqd_active = ring->funcs->type == AMDGPU_RING_TYPE_KIQ;
 
-	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
-	     amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) ||
-	    (ring->funcs->type == AMDGPU_RING_TYPE_GFX &&
-	     amdgpu_gfx_is_high_priority_graphics_queue(adev, ring))) {
+	prop->allow_tunneling = is_high_prio_compute;
+	if (is_high_prio_compute || is_high_prio_gfx) {
 		prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
 		prop->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
 	}
@@ -715,3 +718,14 @@ void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring)
 	if (ring->is_sw_ring)
 		amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_DE);
 }
+
+bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring)
+{
+	if (!ring)
+		return false;
+
+	if (ring->no_scheduler || !drm_sched_wqueue_ready(&ring->sched))
+		return false;
+
+	return true;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index bbb53720a018..fe1a61eb6e4c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -450,5 +450,5 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 int amdgpu_ib_pool_init(struct amdgpu_device *adev);
 void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
 int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
-
+bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
index 35e0ae9acadc..2c3675d91614 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
@@ -531,13 +531,12 @@ int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev,
 	if (version_major == 2 && version_minor == 1)
 		adev->gfx.rlc.is_rlc_v2_1 = true;
 
-	if (version_minor >= 0) {
-		err = amdgpu_gfx_rlc_init_microcode_v2_0(adev);
-		if (err) {
-			dev_err(adev->dev, "fail to init rlc v2_0 microcode\n");
-			return err;
-		}
+	err = amdgpu_gfx_rlc_init_microcode_v2_0(adev);
+	if (err) {
+		dev_err(adev->dev, "fail to init rlc v2_0 microcode\n");
+		return err;
 	}
+
 	if (version_minor >= 1)
 		amdgpu_gfx_rlc_init_microcode_v2_1(adev);
 	if (version_minor >= 2)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
new file mode 100644
index 000000000000..7a6a67275404
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_seq64.h"
+
+#include <drm/drm_exec.h>
+
+/**
+ * DOC: amdgpu_seq64
+ *
+ * amdgpu_seq64 allocates a 64bit memory on each request in sequence order.
+ * seq64 driver is required for user queue fence memory allocation, TLB
+ * counters and VM updates. It has maximum count of 32768 64 bit slots.
+ */
+
+/**
+ * amdgpu_seq64_map - Map the seq64 memory to VM
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: vm pointer
+ * @bo_va: bo_va pointer
+ * @seq64_addr: seq64 vaddr start address
+ * @size: seq64 pool size
+ *
+ * Map the seq64 memory to the given VM.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure
+ */
+int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+		     struct amdgpu_bo_va **bo_va, u64 seq64_addr,
+		     uint32_t size)
+{
+	struct amdgpu_bo *bo;
+	struct drm_exec exec;
+	int r;
+
+	bo = adev->seq64.sbo;
+	if (!bo)
+		return -EINVAL;
+
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+	drm_exec_until_all_locked(&exec) {
+		r = amdgpu_vm_lock_pd(vm, &exec, 0);
+		if (likely(!r))
+			r = drm_exec_lock_obj(&exec, &bo->tbo.base);
+		drm_exec_retry_on_contention(&exec);
+		if (unlikely(r))
+			goto error;
+	}
+
+	*bo_va = amdgpu_vm_bo_add(adev, vm, bo);
+	if (!*bo_va) {
+		r = -ENOMEM;
+		goto error;
+	}
+
+	r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, size,
+			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
+			     AMDGPU_PTE_EXECUTABLE);
+	if (r) {
+		DRM_ERROR("failed to do bo_map on userq sem, err=%d\n", r);
+		amdgpu_vm_bo_del(adev, *bo_va);
+		goto error;
+	}
+
+	r = amdgpu_vm_bo_update(adev, *bo_va, false);
+	if (r) {
+		DRM_ERROR("failed to do vm_bo_update on userq sem\n");
+		amdgpu_vm_bo_del(adev, *bo_va);
+		goto error;
+	}
+
+error:
+	drm_exec_fini(&exec);
+	return r;
+}
+
+/**
+ * amdgpu_seq64_unmap - Unmap the seq64 memory
+ *
+ * @adev: amdgpu_device pointer
+ * @fpriv: DRM file private
+ *
+ * Unmap the seq64 memory from the given VM.
+ */
+void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv)
+{
+	struct amdgpu_vm *vm;
+	struct amdgpu_bo *bo;
+	struct drm_exec exec;
+	int r;
+
+	if (!fpriv->seq64_va)
+		return;
+
+	bo = adev->seq64.sbo;
+	if (!bo)
+		return;
+
+	vm = &fpriv->vm;
+
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+	drm_exec_until_all_locked(&exec) {
+		r = amdgpu_vm_lock_pd(vm, &exec, 0);
+		if (likely(!r))
+			r = drm_exec_lock_obj(&exec, &bo->tbo.base);
+		drm_exec_retry_on_contention(&exec);
+		if (unlikely(r))
+			goto error;
+	}
+
+	amdgpu_vm_bo_del(adev, fpriv->seq64_va);
+
+	fpriv->seq64_va = NULL;
+
+error:
+	drm_exec_fini(&exec);
+}
+
+/**
+ * amdgpu_seq64_alloc - Allocate a 64 bit memory
+ *
+ * @adev: amdgpu_device pointer
+ * @gpu_addr: allocated gpu VA start address
+ * @cpu_addr: allocated cpu VA start address
+ *
+ * Claim the first free slot in the seq64 pool.
+ *
+ * Returns:
+ * 0 on success, -EINVAL when no free slot is left
+ */
+int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *gpu_addr,
+		       u64 **cpu_addr)
+{
+	unsigned long bit_pos;
+	u32 offset;
+
+	bit_pos = find_first_zero_bit(adev->seq64.used, adev->seq64.num_sem);
+
+	if (bit_pos >= adev->seq64.num_sem)
+		return -EINVAL;
+
+	__set_bit(bit_pos, adev->seq64.used);
+	offset = bit_pos << 6; /* byte offset, 64 bytes per slot */
+
+	*gpu_addr = offset + AMDGPU_SEQ64_VADDR_START;
+	/* cpu_base_addr is a u64 pointer: scale the byte offset to u64 units */
+	*cpu_addr = adev->seq64.cpu_base_addr + offset / sizeof(u64);
+
+	return 0;
+}
+
+/**
+ * amdgpu_seq64_free - Free the given 64 bit memory
+ *
+ * @adev: amdgpu_device pointer
+ * @gpu_addr: gpu start address to be freed
+ *
+ * Free the given 64 bit memory from seq64 pool.
+ *
+ */
+void amdgpu_seq64_free(struct amdgpu_device *adev, u64 gpu_addr)
+{
+	u32 offset;
+
+	offset = gpu_addr - AMDGPU_SEQ64_VADDR_START;
+
+	offset >>= 6;
+	if (offset < adev->seq64.num_sem)
+		__clear_bit(offset, adev->seq64.used);
+}
+
+/**
+ * amdgpu_seq64_fini - Cleanup seq64 driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Free the memory space allocated for seq64.
+ *
+ */
+void amdgpu_seq64_fini(struct amdgpu_device *adev)
+{
+	amdgpu_bo_free_kernel(&adev->seq64.sbo,
+			      NULL,
+			      (void **)&adev->seq64.cpu_base_addr);
+}
+
+/**
+ * amdgpu_seq64_init - Initialize seq64 driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate the required memory space for seq64.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure
+ */
+int amdgpu_seq64_init(struct amdgpu_device *adev)
+{
+	int r;
+
+	if (adev->seq64.sbo)
+		return 0;
+
+	/*
+	 * The pool is AMDGPU_SEQ64_SIZE bytes and holds AMDGPU_MAX_SEQ64_SLOTS
+	 * slots, i.e. sizeof(u64) * 8 bytes are set aside for every 64bit slot
+	 */
+	r = amdgpu_bo_create_kernel(adev, AMDGPU_SEQ64_SIZE,
+				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+				    &adev->seq64.sbo, NULL,
+				    (void **)&adev->seq64.cpu_base_addr);
+	if (r) {
+		dev_warn(adev->dev, "(%d) create seq64 failed\n", r);
+		return r;
+	}
+
+	memset(adev->seq64.cpu_base_addr, 0, AMDGPU_SEQ64_SIZE);
+
+	adev->seq64.num_sem = AMDGPU_MAX_SEQ64_SLOTS;
+	memset(&adev->seq64.used, 0, sizeof(adev->seq64.used));
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h
new file mode 100644
index 000000000000..2196e72be508
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_SEQ64_H__
+#define __AMDGPU_SEQ64_H__
+
+#define AMDGPU_SEQ64_SIZE		(2ULL << 20)
+#define AMDGPU_MAX_SEQ64_SLOTS		(AMDGPU_SEQ64_SIZE / (sizeof(u64) * 8))
+#define AMDGPU_SEQ64_VADDR_OFFSET	0x50000
+#define AMDGPU_SEQ64_VADDR_START	(AMDGPU_VA_RESERVED_SIZE + AMDGPU_SEQ64_VADDR_OFFSET)
+
+struct amdgpu_seq64 {
+	struct amdgpu_bo *sbo;
+	u32 num_sem;
+	u64 *cpu_base_addr;
+	DECLARE_BITMAP(used, AMDGPU_MAX_SEQ64_SLOTS);
+};
+
+void amdgpu_seq64_fini(struct amdgpu_device *adev);
+int amdgpu_seq64_init(struct amdgpu_device *adev);
+int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *gpu_addr, u64 **cpu_addr);
+void amdgpu_seq64_free(struct amdgpu_device *adev, u64 gpu_addr);
+int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+		     struct amdgpu_bo_va **bo_va, u64 seq64_addr, uint32_t size);
+void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv);
+
+#endif
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index dcd8c066bc1f..1b013a44ca99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -191,7 +191,8 @@ static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
 
 	/* Never sync to VM updates either. */
 	if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
-	    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
+	    owner != AMDGPU_FENCE_OWNER_UNDEFINED &&
+	    owner != AMDGPU_FENCE_OWNER_KFD)
 		return false;
 
 	/* Ignore fences depending on the sync mode */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 2fd1bfb35916..f539b1d00234 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -554,6 +554,21 @@ TRACE_EVENT(amdgpu_reset_reg_dumps,
 		      __entry->value)
 );
 
+TRACE_EVENT(amdgpu_runpm_reference_dumps,
+	    TP_PROTO(uint32_t index, const char *func),
+	    TP_ARGS(index, func),
+	    TP_STRUCT__entry(
+			     __field(uint32_t, index)
+			     __string(func, func)
+			     ),
+	    TP_fast_assign(
+			   __entry->index = index;
+			   __assign_str(func, func);
+			   ),
+	    TP_printk("amdgpu runpm reference dump 0x%x: %s\n",
+		      __entry->index,
+		      __get_str(func))
+);
 #undef AMDGPU_JOB_GET_TIMELINE_NAME
 #endif
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 05991c5c8ddb..75c9fd2c6c2a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -545,10 +545,11 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 			return r;
 	}
 
+	trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
 out:
 	/* update statistics */
 	atomic64_add(bo->base.size, &adev->num_bytes_moved);
-	amdgpu_bo_move_notify(bo, evict, new_mem);
+	amdgpu_bo_move_notify(bo, evict);
 	return 0;
 }
 
@@ -959,10 +960,8 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
 		return 0;
 
 	addr = amdgpu_gmc_agp_addr(bo);
-	if (addr != AMDGPU_BO_INVALID_OFFSET) {
-		bo->resource->start = addr >> PAGE_SHIFT;
+	if (addr != AMDGPU_BO_INVALID_OFFSET)
 		return 0;
-	}
 
 	/* allocate GART space */
 	placement.num_placement = 1;
@@ -1555,7 +1554,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
 static void
 amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo)
 {
-	amdgpu_bo_move_notify(bo, false, NULL);
+	amdgpu_bo_move_notify(bo, false);
 }
 
 static struct ttm_device_funcs amdgpu_bo_driver = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index b14127429f30..3e12763e477a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -1062,7 +1062,8 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev)
 {
 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) {
 		amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE,
-			amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
+			(amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ?
+			AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
 			&adev->firmware.fw_buf,
 			&adev->firmware.fw_buf_mc,
 			&adev->firmware.fw_buf_ptr);
@@ -1397,9 +1398,13 @@ int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw,
 
 	if (err)
 		return -ENODEV;
+
 	err = amdgpu_ucode_validate(*fw);
-	if (err)
+	if (err) {
 		dev_dbg(adev->dev, "\"%s\" failed to validate\n", fw_name);
+		release_firmware(*fw);
+		*fw = NULL;
+	}
 
 	return err;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 3a632c3b1a2c..0dcff2889e25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -1099,7 +1099,8 @@ bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev)
 {
 	bool xnack_mode = true;
 
-	if (amdgpu_sriov_vf(adev) && adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+	if (amdgpu_sriov_vf(adev) &&
+	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
 		xnack_mode = false;
 
 	return xnack_mode;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
index db6fc0cb18eb..453a4b786cfc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0+
 
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_edid.h>
 #include <drm/drm_simple_kms_helper.h>
 #include <drm/drm_vblank.h>
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index d1b8afd105c9..b8fcb6c55698 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -285,6 +285,7 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
 	list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) {
 		struct amdgpu_bo *bo = vm_bo->bo;
 
+		vm_bo->moved = true;
 		if (!bo || bo->tbo.type != ttm_bo_type_kernel)
 			list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
 		else if (bo->parent)
@@ -1438,6 +1439,51 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 }
 
 /**
+ * amdgpu_vm_flush_compute_tlb - Flush TLB on compute VM
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ * @flush_type: flush type
+ * @xcc_mask: mask of XCCs that belong to the compute partition in need of a TLB flush.
+ *
+ * Flush TLB if needed for a compute VM.
+ *
+ * Returns:
+ * 0 for success.
+ */
+int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev,
+				struct amdgpu_vm *vm,
+				uint32_t flush_type,
+				uint32_t xcc_mask)
+{
+	uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
+	bool all_hub = false;
+	int xcc = 0, r = 0;
+
+	WARN_ON_ONCE(!vm->is_compute_context);
+
+	/*
+	 * It can be that we race and lose here, but that is extremely unlikely
+	 * and the worst thing which could happen is that we flush the changes
+	 * into the TLB once more which is harmless.
+	 */
+	if (atomic64_xchg(&vm->kfd_last_flushed_seq, tlb_seq) == tlb_seq)
+		return 0;
+
+	if (adev->family == AMDGPU_FAMILY_AI ||
+	    adev->family == AMDGPU_FAMILY_RV)
+		all_hub = true;
+
+	for_each_inst(xcc, xcc_mask) {
+		r = amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, flush_type,
+						   all_hub, xcc);
+		if (r)
+			break;
+	}
+	return r;
+}
+
+/**
  * amdgpu_vm_bo_add - add a bo to a specific vm
  *
  * @adev: amdgpu_device pointer
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 2cd86d2bf73f..4740dd65b99d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -116,7 +116,7 @@ struct amdgpu_mem_stats;
 #define AMDGPU_VM_FAULT_STOP_FIRST	1
 #define AMDGPU_VM_FAULT_STOP_ALWAYS	2
 
-/* Reserve 4MB VRAM for page tables */
+/* Amount of VRAM reserved for page tables */
 #define AMDGPU_VM_RESERVED_VRAM		(8ULL << 20)
 
 /*
@@ -324,6 +324,7 @@ struct amdgpu_vm {
 	/* Last finished delayed update */
 	atomic64_t		tlb_seq;
 	struct dma_fence	*last_tlb_flush;
+	atomic64_t		kfd_last_flushed_seq;
 
 	/* How many times we had to re-generate the page tables */
 	uint64_t		generation;
@@ -445,6 +446,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 			   struct amdgpu_vm *vm,
 			   struct ww_acquire_ctx *ticket);
+int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev,
+				struct amdgpu_vm *vm,
+				uint32_t flush_type,
+				uint32_t xcc_mask);
 void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
 			    struct amdgpu_vm *vm, struct amdgpu_bo *bo);
 int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index a2287bb25223..a160265ddc07 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -642,13 +642,14 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
 
 	if (!entry->bo)
 		return;
+
+	entry->bo->vm_bo = NULL;
 	shadow = amdgpu_bo_shadowed(entry->bo);
 	if (shadow) {
 		ttm_bo_set_bulk_move(&shadow->tbo, NULL);
 		amdgpu_bo_unref(&shadow);
 	}
 	ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
-	entry->bo->vm_bo = NULL;
 
 	spin_lock(&entry->vm->status_lock);
 	list_del(&entry->vm_status);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
index e81579708e96..b9a15d51eb5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
@@ -26,6 +26,7 @@
 #include "amdgpu.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_vpe.h"
+#include "amdgpu_smu.h"
 #include "soc15_common.h"
 #include "vpe_v6_1.h"
 
@@ -33,8 +34,180 @@
 /* VPE CSA resides in the 4th page of CSA */
 #define AMDGPU_CSA_VPE_OFFSET 	(4096 * 3)
 
+/* 1 second timeout */
+#define VPE_IDLE_TIMEOUT	msecs_to_jiffies(1000)
+
+#define VPE_MAX_DPM_LEVEL			4
+#define FIXED1_8_BITS_PER_FRACTIONAL_PART	8
+#define GET_PRATIO_INTEGER_PART(x)		((x) >> FIXED1_8_BITS_PER_FRACTIONAL_PART)
+
 static void vpe_set_ring_funcs(struct amdgpu_device *adev);
 
+static inline uint16_t div16_u16_rem(uint16_t dividend, uint16_t divisor, uint16_t *remainder)
+{
+	*remainder = dividend % divisor;
+	return dividend / divisor;
+}
+
+static inline uint16_t complete_integer_division_u16(
+	uint16_t dividend,
+	uint16_t divisor,
+	uint16_t *remainder)
+{
+	return div16_u16_rem(dividend, divisor, (uint16_t *)remainder);
+}
+
+static uint16_t vpe_u1_8_from_fraction(uint16_t numerator, uint16_t denominator)
+{
+	u16 arg1_value = numerator;
+	u16 arg2_value = denominator;
+
+	uint16_t remainder;
+
+	/* determine integer part */
+	uint16_t res_value = complete_integer_division_u16(
+		arg1_value, arg2_value, &remainder);
+
+	if (res_value > 127 /* CHAR_MAX */)
+		return 0;
+
+	/* determine fractional part */
+	{
+		unsigned int i = FIXED1_8_BITS_PER_FRACTIONAL_PART;
+
+		do {
+			remainder <<= 1;
+
+			res_value <<= 1;
+
+			if (remainder >= arg2_value) {
+				res_value |= 1;
+				remainder -= arg2_value;
+			}
+		} while (--i != 0);
+	}
+
+	/* round up LSB */
+	{
+		uint16_t summand = (remainder << 1) >= arg2_value;
+
+		if ((res_value + summand) > 32767 /* SHRT_MAX */)
+			return 0;
+
+		res_value += summand;
+	}
+
+	return res_value;
+}
+
+static uint16_t vpe_internal_get_pratio(uint16_t from_frequency, uint16_t to_frequency)
+{
+	uint16_t pratio = vpe_u1_8_from_fraction(from_frequency, to_frequency);
+
+	if (GET_PRATIO_INTEGER_PART(pratio) > 1)
+		pratio = 0;
+
+	return pratio;
+}
+
+/*
+ * VPE has 4 DPM levels from level 0 (lowest) to 3 (highest),
+ * VPE FW will dynamically decide which level should be used according to current loading.
+ *
+ * Get VPE and SOC clocks from PM, and select the appropriate four clock values,
+ * calculate the ratios of adjusting from one clock to another.
+ * The VPE FW can then request the appropriate frequency from the PMFW.
+ */
+int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe)
+{
+	struct amdgpu_device *adev = vpe->ring.adev;
+	uint32_t dpm_ctl;
+
+	if (adev->pm.dpm_enabled) {
+		struct dpm_clocks clock_table = { 0 };
+		struct dpm_clock *VPEClks;
+		struct dpm_clock *SOCClks;
+		uint32_t idx;
+		uint32_t pratio_vmax_vnorm = 0, pratio_vnorm_vmid = 0, pratio_vmid_vmin = 0;
+		uint16_t pratio_vmin_freq = 0, pratio_vmid_freq = 0, pratio_vnorm_freq = 0, pratio_vmax_freq = 0;
+
+		dpm_ctl = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable));
+		dpm_ctl |= 1; /* DPM enablement */
+		WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl);
+
+		/* Get VPECLK and SOCCLK */
+		if (amdgpu_dpm_get_dpm_clock_table(adev, &clock_table)) {
+			dev_dbg(adev->dev, "%s: get clock failed!\n", __func__);
+			goto disable_dpm;
+		}
+
+		SOCClks = clock_table.SocClocks;
+		VPEClks = clock_table.VPEClocks;
+
+		/* vpe dpm only cares about 4 levels. */
+		for (idx = 0; idx < VPE_MAX_DPM_LEVEL; idx++) {
+			uint32_t soc_dpm_level;
+			uint32_t min_freq;
+
+			if (idx == 0)
+				soc_dpm_level = 0;
+			else
+				soc_dpm_level = (idx * 2) + 1;
+
+			/* clamp the max level */
+			if (soc_dpm_level > PP_SMU_NUM_VPECLK_DPM_LEVELS - 1)
+				soc_dpm_level = PP_SMU_NUM_VPECLK_DPM_LEVELS - 1;
+
+			min_freq = (SOCClks[soc_dpm_level].Freq < VPEClks[soc_dpm_level].Freq) ?
+				   SOCClks[soc_dpm_level].Freq : VPEClks[soc_dpm_level].Freq;
+
+			switch (idx) {
+			case 0:
+				pratio_vmin_freq = min_freq;
+				break;
+			case 1:
+				pratio_vmid_freq = min_freq;
+				break;
+			case 2:
+				pratio_vnorm_freq = min_freq;
+				break;
+			case 3:
+				pratio_vmax_freq = min_freq;
+				break;
+			default:
+				break;
+			}
+		}
+
+		if (pratio_vmin_freq && pratio_vmid_freq && pratio_vnorm_freq && pratio_vmax_freq) {
+			uint32_t pratio_ctl;
+
+			pratio_vmax_vnorm = (uint32_t)vpe_internal_get_pratio(pratio_vmax_freq, pratio_vnorm_freq);
+			pratio_vnorm_vmid = (uint32_t)vpe_internal_get_pratio(pratio_vnorm_freq, pratio_vmid_freq);
+			pratio_vmid_vmin = (uint32_t)vpe_internal_get_pratio(pratio_vmid_freq, pratio_vmin_freq);
+
+			pratio_ctl = pratio_vmax_vnorm | (pratio_vnorm_vmid << 9) | (pratio_vmid_vmin << 18);
+			WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_pratio), pratio_ctl);		/* PRatio */
+			WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_request_interval), 24000);	/* 1ms, unit=1/24MHz */
+			WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_decision_threshold), 1200000);	/* 50ms */
+			WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_busy_clamp_threshold), 1200000);/* 50ms */
+			WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_idle_clamp_threshold), 1200000);/* 50ms */
+			dev_dbg(adev->dev, "%s: configure vpe dpm pratio done!\n", __func__);
+		} else {
+			dev_dbg(adev->dev, "%s: invalid pratio parameters!\n", __func__);
+			goto disable_dpm;
+		}
+	}
+	return 0;
+
+disable_dpm:
+	dpm_ctl = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable));
+	dpm_ctl &= 0xfffffffe; /* Disable DPM */
+	WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl);
+	dev_dbg(adev->dev, "%s: disable vpe dpm\n", __func__);
+	return 0;
+}
+
 int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev)
 {
 	struct amdgpu_firmware_info ucode = {
@@ -134,6 +307,19 @@ static int vpe_early_init(void *handle)
 	return 0;
 }
 
+/* Delayed work: power-gate VPE once its ring has no emitted fences left. */
+static void vpe_idle_work_handler(struct work_struct *work)
+{
+	struct amdgpu_device *adev =
+		container_of(work, struct amdgpu_device, vpe.idle_work.work);
+
+	/* Fences still outstanding: look again after another VPE_IDLE_TIMEOUT. */
+	if (amdgpu_fence_count_emitted(&adev->vpe.ring)) {
+		schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT);
+		return;
+	}
+	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE);
+}
 
 static int vpe_common_init(struct amdgpu_vpe *vpe)
 {
@@ -150,6 +336,9 @@ static int vpe_common_init(struct amdgpu_vpe *vpe)
 		return r;
 	}
 
+	vpe->context_started = false;
+	INIT_DELAYED_WORK(&adev->vpe.idle_work, vpe_idle_work_handler);
+
 	return 0;
 }
 
@@ -219,6 +408,9 @@ static int vpe_hw_fini(void *handle)
 
 	vpe_ring_stop(vpe);
 
+	/* Power off VPE */
+	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE);
+
 	return 0;
 }
 
@@ -226,6 +418,8 @@ static int vpe_suspend(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	cancel_delayed_work_sync(&adev->vpe.idle_work);
+
 	return vpe_hw_fini(adev);
 }
 
@@ -430,6 +624,21 @@ static int vpe_set_clockgating_state(void *handle,
 static int vpe_set_powergating_state(void *handle,
 				     enum amd_powergating_state state)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	const bool gate = (state == AMD_PG_STATE_GATE);
+
+	if (!adev->pm.dpm_enabled)
+		dev_err(adev->dev, "Without PM, cannot support powergating\n");
+
+	dev_dbg(adev->dev, "%s: %s!\n", __func__, gate ? "GATE":"UNGATE");
+
+	/*
+	 * Gating also clears context_started, which vpe_ring_begin_use() tests.
+	 */
+	amdgpu_dpm_enable_vpe(adev, !gate);
+	if (gate)
+		adev->vpe.context_started = false;
+
 	return 0;
 }
 
@@ -595,6 +804,38 @@ err0:
 	return ret;
 }
 
+/* Ring begin_use hook: keep VPE powered and announce a new context once. */
+static void vpe_ring_begin_use(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_vpe *vpe = &adev->vpe;
+	uint32_t indicator;
+
+	/* Stop a pending idle handler from gating VPE while the ring is used. */
+	cancel_delayed_work_sync(&vpe->idle_work);
+
+	/* Nothing more to do if the context was already started. */
+	if (vpe->context_started)
+		return;
+
+	/* Power on VPE */
+	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_UNGATE);
+
+	/*
+	 * Toggle bit 0 of the context indicator to flag a job from a new context.
+	 */
+	indicator = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.context_indicator)) ^ 0x1;
+	WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.context_indicator), indicator);
+	vpe->context_started = true;
+}
+
+/* Ring end_use hook, the counterpart of vpe_ring_begin_use(). */
+static void vpe_ring_end_use(struct amdgpu_ring *ring)
+{
+	/* Queue the idle work that gates VPE once no fences remain. */
+	schedule_delayed_work(&ring->adev->vpe.idle_work, VPE_IDLE_TIMEOUT);
+}
+
 static const struct amdgpu_ring_funcs vpe_ring_funcs = {
 	.type = AMDGPU_RING_TYPE_VPE,
 	.align_mask = 0xf,
@@ -625,6 +866,8 @@ static const struct amdgpu_ring_funcs vpe_ring_funcs = {
 	.init_cond_exec = vpe_ring_init_cond_exec,
 	.patch_cond_exec = vpe_ring_patch_cond_exec,
 	.preempt_ib = vpe_ring_preempt_ib,
+	.begin_use = vpe_ring_begin_use,
+	.end_use = vpe_ring_end_use,
 };
 
 static void vpe_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h
index 29d56f7ae4a9..1153ddaea64d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h
@@ -47,6 +47,15 @@ struct vpe_regs {
 	uint32_t queue0_rb_wptr_lo;
 	uint32_t queue0_rb_wptr_hi;
 	uint32_t queue0_preempt;
+
+	uint32_t dpm_enable;
+	uint32_t dpm_pratio;
+	uint32_t dpm_request_interval;
+	uint32_t dpm_decision_threshold;
+	uint32_t dpm_busy_clamp_threshold;
+	uint32_t dpm_idle_clamp_threshold;
+	uint32_t dpm_request_lv;
+	uint32_t context_indicator;
 };
 
 struct amdgpu_vpe {
@@ -63,12 +72,15 @@ struct amdgpu_vpe {
 	struct amdgpu_bo		*cmdbuf_obj;
 	uint64_t			cmdbuf_gpu_addr;
 	uint32_t			*cmdbuf_cpu_addr;
+	struct delayed_work		idle_work;
+	bool				context_started;
 };
 
 int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev);
 int amdgpu_vpe_init_microcode(struct amdgpu_vpe *vpe);
 int amdgpu_vpe_ring_init(struct amdgpu_vpe *vpe);
 int amdgpu_vpe_ring_fini(struct amdgpu_vpe *vpe);
+int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe);
 
 #define vpe_ring_init(vpe) ((vpe)->funcs->ring_init ? (vpe)->funcs->ring_init((vpe)) : 0)
 #define vpe_ring_start(vpe) ((vpe)->funcs->ring_start ? (vpe)->funcs->ring_start((vpe)) : 0)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 08916538a615..8db880244324 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -221,8 +221,23 @@ static struct attribute *amdgpu_vram_mgr_attributes[] = {
 	NULL
 };
 
+/* sysfs is_visible hook: hide mem_info_vram_vendor when gmc.vram_vendor is 0. */
+static umode_t amdgpu_vram_attrs_is_visible(struct kobject *kobj,
+					    struct attribute *attr, int i)
+{
+	struct drm_device *ddev = dev_get_drvdata(kobj_to_dev(kobj));
+	struct amdgpu_device *adev = drm_to_adev(ddev);
+	bool is_vendor_attr = (attr == &dev_attr_mem_info_vram_vendor.attr);
+
+	/* A mode of 0 hides the attribute; anything else keeps its own mode. */
+	if (is_vendor_attr && !adev->gmc.vram_vendor)
+		return 0;
+	return attr->mode;
+}
+
 const struct attribute_group amdgpu_vram_mgr_attr_group = {
-	.attrs = amdgpu_vram_mgr_attributes
+	.attrs = amdgpu_vram_mgr_attributes,
+	.is_visible = amdgpu_vram_attrs_is_visible
 };
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index bd20cb3b9819..a6c88f2fe6e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -413,6 +413,38 @@ static ssize_t amdgpu_xgmi_show_num_links(struct device *dev,
 	return sysfs_emit(buf, "%s\n", buf);
 }
 
+static ssize_t amdgpu_xgmi_show_connected_port_num(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = drm_to_adev(ddev);
+	struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+	int i, j, size = 0;
+	int current_node;
+	/*
+	 * Use this device's index in the topology node list as its node id in
+	 * the output (NOT the id retrieved from xgmi ta). Bail out if it is not
+	 * listed, so current_node is never read uninitialized.
+	 */
+	for (i = 0; i < top->num_nodes; i++)
+		if (top->nodes[i].node_id == adev->gmc.xgmi.node_id)
+			break;
+	if (i == top->num_nodes)
+		return -EINVAL;
+	current_node = i;
+
+	for (i = 0; i < top->num_nodes; i++) {
+		for (j = 0; j < top->nodes[i].num_links; j++)
+			/* node id in sysfs starts from 1 rather than 0 so +1 here */
+			size += sysfs_emit_at(buf, size, "%02x:%02x ->  %02x:%02x\n", current_node + 1,
+					      top->nodes[i].port_num[j].src_xgmi_port_num, i + 1,
+					      top->nodes[i].port_num[j].dst_xgmi_port_num);
+	}
+
+	return size;
+}
+
 #define AMDGPU_XGMI_SET_FICAA(o)	((o) | 0x456801)
 static ssize_t amdgpu_xgmi_show_error(struct device *dev,
 				      struct device_attribute *attr,
@@ -452,6 +484,7 @@ static DEVICE_ATTR(xgmi_physical_id, 0444, amdgpu_xgmi_show_physical_id, NULL);
 static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL);
 static DEVICE_ATTR(xgmi_num_hops, S_IRUGO, amdgpu_xgmi_show_num_hops, NULL);
 static DEVICE_ATTR(xgmi_num_links, S_IRUGO, amdgpu_xgmi_show_num_links, NULL);
+static DEVICE_ATTR(xgmi_port_num, S_IRUGO, amdgpu_xgmi_show_connected_port_num, NULL);
 
 static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
 					 struct amdgpu_hive_info *hive)
@@ -487,6 +520,13 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
 	if (ret)
 		pr_err("failed to create xgmi_num_links\n");
 
+	/* Create xgmi port num file if supported */
+	if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) {
+		ret = device_create_file(adev->dev, &dev_attr_xgmi_port_num);
+		if (ret)
+			dev_err(adev->dev, "failed to create xgmi_port_num\n");
+	}
+
 	/* Create sysfs link to hive info folder on the first device */
 	if (hive->kobj.parent != (&adev->dev->kobj)) {
 		ret = sysfs_create_link(&adev->dev->kobj, &hive->kobj,
@@ -517,6 +557,8 @@ remove_file:
 	device_remove_file(adev->dev, &dev_attr_xgmi_error);
 	device_remove_file(adev->dev, &dev_attr_xgmi_num_hops);
 	device_remove_file(adev->dev, &dev_attr_xgmi_num_links);
+	if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG)
+		device_remove_file(adev->dev, &dev_attr_xgmi_port_num);
 
 success:
 	return ret;
@@ -533,6 +575,8 @@ static void amdgpu_xgmi_sysfs_rem_dev_info(struct amdgpu_device *adev,
 	device_remove_file(adev->dev, &dev_attr_xgmi_error);
 	device_remove_file(adev->dev, &dev_attr_xgmi_num_hops);
 	device_remove_file(adev->dev, &dev_attr_xgmi_num_links);
+	if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG)
+		device_remove_file(adev->dev, &dev_attr_xgmi_port_num);
 
 	if (hive->kobj.parent != (&adev->dev->kobj))
 		sysfs_remove_link(&adev->dev->kobj,"xgmi_hive_info");
@@ -779,6 +823,28 @@ static int amdgpu_xgmi_initialize_hive_get_data_partition(struct amdgpu_hive_inf
 	return 0;
 }
 
+/* Copy adev's link data for peer_adev into peer_adev's topology entry for adev. */
+static void amdgpu_xgmi_fill_topology_info(struct amdgpu_device *adev,
+	struct amdgpu_device *peer_adev)
+{
+	struct psp_xgmi_topology_info *top_info = &adev->psp.xgmi_context.top_info;
+	struct psp_xgmi_topology_info *peer_info = &peer_adev->psp.xgmi_context.top_info;
+	int src, dst;
+
+	/* src: peer_adev's entry in adev's list; dst: adev's entry in the peer's. */
+	for (src = 0; src < top_info->num_nodes; src++)
+		if (top_info->nodes[src].node_id == peer_adev->gmc.xgmi.node_id)
+			break;
+	for (dst = 0; dst < peer_info->num_nodes; dst++)
+		if (peer_info->nodes[dst].node_id == adev->gmc.xgmi.node_id)
+			break;
+	if (src == top_info->num_nodes || dst == peer_info->num_nodes)
+		return;
+	peer_info->nodes[dst].num_hops = top_info->nodes[src].num_hops;
+	peer_info->nodes[dst].is_sharing_enabled = top_info->nodes[src].is_sharing_enabled;
+	peer_info->nodes[dst].num_links = top_info->nodes[src].num_links;
+}
+
 int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
 {
 	struct psp_xgmi_topology_info *top_info;
@@ -853,18 +919,38 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
 				goto exit_unlock;
 		}
 
-		/* get latest topology info for each device from psp */
-		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
-			ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
-					&tmp_adev->psp.xgmi_context.top_info, false);
+		if (amdgpu_sriov_vf(adev) &&
+			adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) {
+			/* only get topology for VF being init if it can support full duplex */
+			ret = psp_xgmi_get_topology_info(&adev->psp, count,
+						&adev->psp.xgmi_context.top_info, false);
 			if (ret) {
-				dev_err(tmp_adev->dev,
+				dev_err(adev->dev,
 					"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
-					tmp_adev->gmc.xgmi.node_id,
-					tmp_adev->gmc.xgmi.hive_id, ret);
-				/* To do : continue with some node failed or disable the whole hive */
+					adev->gmc.xgmi.node_id,
+					adev->gmc.xgmi.hive_id, ret);
+				/* To do: continue with some node failed or disable the whole hive*/
 				goto exit_unlock;
 			}
+
+			/* fill the topology info for peers instead of getting from PSP */
+			list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+				amdgpu_xgmi_fill_topology_info(adev, tmp_adev);
+			}
+		} else {
+			/* get latest topology info for each device from psp */
+			list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+				ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
+					&tmp_adev->psp.xgmi_context.top_info, false);
+				if (ret) {
+					dev_err(tmp_adev->dev,
+						"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
+						tmp_adev->gmc.xgmi.node_id,
+						tmp_adev->gmc.xgmi.hive_id, ret);
+					/* To do : continue with some node failed or disable the whole hive */
+					goto exit_unlock;
+				}
+			}
 		}
 
 		/* get topology again for hives that support extended data */
@@ -1227,10 +1313,10 @@ static void __xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, struct a
 
 	switch (xgmi_v6_4_0_pcs_mca_get_error_type(adev, status)) {
 	case AMDGPU_MCA_ERROR_TYPE_UE:
-		amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, 1ULL);
+		amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, NULL, 1ULL);
 		break;
 	case AMDGPU_MCA_ERROR_TYPE_CE:
-		amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, 1ULL);
+		amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, NULL, 1ULL);
 		break;
 	default:
 		break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index 6cab882e8061..1592c63b3099 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -43,7 +43,6 @@ struct amdgpu_hive_info {
 	} pstate;
 
 	struct amdgpu_reset_domain *reset_domain;
-	uint32_t device_remove_count;
 	atomic_t ras_recovery;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
index 3f715e7fe1a9..d6f808acfb17 100644
--- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
@@ -24,6 +24,7 @@
 #include "soc15.h"
 
 #include "soc15_common.h"
+#include "amdgpu_reg_state.h"
 #include "amdgpu_xcp.h"
 #include "gfx_v9_4_3.h"
 #include "gfxhub_v1_2.h"
@@ -656,3 +657,416 @@ int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev)
 
 	return 0;
 }
+
+static void aqua_read_smn(struct amdgpu_device *adev,
+			  struct amdgpu_smn_reg_data *regdata,
+			  uint64_t smn_addr)
+{
+	regdata->addr = smn_addr;
+	regdata->value = RREG32_PCIE(smn_addr);
+}
+
+struct aqua_reg_list {
+	uint64_t start_addr;	/* SMN address of the first register of the run */
+	uint32_t num_regs;	/* number of registers read from this run */
+	uint32_t incrx;		/* address step between consecutive registers */
+};
+
+#define DW_ADDR_INCR	4
+
+static void aqua_read_smn_ext(struct amdgpu_device *adev,
+			      struct amdgpu_smn_reg_data *regdata,
+			      uint64_t smn_addr, int i)
+{
+	regdata->addr =
+		smn_addr + adev->asic_funcs->encode_ext_smn_addressing(i);
+	regdata->value = RREG32_PCIE_EXT(regdata->addr);
+}
+
+#define smnreg_0x1A340218	0x1A340218
+#define smnreg_0x1A3402E4	0x1A3402E4
+#define smnreg_0x1A340294	0x1A340294
+#define smreg_0x1A380088	0x1A380088
+
+#define NUM_PCIE_SMN_REGS	14
+
+static struct aqua_reg_list pcie_reg_addrs[] = {
+	{ smnreg_0x1A340218, 1, 0 },
+	{ smnreg_0x1A3402E4, 1, 0 },
+	{ smnreg_0x1A340294, 6, DW_ADDR_INCR },
+	{ smreg_0x1A380088, 6, DW_ADDR_INCR },
+};
+
+static ssize_t aqua_vanjaram_read_pcie_state(struct amdgpu_device *adev,
+					     void *buf, size_t max_size)
+{
+	struct amdgpu_reg_state_pcie_v1_0 *pcie_reg_state = buf;
+	uint32_t start_addr, incrx, num_regs, szbuf;
+	struct amdgpu_regs_pcie_v1_0 *pcie_regs;
+	struct amdgpu_smn_reg_data *reg_data;
+	struct pci_dev *us_pdev, *ds_pdev;
+	int aer_cap, r, n;
+
+	if (!buf || !max_size)
+		return -EINVAL;
+
+	szbuf = sizeof(*pcie_reg_state) +
+		amdgpu_reginst_size(1, sizeof(*pcie_regs), NUM_PCIE_SMN_REGS);
+	/* Only one instance of pcie regs */
+	if (max_size < szbuf)
+		return -EOVERFLOW;
+
+	/* Config space is read two bridges up; bail out if there is none. */
+	ds_pdev = pci_upstream_bridge(adev->pdev);
+	us_pdev = ds_pdev ? pci_upstream_bridge(ds_pdev) : NULL;
+	if (!us_pdev)
+		return -ENODEV;
+
+	pcie_regs = (struct amdgpu_regs_pcie_v1_0 *)(pcie_reg_state + 1);
+	pcie_regs->inst_header.instance = 0;
+	pcie_regs->inst_header.state = AMDGPU_INST_S_OK;
+	pcie_regs->inst_header.num_smn_regs = NUM_PCIE_SMN_REGS;
+
+	reg_data = pcie_regs->smn_reg_values;
+
+	for (r = 0; r < ARRAY_SIZE(pcie_reg_addrs); r++) {
+		start_addr = pcie_reg_addrs[r].start_addr;
+		incrx = pcie_reg_addrs[r].incrx;
+		num_regs = pcie_reg_addrs[r].num_regs;
+		for (n = 0; n < num_regs; n++) {
+			aqua_read_smn(adev, reg_data, start_addr + n * incrx);
+			++reg_data;
+		}
+	}
+
+	pcie_capability_read_word(us_pdev, PCI_EXP_DEVSTA,
+				  &pcie_regs->device_status);
+	pcie_capability_read_word(us_pdev, PCI_EXP_LNKSTA,
+				  &pcie_regs->link_status);
+
+	aer_cap = pci_find_ext_capability(us_pdev, PCI_EXT_CAP_ID_ERR);
+	if (aer_cap) {
+		pci_read_config_dword(us_pdev, aer_cap + PCI_ERR_COR_STATUS,
+				      &pcie_regs->pcie_corr_err_status);
+		pci_read_config_dword(us_pdev, aer_cap + PCI_ERR_UNCOR_STATUS,
+				      &pcie_regs->pcie_uncorr_err_status);
+	}
+
+	pci_read_config_dword(us_pdev, PCI_PRIMARY_BUS,
+			      &pcie_regs->sub_bus_number_latency);
+
+	pcie_reg_state->common_header.structure_size = szbuf;
+	pcie_reg_state->common_header.format_revision = 1;
+	pcie_reg_state->common_header.content_revision = 0;
+	pcie_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_PCIE;
+	pcie_reg_state->common_header.num_instances = 1;
+
+	return pcie_reg_state->common_header.structure_size;
+}
+
+#define smnreg_0x11A00050	0x11A00050
+#define smnreg_0x11A00180	0x11A00180
+#define smnreg_0x11A00070	0x11A00070
+#define smnreg_0x11A00200	0x11A00200
+#define smnreg_0x11A0020C	0x11A0020C
+#define smnreg_0x11A00210	0x11A00210
+#define smnreg_0x11A00108	0x11A00108
+
+#define XGMI_LINK_REG(smnreg, l) ((smnreg) | (l << 20))
+
+#define NUM_XGMI_SMN_REGS 25
+
+static struct aqua_reg_list xgmi_reg_addrs[] = {
+	{ smnreg_0x11A00050, 1, 0 },
+	{ smnreg_0x11A00180, 16, DW_ADDR_INCR },
+	{ smnreg_0x11A00070, 4, DW_ADDR_INCR },
+	{ smnreg_0x11A00200, 1, 0 },
+	{ smnreg_0x11A0020C, 1, 0 },
+	{ smnreg_0x11A00210, 1, 0 },
+	{ smnreg_0x11A00108, 1, 0 },
+};
+
+static ssize_t aqua_vanjaram_read_xgmi_state(struct amdgpu_device *adev,
+					     void *buf, size_t max_size)
+{
+	struct amdgpu_reg_state_xgmi_v1_0 *xgmi_reg_state;
+	uint32_t start_addr, incrx, num_regs, szbuf;
+	struct amdgpu_regs_xgmi_v1_0 *xgmi_regs;
+	struct amdgpu_smn_reg_data *reg_data;
+	const int max_xgmi_instances = 8;
+	int inst = 0, i, j, r, n;
+	const int xgmi_inst = 2;
+	void *p;
+
+	if (!buf || !max_size)
+		return -EINVAL;
+
+	xgmi_reg_state = (struct amdgpu_reg_state_xgmi_v1_0 *)buf;
+
+	szbuf = sizeof(*xgmi_reg_state) +
+		amdgpu_reginst_size(max_xgmi_instances, sizeof(*xgmi_regs),
+				    NUM_XGMI_SMN_REGS);
+	/* Buffer must have room for max_xgmi_instances instances of xgmi regs */
+	if (max_size < szbuf)
+		return -EOVERFLOW;
+	/* One record per (instance in aid_mask, link j): header, then SMN regs */
+	p = &xgmi_reg_state->xgmi_state_regs[0];
+	for_each_inst(i, adev->aid_mask) {
+		for (j = 0; j < xgmi_inst; ++j) {
+			xgmi_regs = (struct amdgpu_regs_xgmi_v1_0 *)p;
+			xgmi_regs->inst_header.instance = inst++;
+
+			xgmi_regs->inst_header.state = AMDGPU_INST_S_OK;
+			xgmi_regs->inst_header.num_smn_regs = NUM_XGMI_SMN_REGS;
+
+			reg_data = xgmi_regs->smn_reg_values;
+
+			for (r = 0; r < ARRAY_SIZE(xgmi_reg_addrs); r++) {
+				start_addr = xgmi_reg_addrs[r].start_addr;
+				incrx = xgmi_reg_addrs[r].incrx;
+				num_regs = xgmi_reg_addrs[r].num_regs;
+
+				for (n = 0; n < num_regs; n++) {
+					aqua_read_smn_ext(
+						adev, reg_data,
+						XGMI_LINK_REG(start_addr, j) +
+							n * incrx,
+						i);
+					++reg_data;
+				}
+			}
+			p = reg_data;
+		}
+	}
+	/* NOTE(review): header reports max_xgmi_instances, not inst - confirm */
+	xgmi_reg_state->common_header.structure_size = szbuf;
+	xgmi_reg_state->common_header.format_revision = 1;
+	xgmi_reg_state->common_header.content_revision = 0;
+	xgmi_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_XGMI;
+	xgmi_reg_state->common_header.num_instances = max_xgmi_instances;
+
+	return xgmi_reg_state->common_header.structure_size;
+}
+
+#define smnreg_0x11C00070	0x11C00070
+#define smnreg_0x11C00210	0x11C00210
+
+static struct aqua_reg_list wafl_reg_addrs[] = {
+	{ smnreg_0x11C00070, 4, DW_ADDR_INCR },
+	{ smnreg_0x11C00210, 1, 0 },
+};
+
+#define WAFL_LINK_REG(smnreg, l) ((smnreg) | (l << 20))
+
+#define NUM_WAFL_SMN_REGS 5
+
+static ssize_t aqua_vanjaram_read_wafl_state(struct amdgpu_device *adev,
+					     void *buf, size_t max_size)
+{
+	struct amdgpu_reg_state_wafl_v1_0 *wafl_reg_state;
+	uint32_t start_addr, incrx, num_regs, szbuf;
+	struct amdgpu_regs_wafl_v1_0 *wafl_regs;
+	struct amdgpu_smn_reg_data *reg_data;
+	const int max_wafl_instances = 8;
+	int inst = 0, i, j, r, n;
+	const int wafl_inst = 2;
+	void *p;
+
+	if (!buf || !max_size)
+		return -EINVAL;
+
+	wafl_reg_state = (struct amdgpu_reg_state_wafl_v1_0 *)buf;
+
+	szbuf = sizeof(*wafl_reg_state) +
+		amdgpu_reginst_size(max_wafl_instances, sizeof(*wafl_regs),
+				    NUM_WAFL_SMN_REGS);
+	/* Buffer must have room for max_wafl_instances instances of wafl regs */
+	if (max_size < szbuf)
+		return -EOVERFLOW;
+	/* One record per (instance in aid_mask, link j): header, then SMN regs */
+	p = &wafl_reg_state->wafl_state_regs[0];
+	for_each_inst(i, adev->aid_mask) {
+		for (j = 0; j < wafl_inst; ++j) {
+			wafl_regs = (struct amdgpu_regs_wafl_v1_0 *)p;
+			wafl_regs->inst_header.instance = inst++;
+
+			wafl_regs->inst_header.state = AMDGPU_INST_S_OK;
+			wafl_regs->inst_header.num_smn_regs = NUM_WAFL_SMN_REGS;
+
+			reg_data = wafl_regs->smn_reg_values;
+
+			for (r = 0; r < ARRAY_SIZE(wafl_reg_addrs); r++) {
+				start_addr = wafl_reg_addrs[r].start_addr;
+				incrx = wafl_reg_addrs[r].incrx;
+				num_regs = wafl_reg_addrs[r].num_regs;
+				for (n = 0; n < num_regs; n++) {
+					aqua_read_smn_ext(
+						adev, reg_data,
+						WAFL_LINK_REG(start_addr, j) +
+							n * incrx,
+						i);
+					++reg_data;
+				}
+			}
+			p = reg_data;
+		}
+	}
+	/* NOTE(review): header reports max_wafl_instances, not inst - confirm */
+	wafl_reg_state->common_header.structure_size = szbuf;
+	wafl_reg_state->common_header.format_revision = 1;
+	wafl_reg_state->common_header.content_revision = 0;
+	wafl_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_WAFL;
+	wafl_reg_state->common_header.num_instances = max_wafl_instances;
+
+	return wafl_reg_state->common_header.structure_size;
+}
+
+#define smnreg_0x1B311060 0x1B311060
+#define smnreg_0x1B411060 0x1B411060
+#define smnreg_0x1B511060 0x1B511060
+#define smnreg_0x1B611060 0x1B611060
+
+#define smnreg_0x1C307120 0x1C307120
+#define smnreg_0x1C317120 0x1C317120
+
+#define smnreg_0x1C320830 0x1C320830
+#define smnreg_0x1C380830 0x1C380830
+#define smnreg_0x1C3D0830 0x1C3D0830
+#define smnreg_0x1C420830 0x1C420830
+
+#define smnreg_0x1C320100 0x1C320100
+#define smnreg_0x1C380100 0x1C380100
+#define smnreg_0x1C3D0100 0x1C3D0100
+#define smnreg_0x1C420100 0x1C420100
+
+#define smnreg_0x1B310500 0x1B310500
+#define smnreg_0x1C300400 0x1C300400
+
+#define USR_CAKE_INCR 0x11000
+#define USR_LINK_INCR 0x100000
+#define USR_CP_INCR 0x10000
+
+#define NUM_USR_SMN_REGS	20	/* == sum of num_regs in usr_reg_addrs */
+
+static struct aqua_reg_list usr_reg_addrs[] = {
+	{ smnreg_0x1B311060, 4, DW_ADDR_INCR },
+	{ smnreg_0x1B411060, 4, DW_ADDR_INCR },
+	{ smnreg_0x1B511060, 4, DW_ADDR_INCR },
+	{ smnreg_0x1B611060, 4, DW_ADDR_INCR },
+	{ smnreg_0x1C307120, 2, DW_ADDR_INCR },
+	{ smnreg_0x1C317120, 2, DW_ADDR_INCR },
+};
+
+#define NUM_USR1_SMN_REGS	46	/* == sum of num_regs in usr1_reg_addrs */
+static struct aqua_reg_list usr1_reg_addrs[] = {
+	{ smnreg_0x1C320830, 6, USR_CAKE_INCR },
+	{ smnreg_0x1C380830, 5, USR_CAKE_INCR },
+	{ smnreg_0x1C3D0830, 5, USR_CAKE_INCR },
+	{ smnreg_0x1C420830, 4, USR_CAKE_INCR },
+	{ smnreg_0x1C320100, 6, USR_CAKE_INCR },
+	{ smnreg_0x1C380100, 5, USR_CAKE_INCR },
+	{ smnreg_0x1C3D0100, 5, USR_CAKE_INCR },
+	{ smnreg_0x1C420100, 4, USR_CAKE_INCR },
+	{ smnreg_0x1B310500, 4, USR_LINK_INCR },
+	{ smnreg_0x1C300400, 2, USR_CP_INCR },
+};
+
+static ssize_t aqua_vanjaram_read_usr_state(struct amdgpu_device *adev,
+					    void *buf, size_t max_size,
+					    int reg_state)
+{
+	uint32_t start_addr, incrx, num_regs, szbuf, num_smn;
+	struct amdgpu_reg_state_usr_v1_0 *usr_reg_state;
+	struct amdgpu_regs_usr_v1_0 *usr_regs;
+	struct amdgpu_smn_reg_data *reg_data;
+	const int max_usr_instances = 4;
+	struct aqua_reg_list *reg_addrs;
+	int inst = 0, i, n, r, arr_size;
+	void *p;
+
+	if (!buf || !max_size)
+		return -EINVAL;
+	/* Pick the register table and per-instance register count for the type */
+	switch (reg_state) {
+	case AMDGPU_REG_STATE_TYPE_USR:
+		arr_size = ARRAY_SIZE(usr_reg_addrs);
+		reg_addrs = usr_reg_addrs;
+		num_smn = NUM_USR_SMN_REGS;
+		break;
+	case AMDGPU_REG_STATE_TYPE_USR_1:
+		arr_size = ARRAY_SIZE(usr1_reg_addrs);
+		reg_addrs = usr1_reg_addrs;
+		num_smn = NUM_USR1_SMN_REGS;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	usr_reg_state = (struct amdgpu_reg_state_usr_v1_0 *)buf;
+
+	szbuf = sizeof(*usr_reg_state) + amdgpu_reginst_size(max_usr_instances,
+							     sizeof(*usr_regs),
+							     num_smn);
+	if (max_size < szbuf)
+		return -EOVERFLOW;
+	/* One record per instance in aid_mask: header, then the SMN registers */
+	p = &usr_reg_state->usr_state_regs[0];
+	for_each_inst(i, adev->aid_mask) {
+		usr_regs = (struct amdgpu_regs_usr_v1_0 *)p;
+		usr_regs->inst_header.instance = inst++;
+		usr_regs->inst_header.state = AMDGPU_INST_S_OK;
+		usr_regs->inst_header.num_smn_regs = num_smn;
+		reg_data = usr_regs->smn_reg_values;
+
+		for (r = 0; r < arr_size; r++) {
+			start_addr = reg_addrs[r].start_addr;
+			incrx = reg_addrs[r].incrx;
+			num_regs = reg_addrs[r].num_regs;
+			for (n = 0; n < num_regs; n++) {
+				aqua_read_smn_ext(adev, reg_data,
+						  start_addr + n * incrx, i);
+				reg_data++;
+			}
+		}
+		p = reg_data;
+	}
+	/* NOTE(review): state_type stays USR even for a USR_1 dump - confirm */
+	usr_reg_state->common_header.structure_size = szbuf;
+	usr_reg_state->common_header.format_revision = 1;
+	usr_reg_state->common_header.content_revision = 0;
+	usr_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_USR;
+	usr_reg_state->common_header.num_instances = max_usr_instances;
+
+	return usr_reg_state->common_header.structure_size;
+}
+
+/*
+ * aqua_vanjaram_get_reg_state - dump one register-state type into @buf
+ * @adev: device to read the registers from
+ * @reg_state: which register-state type to dump
+ * @buf: destination buffer
+ * @max_size: size of @buf in bytes
+ *
+ * Dispatches to the reader for @reg_state and returns its result unchanged.
+ * Unknown types yield -EINVAL.
+ */
+ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev,
+				    enum amdgpu_reg_state reg_state, void *buf,
+				    size_t max_size)
+{
+	switch (reg_state) {
+	case AMDGPU_REG_STATE_TYPE_PCIE:
+		return aqua_vanjaram_read_pcie_state(adev, buf, max_size);
+	case AMDGPU_REG_STATE_TYPE_XGMI:
+		return aqua_vanjaram_read_xgmi_state(adev, buf, max_size);
+	case AMDGPU_REG_STATE_TYPE_WAFL:
+		return aqua_vanjaram_read_wafl_state(adev, buf, max_size);
+	case AMDGPU_REG_STATE_TYPE_USR:
+	case AMDGPU_REG_STATE_TYPE_USR_1:
+		/* Both USR types share one reader, selected by reg_state. */
+		return aqua_vanjaram_read_usr_state(adev, buf, max_size,
+						    reg_state);
+	default:
+		return -EINVAL;
+	}
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
index f0737fb3a999..d1bba9c64e16 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
@@ -30,6 +30,8 @@
 
 #define regATHUB_MISC_CNTL_V3_0_1			0x00d7
 #define regATHUB_MISC_CNTL_V3_0_1_BASE_IDX		0
+#define regATHUB_MISC_CNTL_V3_3_0			0x00d8
+#define regATHUB_MISC_CNTL_V3_3_0_BASE_IDX		0
 
 
 static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev)
@@ -40,6 +42,9 @@ static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev)
 	case IP_VERSION(3, 0, 1):
 		data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1);
 		break;
+	case IP_VERSION(3, 3, 0):
+		data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_3_0);
+		break;
 	default:
 		data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
 		break;
@@ -53,6 +58,9 @@ static void athub_v3_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data)
 	case IP_VERSION(3, 0, 1):
 		WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1, data);
 		break;
+	case IP_VERSION(3, 3, 0):
+		WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_3_0, data);
+		break;
 	default:
 		WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data);
 		break;
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c
index 2c221000782c..a33e890c70d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.c
+++ b/drivers/gpu/drm/amd/amdgpu/atom.c
@@ -395,7 +395,6 @@ static void atom_skip_src_int(atom_exec_context *ctx, uint8_t attr, int *ptr)
 			(*ptr)++;
 			return;
 		}
-		return;
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
index 3ee219aa2891..7672abe6c140 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
@@ -28,6 +28,7 @@
 
 #include <acpi/video.h>
 
+#include <drm/drm_edid.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "amdgpu_connectors.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
index 6f7c031dd197..f24e34dc33d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -204,6 +204,12 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev,
 		tmp = RREG32(mmIH_RB_CNTL);
 		tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
 		WREG32(mmIH_RB_CNTL, tmp);
+
+		/* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+		 * can be detected.
+		 */
+		tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
+		WREG32(mmIH_RB_CNTL, tmp);
 	}
 	return (wptr & ih->ptr_mask);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
index b8c47e0cf37a..c19681492efa 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -216,6 +216,11 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev,
 	tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
 	WREG32(mmIH_RB_CNTL, tmp);
 
+	/* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+	 * can be detected.
+	 */
+	tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+	WREG32(mmIH_RB_CNTL, tmp);
 
 out:
 	return (wptr & ih->ptr_mask);
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index bb666cb7522e..587ee632a3b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -21,6 +21,7 @@
  *
  */
 
+#include <drm/drm_edid.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_modeset_helper.h>
 #include <drm/drm_modeset_helper_vtables.h>
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 7af277f61cca..f22ec27365bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -21,6 +21,7 @@
  *
  */
 
+#include <drm/drm_edid.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_modeset_helper.h>
 #include <drm/drm_modeset_helper_vtables.h>
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index 143efc37a17f..4dbe9b3259b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -23,6 +23,7 @@
 
 #include <linux/pci.h>
 
+#include <drm/drm_edid.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_modeset_helper.h>
 #include <drm/drm_modeset_helper_vtables.h>
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index adeddfb7ff12..05bcce23385e 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -21,6 +21,7 @@
  *
  */
 
+#include <drm/drm_edid.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_modeset_helper.h>
 #include <drm/drm_modeset_helper_vtables.h>
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index c8a3bf01743f..dcdecb18b230 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -3996,16 +3996,13 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
 
 	if (!amdgpu_sriov_vf(adev)) {
 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
-		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
-		/* don't check this.  There are apparently firmwares in the wild with
-		 * incorrect size in the header
-		 */
-		if (err == -ENODEV)
-			goto out;
+		err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
 		if (err)
-			dev_dbg(adev->dev,
-				"gfx10: amdgpu_ucode_request() failed \"%s\"\n",
-				fw_name);
+			goto out;
+
+		/* don't validate this firmware. There are apparently firmwares
+		 * in the wild with incorrect size in the header
+		 */
 		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
 		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
 		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
@@ -4030,8 +4027,6 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
 		err = 0;
 		adev->gfx.mec2_fw = NULL;
 	}
-	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
-	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
 
 	gfx_v10_0_check_fw_write_wait(adev);
 out:
@@ -6592,8 +6587,9 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
 #ifdef __BIG_ENDIAN
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
 #endif
-	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
-	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
+			    prop->allow_tunneling);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
 	mqd->cp_hqd_pq_control = tmp;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 0c6133cc5e57..4f3bfdc75b37 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -67,6 +67,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
@@ -89,6 +90,10 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin");
 
+static const struct soc15_reg_golden golden_settings_gc_11_0[] = {
+	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000)
+};
+
 static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
 {
 	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
@@ -102,23 +107,6 @@ static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
 	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
 };
 
-static const struct soc15_reg_golden golden_settings_gc_11_5_0[] = {
-	SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_DEBUG5, 0xffffffff, 0x00000800),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x0c1807ff, 0x00000242),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, regGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL, 0xffffffff, 0xf37fff3f),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xfffffffb, 0x00f40188),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL4, 0xf0ffffff, 0x8000b007),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf1ffffff, 0x00880007),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, regPC_CONFIG_CNTL_1, 0xffffffff, 0x00010000),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL2, 0x007f0000, 0x00000000),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xffcfffff, 0x0000200a),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, regUTCL1_CTRL_2, 0xffffffff, 0x0000048f)
-};
-
 #define DEFAULT_SH_MEM_CONFIG \
 	((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
 	 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
@@ -289,6 +277,9 @@ static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
 
 static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
 {
+	if (amdgpu_sriov_vf(adev))
+		return;
+
 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
 	case IP_VERSION(11, 0, 1):
 	case IP_VERSION(11, 0, 4):
@@ -296,14 +287,13 @@ static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
 						golden_settings_gc_11_0_1,
 						(const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
 		break;
-	case IP_VERSION(11, 5, 0):
-		soc15_program_register_sequence(adev,
-						golden_settings_gc_11_5_0,
-						(const u32)ARRAY_SIZE(golden_settings_gc_11_5_0));
-		break;
 	default:
 		break;
 	}
+	soc15_program_register_sequence(adev,
+					golden_settings_gc_11_0,
+					(const u32)ARRAY_SIZE(golden_settings_gc_11_0));
+
 }
 
 static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
@@ -419,7 +409,7 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 		adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
 		cpu_ptr = &adev->wb.wb[index];
 
-		r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
+		r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
 		if (r) {
 			DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
 			goto err1;
@@ -556,7 +546,11 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
 	}
 
 	if (!amdgpu_sriov_vf(adev)) {
-		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
+		if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) &&
+		    adev->pdev->revision == 0xCE)
+			snprintf(fw_name, sizeof(fw_name), "amdgpu/gc_11_0_0_rlc_1.bin");
+		else
+			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
 		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
 		if (err)
 			goto out;
@@ -3830,8 +3824,9 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
 			    (order_base_2(prop->queue_size / 4) - 1));
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
 			    (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
-	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
-	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
+			    prop->allow_tunneling);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
 	mqd->cp_hqd_pq_control = tmp;
@@ -4457,11 +4452,43 @@ static int gfx_v11_0_wait_for_idle(void *handle)
 	return -ETIMEDOUT;
 }
 
+/*
+ * Acquire (req != 0) or release (req == 0) the CP gfx index mutex, retrying
+ * every 1us up to adev->usec_timeout times. Returns 0 or -ETIMEDOUT.
+ */
+static int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev,
+					     int req)
+{
+	u32 i, tmp, val;
+
+	for (i = 0; i < adev->usec_timeout; i++) {
+		/* Request with MeId=2, PipeId=0 */
+		tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req);
+		tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4);
+		WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp);
+
+		val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX);
+		if (req) {
+			/* granted once the read-back equals the request */
+			if (val == tmp)
+				return 0;
+		} else {
+			tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, REQUEST, 1);
+			/* unlocked or locked by firmware */
+			if (val != tmp)
+				return 0;
+		}
+		udelay(1);
+	}
+
+	return -ETIMEDOUT;
+}
+
 static int gfx_v11_0_soft_reset(void *handle)
 {
 	u32 grbm_soft_reset = 0;
 	u32 tmp;
-	int i, j, k;
+	int r, i, j, k;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
@@ -4501,6 +4528,13 @@ static int gfx_v11_0_soft_reset(void *handle)
 		}
 	}
 
+	/* Try to acquire the gfx mutex before access to CP_VMID_RESET */
+	r = gfx_v11_0_request_gfx_index_mutex(adev, 1);
+	if (r) {
+		DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n");
+		return r;
+	}
+
 	WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);
 
 	// Read CP_VMID_RESET register three times.
@@ -4509,6 +4543,13 @@ static int gfx_v11_0_soft_reset(void *handle)
 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
 
+	/* release the gfx mutex */
+	r = gfx_v11_0_request_gfx_index_mutex(adev, 0);
+	if (r) {
+		DRM_ERROR("Failed to release the gfx mutex during soft reset\n");
+		return r;
+	}
+
 	for (i = 0; i < adev->usec_timeout; i++) {
 		if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
 		    !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
@@ -6320,6 +6361,9 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+			bitmap = i * adev->gfx.config.max_sh_per_se + j;
+			if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
+				continue;
 			mask = 1;
 			counter = 0;
 			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 885ebd703260..1943beb135c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -883,8 +883,8 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	gpu_addr = adev->wb.gpu_addr + (index * 4);
 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
 	memset(&ib, 0, sizeof(ib));
-	r = amdgpu_ib_get(adev, NULL, 16,
-					AMDGPU_IB_POOL_DIRECT, &ib);
+
+	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
 	if (r)
 		goto err1;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index e3ff6e46f3f7..3bc6943365a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1039,8 +1039,8 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	gpu_addr = adev->wb.gpu_addr + (index * 4);
 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
 	memset(&ib, 0, sizeof(ib));
-	r = amdgpu_ib_get(adev, NULL, 16,
-					AMDGPU_IB_POOL_DIRECT, &ib);
+
+	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
 	if (r)
 		goto err1;
 
@@ -3034,6 +3034,14 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
 
 	gfx_v9_0_cp_gfx_enable(adev, true);
 
+	/* This quirk is currently limited to the gfx9 APU series; it has
+	 * been confirmed that gfx10/gfx11 APUs do not need this update.
+	 */
+	if (adev->flags & AMD_IS_APU &&
+			adev->in_s3 && !adev->suspend_complete) {
+		DRM_INFO(" Will skip the CSB packet resubmit\n");
+		return 0;
+	}
 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
 	if (r) {
 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 40d06d32bb74..131cddbdda0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -297,8 +297,8 @@ static int gfx_v9_4_3_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	gpu_addr = adev->wb.gpu_addr + (index * 4);
 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
 	memset(&ib, 0, sizeof(ib));
-	r = amdgpu_ib_get(adev, NULL, 16,
-			  AMDGPU_IB_POOL_DIRECT, &ib);
+
+	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
 	if (r)
 		goto err1;
 
@@ -3828,8 +3828,8 @@ static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev,
 	/* the caller should make sure initialize value of
 	 * err_data->ue_count and err_data->ce_count
 	 */
-	amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count);
-	amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count);
+	amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count);
+	amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count);
 }
 
 static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev,
@@ -3882,150 +3882,6 @@ static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev,
 	mutex_unlock(&adev->grbm_idx_mutex);
 }
 
-static void gfx_v9_4_3_inst_query_utc_err_status(struct amdgpu_device *adev,
-					int xcc_id)
-{
-	uint32_t data;
-
-	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS);
-	if (data) {
-		dev_warn(adev->dev, "GFX UTCL2 Mem Ecc Status: 0x%x!\n", data);
-		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS, 0x3);
-	}
-
-	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS);
-	if (data) {
-		dev_warn(adev->dev, "GFX VML2 Mem Ecc Status: 0x%x!\n", data);
-		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS, 0x3);
-	}
-
-	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
-				regVML2_WALKER_MEM_ECC_STATUS);
-	if (data) {
-		dev_warn(adev->dev, "GFX VML2 Walker Mem Ecc Status: 0x%x!\n", data);
-		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_WALKER_MEM_ECC_STATUS,
-				0x3);
-	}
-}
-
-static void gfx_v9_4_3_log_cu_timeout_status(struct amdgpu_device *adev,
-					uint32_t status, int xcc_id)
-{
-	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
-	uint32_t i, simd, wave;
-	uint32_t wave_status;
-	uint32_t wave_pc_lo, wave_pc_hi;
-	uint32_t wave_exec_lo, wave_exec_hi;
-	uint32_t wave_inst_dw0, wave_inst_dw1;
-	uint32_t wave_ib_sts;
-
-	for (i = 0; i < 32; i++) {
-		if (!((i << 1) & status))
-			continue;
-
-		simd = i / cu_info->max_waves_per_simd;
-		wave = i % cu_info->max_waves_per_simd;
-
-		wave_status = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_STATUS);
-		wave_pc_lo = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_LO);
-		wave_pc_hi = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_HI);
-		wave_exec_lo =
-			wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_LO);
-		wave_exec_hi =
-			wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_HI);
-		wave_inst_dw0 =
-			wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW0);
-		wave_inst_dw1 =
-			wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW1);
-		wave_ib_sts = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_IB_STS);
-
-		dev_info(
-			adev->dev,
-			"\t SIMD %d, Wave %d: status 0x%x, pc 0x%llx, exec 0x%llx, inst 0x%llx, ib_sts 0x%x\n",
-			simd, wave, wave_status,
-			((uint64_t)wave_pc_hi << 32 | wave_pc_lo),
-			((uint64_t)wave_exec_hi << 32 | wave_exec_lo),
-			((uint64_t)wave_inst_dw1 << 32 | wave_inst_dw0),
-			wave_ib_sts);
-	}
-}
-
-static void gfx_v9_4_3_inst_query_sq_timeout_status(struct amdgpu_device *adev,
-					int xcc_id)
-{
-	uint32_t se_idx, sh_idx, cu_idx;
-	uint32_t status;
-
-	mutex_lock(&adev->grbm_idx_mutex);
-	for (se_idx = 0; se_idx < adev->gfx.config.max_shader_engines; se_idx++) {
-		for (sh_idx = 0; sh_idx < adev->gfx.config.max_sh_per_se; sh_idx++) {
-			for (cu_idx = 0; cu_idx < adev->gfx.config.max_cu_per_sh; cu_idx++) {
-				gfx_v9_4_3_xcc_select_se_sh(adev, se_idx, sh_idx,
-							cu_idx, xcc_id);
-				status = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
-						      regSQ_TIMEOUT_STATUS);
-				if (status != 0) {
-					dev_info(
-						adev->dev,
-						"GFX Watchdog Timeout: SE %d, SH %d, CU %d\n",
-						se_idx, sh_idx, cu_idx);
-					gfx_v9_4_3_log_cu_timeout_status(
-						adev, status, xcc_id);
-				}
-				/* clear old status */
-				WREG32_SOC15(GC, GET_INST(GC, xcc_id),
-						regSQ_TIMEOUT_STATUS, 0);
-			}
-		}
-	}
-	gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
-			xcc_id);
-	mutex_unlock(&adev->grbm_idx_mutex);
-}
-
-static void gfx_v9_4_3_inst_query_ras_err_status(struct amdgpu_device *adev,
-					void *ras_error_status, int xcc_id)
-{
-	gfx_v9_4_3_inst_query_utc_err_status(adev, xcc_id);
-	gfx_v9_4_3_inst_query_sq_timeout_status(adev, xcc_id);
-}
-
-static void gfx_v9_4_3_inst_reset_utc_err_status(struct amdgpu_device *adev,
-					int xcc_id)
-{
-	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS, 0x3);
-	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS, 0x3);
-	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_WALKER_MEM_ECC_STATUS, 0x3);
-}
-
-static void gfx_v9_4_3_inst_reset_sq_timeout_status(struct amdgpu_device *adev,
-					int xcc_id)
-{
-	uint32_t se_idx, sh_idx, cu_idx;
-
-	mutex_lock(&adev->grbm_idx_mutex);
-	for (se_idx = 0; se_idx < adev->gfx.config.max_shader_engines; se_idx++) {
-		for (sh_idx = 0; sh_idx < adev->gfx.config.max_sh_per_se; sh_idx++) {
-			for (cu_idx = 0; cu_idx < adev->gfx.config.max_cu_per_sh; cu_idx++) {
-				gfx_v9_4_3_xcc_select_se_sh(adev, se_idx, sh_idx,
-							cu_idx, xcc_id);
-				WREG32_SOC15(GC, GET_INST(GC, xcc_id),
-						regSQ_TIMEOUT_STATUS, 0);
-			}
-		}
-	}
-	gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
-			xcc_id);
-	mutex_unlock(&adev->grbm_idx_mutex);
-}
-
-static void gfx_v9_4_3_inst_reset_ras_err_status(struct amdgpu_device *adev,
-					void *ras_error_status, int xcc_id)
-{
-	gfx_v9_4_3_inst_reset_utc_err_status(adev, xcc_id);
-	gfx_v9_4_3_inst_reset_sq_timeout_status(adev, xcc_id);
-}
-
 static void gfx_v9_4_3_inst_enable_watchdog_timer(struct amdgpu_device *adev,
 					void *ras_error_status, int xcc_id)
 {
@@ -4067,16 +3923,6 @@ static void gfx_v9_4_3_reset_ras_error_count(struct amdgpu_device *adev)
 	amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_reset_ras_err_count);
 }
 
-static void gfx_v9_4_3_query_ras_error_status(struct amdgpu_device *adev)
-{
-	amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_query_ras_err_status);
-}
-
-static void gfx_v9_4_3_reset_ras_error_status(struct amdgpu_device *adev)
-{
-	amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_reset_ras_err_status);
-}
-
 static void gfx_v9_4_3_enable_watchdog_timer(struct amdgpu_device *adev)
 {
 	amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_enable_watchdog_timer);
@@ -4394,8 +4240,6 @@ struct amdgpu_xcp_ip_funcs gfx_v9_4_3_xcp_funcs = {
 struct amdgpu_ras_block_hw_ops  gfx_v9_4_3_ras_ops = {
 	.query_ras_error_count = &gfx_v9_4_3_query_ras_error_count,
 	.reset_ras_error_count = &gfx_v9_4_3_reset_ras_error_count,
-	.query_ras_error_status = &gfx_v9_4_3_query_ras_error_status,
-	.reset_ras_error_status = &gfx_v9_4_3_reset_ras_error_status,
 };
 
 struct amdgpu_gfx_ras gfx_v9_4_3_ras = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 53a2ba5fcf4b..22175da0e16a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -102,7 +102,9 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
 		WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
 			min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
 
-		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+		if (adev->apu_flags & (AMD_APU_IS_RAVEN2 |
+				       AMD_APU_IS_RENOIR |
+				       AMD_APU_IS_GREEN_SARDINE))
 		       /*
 			* Raven2 has a HW issue that it is unable to use the
 			* vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR.
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
index 55423ff1bb49..49aecdcee006 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
@@ -139,7 +139,9 @@ gfxhub_v1_2_xcc_init_system_aperture_regs(struct amdgpu_device *adev,
 			WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
 				min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
 
-			if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+			if (adev->apu_flags & (AMD_APU_IS_RAVEN2 |
+					       AMD_APU_IS_RENOIR |
+					       AMD_APU_IS_GREEN_SARDINE))
 			       /*
 				* Raven2 has a HW issue that it is unable to use the
 				* vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR.
@@ -454,10 +456,12 @@ static void gfxhub_v1_2_xcc_gart_disable(struct amdgpu_device *adev,
 		WREG32_SOC15_RLC(GC, GET_INST(GC, j), regMC_VM_MX_L1_TLB_CNTL, tmp);
 
 		/* Setup L2 cache */
-		tmp = RREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL);
-		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
-		WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL, tmp);
-		WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL3, 0);
+		if (!amdgpu_sriov_vf(adev)) {
+			tmp = RREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL);
+			tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
+			WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL, tmp);
+			WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL3, 0);
+		}
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index a5a05c16c10d..6c5185608854 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -1041,6 +1041,10 @@ static int gmc_v10_0_hw_fini(void *handle)
 
 	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
 
+	if (adev->gmc.ecc_irq.funcs &&
+		amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+		amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 23d7b548d13f..c9c653cfc765 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -941,6 +941,11 @@ static int gmc_v11_0_hw_fini(void *handle)
 	}
 
 	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+
+	if (adev->gmc.ecc_irq.funcs &&
+		amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+		amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+
 	gmc_v11_0_gart_disable(adev);
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 42e103d7077d..59d9215e5556 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -915,8 +915,8 @@ static int gmc_v6_0_hw_init(void *handle)
 
 	if (amdgpu_emu_mode == 1)
 		return amdgpu_gmc_vram_checking(adev);
-	else
-		return r;
+
+	return 0;
 }
 
 static int gmc_v6_0_hw_fini(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index efc16e580f1e..45a2f8e031a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -1099,8 +1099,8 @@ static int gmc_v7_0_hw_init(void *handle)
 
 	if (amdgpu_emu_mode == 1)
 		return amdgpu_gmc_vram_checking(adev);
-	else
-		return r;
+
+	return 0;
 }
 
 static int gmc_v7_0_hw_fini(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index ff4ae73d27ec..4422b27a3cc2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -1219,8 +1219,8 @@ static int gmc_v8_0_hw_init(void *handle)
 
 	if (amdgpu_emu_mode == 1)
 		return amdgpu_gmc_vram_checking(adev);
-	else
-		return r;
+
+	return 0;
 }
 
 static int gmc_v8_0_hw_fini(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 2ac5820e9c92..e67a62db9e12 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -883,7 +883,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	 * GRBM interface.
 	 */
 	if ((vmhub == AMDGPU_GFXHUB(0)) &&
-	    (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
+	    (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2)))
 		RREG32_NO_KIQ(req);
 
 	for (j = 0; j < adev->usec_timeout; j++) {
@@ -1947,13 +1947,6 @@ static int gmc_v9_0_init_mem_ranges(struct amdgpu_device *adev)
 
 static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev)
 {
-	static const u32 regBIF_BIOS_SCRATCH_4 = 0x50;
-	u32 vram_info;
-
-	if (!amdgpu_sriov_vf(adev)) {
-		vram_info = RREG32(regBIF_BIOS_SCRATCH_4);
-		adev->gmc.vram_vendor = vram_info & 0xF;
-	}
 	adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
 	adev->gmc.vram_width = 128 * 64;
 }
@@ -2340,8 +2333,8 @@ static int gmc_v9_0_hw_init(void *handle)
 
 	if (amdgpu_emu_mode == 1)
 		return amdgpu_gmc_vram_checking(adev);
-	else
-		return r;
+
+	return 0;
 }
 
 /**
@@ -2380,6 +2373,10 @@ static int gmc_v9_0_hw_fini(void *handle)
 
 	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
 
+	if (adev->gmc.ecc_irq.funcs &&
+		amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+		amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
index 49e934975719..4db6bb73ead4 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
@@ -129,6 +129,11 @@ static void hdp_v4_0_get_clockgating_state(struct amdgpu_device *adev,
 {
 	int data;
 
+	if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 2)) {
+		/* Default enabled */
+		*flags |= AMD_CG_SUPPORT_HDP_MGCG;
+		return;
+	}
 	/* AMD_CG_SUPPORT_HDP_LS */
 	data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS));
 	if (data & HDP_MEM_POWER_LS__LS_ENABLE_MASK)
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
index aecad530b10a..2c02ae69883d 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
@@ -215,6 +215,11 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev,
 	tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
 	WREG32(mmIH_RB_CNTL, tmp);
 
+	/* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+	 * can be detected.
+	 */
+	tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+	WREG32(mmIH_RB_CNTL, tmp);
 
 out:
 	return (wptr & ih->ptr_mask);
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
index d9ed7332d805..ad4ad39f128f 100644
--- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
@@ -418,6 +418,12 @@ static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev,
 	tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
 	tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
 	WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
+	/* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+	 * can be detected.
+	 */
+	tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+	WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
 out:
 	return (wptr & ih->ptr_mask);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
index 8fb05eae340a..b8da0fc29378 100644
--- a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
@@ -418,6 +418,13 @@ static u32 ih_v6_1_get_wptr(struct amdgpu_device *adev,
 	tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
 	tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
 	WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
+	/* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+	 * can be detected.
+	 */
+	tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+	WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
 out:
 	return (wptr & ih->ptr_mask);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
index bc38b90f8cf8..88ea58d5c4ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
@@ -674,14 +674,6 @@ static int jpeg_v4_0_set_powergating_state(void *handle,
 	return ret;
 }
 
-static int jpeg_v4_0_set_interrupt_state(struct amdgpu_device *adev,
-					struct amdgpu_irq_src *source,
-					unsigned type,
-					enum amdgpu_interrupt_state state)
-{
-	return 0;
-}
-
 static int jpeg_v4_0_set_ras_interrupt_state(struct amdgpu_device *adev,
 					struct amdgpu_irq_src *source,
 					unsigned int type,
@@ -765,7 +757,6 @@ static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev)
 }
 
 static const struct amdgpu_irq_src_funcs jpeg_v4_0_irq_funcs = {
-	.set = jpeg_v4_0_set_interrupt_state,
 	.process = jpeg_v4_0_process_interrupt,
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
index 9df011323d4b..78b74daf4eeb 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
@@ -155,13 +155,6 @@ static int jpeg_v4_0_5_hw_init(void *handle)
 	struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
 	int r;
 
-	adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
-				(adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
-
-	WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL,
-		ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
-		VCN_JPEG_DB_CTRL__EN_MASK);
-
 	r = amdgpu_ring_test_helper(ring);
 	if (r)
 		return r;
@@ -188,7 +181,6 @@ static int jpeg_v4_0_5_hw_fini(void *handle)
 			RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS))
 			jpeg_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
 	}
-	amdgpu_irq_put(adev, &adev->jpeg.inst->irq, 0);
 
 	return 0;
 }
@@ -336,6 +328,14 @@ static int jpeg_v4_0_5_start(struct amdgpu_device *adev)
 	if (adev->pm.dpm_enabled)
 		amdgpu_dpm_enable_jpeg(adev, true);
 
+	/* doorbell programming is done for every playback */
+	adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+				(adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
+
+	WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL,
+		ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+		VCN_JPEG_DB_CTRL__EN_MASK);
+
 	/* disable power gating */
 	r = jpeg_v4_0_5_disable_static_power_gating(adev);
 	if (r)
@@ -515,14 +515,6 @@ static int jpeg_v4_0_5_set_powergating_state(void *handle,
 	return ret;
 }
 
-static int jpeg_v4_0_5_set_interrupt_state(struct amdgpu_device *adev,
-					struct amdgpu_irq_src *source,
-					unsigned type,
-					enum amdgpu_interrupt_state state)
-{
-	return 0;
-}
-
 static int jpeg_v4_0_5_process_interrupt(struct amdgpu_device *adev,
 				      struct amdgpu_irq_src *source,
 				      struct amdgpu_iv_entry *entry)
@@ -602,7 +594,6 @@ static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev)
 }
 
 static const struct amdgpu_irq_src_funcs jpeg_v4_0_5_irq_funcs = {
-	.set = jpeg_v4_0_5_set_interrupt_state,
 	.process = jpeg_v4_0_5_process_interrupt,
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 4dfec56e1b7f..26d71a22395d 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -408,6 +408,8 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
 	mes_set_hw_res_pkt.enable_reg_active_poll = 1;
 	mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
 	mes_set_hw_res_pkt.oversubscription_timer = 50;
+	mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
+	mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr;
 
 	return mes_v11_0_submit_pkt_and_poll_completion(mes,
 			&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 843219a91736..e3ddd22aa172 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -96,7 +96,9 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
 	WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
 		     min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
 
-	if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+	if (adev->apu_flags & (AMD_APU_IS_RAVEN2 |
+			       AMD_APU_IS_RENOIR |
+			       AMD_APU_IS_GREEN_SARDINE))
 		/*
 		 * Raven2 has a HW issue that it is unable to use the vram which
 		 * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
index 9b0146732e13..fb53aacdcba2 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
@@ -652,8 +652,8 @@ static void mmhub_v1_8_inst_query_ras_error_count(struct amdgpu_device *adev,
 					AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
 					&ue_count);
 
-	amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count);
-	amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count);
+	amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count);
+	amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count);
 }
 
 static void mmhub_v1_8_query_ras_error_count(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index e64b33115848..de93614726c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -442,6 +442,12 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev,
 	tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
 	tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
 	WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
+	/* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+	 * can be detected.
+	 */
+	tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+	WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
 out:
 	return (wptr & ih->ptr_mask);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
index 676ab1d20d2f..1f52b4b1db03 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
@@ -259,17 +259,17 @@ const struct nbio_hdp_flush_reg nbio_v7_11_hdp_flush_reg = {
 
 static void nbio_v7_11_init_registers(struct amdgpu_device *adev)
 {
-/*	uint32_t def, data;
+	uint32_t def, data;
+
+	def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3);
+	data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3,
+				CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1);
+	data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3,
+				CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1);
 
-		def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3);
-		data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3,
-			CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1);
-		data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3,
-			CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1);
+	if (def != data)
+		WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, data);
 
-		if (def != data)
-			WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, data);
-*/
 }
 
 static void nbio_v7_11_update_medium_grain_clock_gating(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index 6d24c84924cb..19986ff6a48d 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -401,8 +401,7 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device
 
 			if (err_data.ce_count)
 				dev_info(adev->dev, "%ld correctable hardware "
-						"errors detected in %s block, "
-						"no user action is needed.\n",
+						"errors detected in %s block\n",
 						obj->err_data.ce_count,
 						get_ras_block_str(adev->nbio.ras_if));
 
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
index 23f26f8caad4..b4723d68eab0 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
@@ -431,6 +431,12 @@ static void nbio_v7_9_init_registers(struct amdgpu_device *adev)
 	u32 inst_mask;
 	int i;
 
+	if (amdgpu_sriov_vf(adev))
+		adev->rmmio_remap.reg_offset =
+			SOC15_REG_OFFSET(
+				NBIO, 0,
+				regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL)
+			<< 2;
 	WREG32_SOC15(NBIO, 0, regXCC_DOORBELL_FENCE,
 		0xff & ~(adev->gfx.xcc_mask));
 
@@ -597,8 +603,7 @@ static void nbio_v7_9_handle_ras_controller_intr_no_bifring(struct amdgpu_device
 
 			if (err_data.ce_count)
 				dev_info(adev->dev, "%ld correctable hardware "
-						"errors detected in %s block, "
-						"no user action is needed.\n",
+						"errors detected in %s block\n",
 						obj->err_data.ce_count,
 						get_ras_block_str(adev->nbio.ras_if));
 
@@ -611,11 +616,6 @@ static void nbio_v7_9_handle_ras_controller_intr_no_bifring(struct amdgpu_device
 
 		dev_info(adev->dev, "RAS controller interrupt triggered "
 					"by NBIF error\n");
-
-		/* ras_controller_int is dedicated for nbif ras error,
-		 * not the global interrupt for sync flood
-		 */
-		amdgpu_ras_reset_gpu(adev);
 	}
 
 	amdgpu_ras_error_data_fini(&err_data);
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index 3cf4684d0d3f..df1844d0800f 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -60,7 +60,7 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_0_ta.bin");
 #define GFX_CMD_USB_PD_USE_LFB 0x480
 
 /* Retry times for vmbx ready wait */
-#define PSP_VMBX_POLLING_LIMIT 20000
+#define PSP_VMBX_POLLING_LIMIT 3000
 
 /* VBIOS gfl defines */
 #define MBOX_READY_MASK 0x80000000
@@ -161,14 +161,18 @@ static int psp_v13_0_wait_for_vmbx_ready(struct psp_context *psp)
 static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
 {
 	struct amdgpu_device *adev = psp->adev;
-	int retry_loop, ret;
+	int retry_loop, retry_cnt, ret;
 
+	retry_cnt =
+		(amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) ?
+			PSP_VMBX_POLLING_LIMIT :
+			10;
 	/* Wait for bootloader to signify that it is ready having bit 31 of
 	 * C2PMSG_35 set to 1. All other bits are expected to be cleared.
 	 * If there is an error in processing command, bits[7:0] will be set.
 	 * This is applicable for PSP v13.0.6 and newer.
 	 */
-	for (retry_loop = 0; retry_loop < PSP_VMBX_POLLING_LIMIT; retry_loop++) {
+	for (retry_loop = 0; retry_loop < retry_cnt; retry_loop++) {
 		ret = psp_wait_for(
 			psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
 			0x80000000, 0xffffffff, false);
@@ -821,7 +825,7 @@ static int psp_v13_0_query_boot_status(struct psp_context *psp)
 	if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6))
 		return 0;
 
-	if (RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_59) < 0x00a10007)
+	if (RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_59) < 0x00a10109)
 		return 0;
 
 	for_each_inst(i, inst_mask) {
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 45377a175250..8d5d86675a7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -813,12 +813,12 @@ static int sdma_v2_4_early_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int r;
 
+	adev->sdma.num_instances = SDMA_MAX_INSTANCE;
+
 	r = sdma_v2_4_init_microcode(adev);
 	if (r)
 		return r;
 
-	adev->sdma.num_instances = SDMA_MAX_INSTANCE;
-
 	sdma_v2_4_set_ring_funcs(adev);
 	sdma_v2_4_set_buffer_funcs(adev);
 	sdma_v2_4_set_vm_pte_funcs(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index 0f24af6f2810..2d688dca26be 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -2156,7 +2156,7 @@ static void sdma_v4_4_2_inst_query_ras_error_count(struct amdgpu_device *adev,
 					AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
 					&ue_count);
 
-	amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count);
+	amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count);
 }
 
 static void sdma_v4_4_2_query_ras_error_count(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 83c240f741b5..0058f3f7cf6e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -1643,6 +1643,32 @@ static void sdma_v5_2_get_clockgating_state(void *handle, u64 *flags)
 		*flags |= AMD_CG_SUPPORT_SDMA_LS;
 }
 
+static void sdma_v5_2_ring_begin_use(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	/* Ring begin_use hook: disallow GFXOFF while SDMA is active.
+	 * SDMA 5.2.3 (RMB) FW doesn't seem to properly disallow
+	 * GFXOFF in some cases, leading to hangs in SDMA.
+	 * We can probably just limit this to 5.2.3, but it
+	 * shouldn't hurt for other parts since GFXOFF will be
+	 * disallowed anyway when SDMA is active; this just makes
+	 * it explicit.
+	 */
+	amdgpu_gfx_off_ctrl(adev, false);
+}
+
+static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	/* Ring end_use hook, paired with sdma_v5_2_ring_begin_use():
+	 * SDMA 5.2.3 (RMB) FW doesn't seem to properly disallow GFXOFF
+	 * in some cases, so allow GFXOFF again once SDMA is complete.
+	 */
+	amdgpu_gfx_off_ctrl(adev, true);
+}
+
 const struct amd_ip_funcs sdma_v5_2_ip_funcs = {
 	.name = "sdma_v5_2",
 	.early_init = sdma_v5_2_early_init,
@@ -1690,6 +1716,8 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {
 	.test_ib = sdma_v5_2_ring_test_ib,
 	.insert_nop = sdma_v5_2_ring_insert_nop,
 	.pad_ib = sdma_v5_2_ring_pad_ib,
+	.begin_use = sdma_v5_2_ring_begin_use,
+	.end_use = sdma_v5_2_ring_end_use,
 	.emit_wreg = sdma_v5_2_ring_emit_wreg,
 	.emit_reg_wait = sdma_v5_2_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait,
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c
index 9a24f17a5750..cada9f300a7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c
@@ -119,6 +119,12 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev,
 		tmp = RREG32(IH_RB_CNTL);
 		tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
 		WREG32(IH_RB_CNTL, tmp);
+
+		/* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+		 * can be detected.
+		 */
+		tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
+		WREG32(IH_RB_CNTL, tmp);
 	}
 	return (wptr & ih->ptr_mask);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index d4b8d62f4294..c64c01e2944a 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -902,6 +902,7 @@ static const struct amdgpu_asic_funcs aqua_vanjaram_asic_funcs =
 	.pre_asic_init = &soc15_pre_asic_init,
 	.query_video_codecs = &soc15_query_video_codecs,
 	.encode_ext_smn_addressing = &aqua_vanjaram_encode_ext_smn_addressing,
+	.get_reg_state = &aqua_vanjaram_get_reg_state,
 };
 
 static int soc15_common_early_init(void *handle)
@@ -1161,6 +1162,11 @@ static int soc15_common_early_init(void *handle)
 			AMD_PG_SUPPORT_VCN_DPG |
 			AMD_PG_SUPPORT_JPEG;
 		adev->external_rev_id = adev->rev_id + 0x46;
+		/* GC 9.4.3 uses MMIO register region hole at a different offset */
+		if (!amdgpu_sriov_vf(adev)) {
+			adev->rmmio_remap.reg_offset = 0x1A000;
+			adev->rmmio_remap.bus_addr = adev->rmmio_base + 0x1A000;
+		}
 		break;
 	default:
 		/* FIXME: not supported yet */
@@ -1292,10 +1298,32 @@ static int soc15_common_suspend(void *handle)
 	return soc15_common_hw_fini(adev);
 }
 
+static bool soc15_need_reset_on_resume(struct amdgpu_device *adev)
+{
+	u32 sol_reg;
+
+	sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
+
+	/* Request a reset when resuming from an aborted suspend:
+	 * 1) APUs only for now; dGPUs have not been checked yet.
+	 * 2) S3 suspend was aborted and TOS is already launched.
+	 */
+	if (adev->flags & AMD_IS_APU && adev->in_s3 &&
+			!adev->suspend_complete &&
+			sol_reg)	/* non-zero: presumably TOS is up - confirm */
+		return true;
+
+	return false;
+}
+
 static int soc15_common_resume(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	if (soc15_need_reset_on_resume(adev)) {
+		dev_info(adev->dev, "S3 suspend abort case, let's reset ASIC.\n");
+		soc15_asic_reset(adev);
+	}
 	return soc15_common_hw_init(adev);
 }
 
@@ -1418,11 +1446,14 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags)
 	if (amdgpu_sriov_vf(adev))
 		*flags = 0;
 
-	adev->nbio.funcs->get_clockgating_state(adev, flags);
+	if (adev->nbio.funcs && adev->nbio.funcs->get_clockgating_state)
+		adev->nbio.funcs->get_clockgating_state(adev, flags);
 
-	adev->hdp.funcs->get_clock_gating_state(adev, flags);
+	if (adev->hdp.funcs && adev->hdp.funcs->get_clock_gating_state)
+		adev->hdp.funcs->get_clock_gating_state(adev, flags);
 
-	if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) {
+	if ((amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) &&
+	    (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6))) {
 		/* AMD_CG_SUPPORT_DRM_MGCG */
 		data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_CGTT_CTRL0));
 		if (!(data & 0x01000000))
@@ -1435,9 +1466,11 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags)
 	}
 
 	/* AMD_CG_SUPPORT_ROM_MGCG */
-	adev->smuio.funcs->get_clock_gating_state(adev, flags);
+	if (adev->smuio.funcs && adev->smuio.funcs->get_clock_gating_state)
+		adev->smuio.funcs->get_clock_gating_state(adev, flags);
 
-	adev->df.funcs->get_clockgating_state(adev, flags);
+	if (adev->df.funcs && adev->df.funcs->get_clockgating_state)
+		adev->df.funcs->get_clockgating_state(adev, flags);
 }
 
 static int soc15_common_set_powergating_state(void *handle,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
index eac54042c6c0..1444b7765e4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -27,6 +27,7 @@
 #include "nbio_v6_1.h"
 #include "nbio_v7_0.h"
 #include "nbio_v7_4.h"
+#include "amdgpu_reg_state.h"
 
 extern const struct amdgpu_ip_block_version vega10_common_ip_block;
 
@@ -114,6 +115,9 @@ int aldebaran_reg_base_init(struct amdgpu_device *adev);
 void aqua_vanjaram_ip_map_init(struct amdgpu_device *adev);
 u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id);
 int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev);
+ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev,
+				    enum amdgpu_reg_state reg_state, void *buf,
+				    size_t max_size);
 
 void vega10_doorbell_index_init(struct amdgpu_device *adev);
 void vega20_doorbell_index_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index 48c6efcdeac9..4d7188912edf 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -50,13 +50,13 @@ static const struct amd_ip_funcs soc21_common_ip_funcs;
 /* SOC21 */
 static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn0[] = {
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
 };
 
 static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn1[] = {
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
 };
 
 static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode_vcn0 = {
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
index 917707bba7f3..450b6e831509 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
@@ -219,6 +219,12 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev,
 	tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
 	WREG32(mmIH_RB_CNTL, tmp);
 
+	/* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+	 * can be detected.
+	 */
+	tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+	WREG32(mmIH_RB_CNTL, tmp);
+
 out:
 	return (wptr & ih->ptr_mask);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index e9c2ff74f0bc..7458a218e89d 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -26,6 +26,7 @@
 #include "amdgpu.h"
 #include "umc/umc_12_0_0_offset.h"
 #include "umc/umc_12_0_0_sh_mask.h"
+#include "mp/mp_13_0_6_sh_mask.h"
 
 const uint32_t
 	umc_v12_0_channel_idx_tbl[]
@@ -88,16 +89,26 @@ static void umc_v12_0_reset_error_count(struct amdgpu_device *adev)
 		umc_v12_0_reset_error_count_per_channel, NULL);
 }
 
-bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status)
+bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status)
 {
+	if (amdgpu_ras_is_poison_mode_supported(adev) &&
+	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1))
+		return true;
+
 	return ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
 		(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
 		REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
 		REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1));
 }
 
-bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status)
+bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status)
 {
+	if (amdgpu_ras_is_poison_mode_supported(adev) &&
+	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1))
+		return false;
+
 	return (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
 		(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1 ||
 		(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 &&
@@ -105,7 +116,7 @@ bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status)
 		/* Identify data parity error in replay mode */
 		((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0x5 ||
 		REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0xb) &&
-		!(umc_v12_0_is_uncorrectable_error(mc_umc_status)))));
+		!(umc_v12_0_is_uncorrectable_error(adev, mc_umc_status)))));
 }
 
 static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev,
@@ -124,7 +135,7 @@ static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev,
 	mc_umc_status =
 		RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4);
 
-	if (umc_v12_0_is_correctable_error(mc_umc_status))
+	if (umc_v12_0_is_correctable_error(adev, mc_umc_status))
 		*error_count += 1;
 }
 
@@ -142,7 +153,7 @@ static void umc_v12_0_query_uncorrectable_error_count(struct amdgpu_device *adev
 	mc_umc_status =
 		RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4);
 
-	if (umc_v12_0_is_uncorrectable_error(mc_umc_status))
+	if (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status))
 		*error_count += 1;
 }
 
@@ -166,8 +177,8 @@ static int umc_v12_0_query_error_count(struct amdgpu_device *adev,
 	umc_v12_0_query_correctable_error_count(adev, umc_reg_offset, &ce_count);
 	umc_v12_0_query_uncorrectable_error_count(adev, umc_reg_offset, &ue_count);
 
-	amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count);
-	amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count);
+	amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count);
+	amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count);
 
 	return 0;
 }
@@ -360,6 +371,59 @@ static int umc_v12_0_err_cnt_init_per_channel(struct amdgpu_device *adev,
 	return 0;
 }
 
+static void umc_v12_0_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
+					void *ras_error_status)
+{	/* UMC block: call the MCA SMU RAS logging helper for CE, then UE */
+	amdgpu_mca_smu_log_ras_error(adev,
+		AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_CE, ras_error_status);
+	amdgpu_mca_smu_log_ras_error(adev,
+		AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_UE, ras_error_status);
+}
+
+static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *adev,
+					void *ras_error_status)
+{	/* Report, then convert, the address of each uncorrectable error node */
+	struct ras_err_node *err_node;
+	uint64_t mc_umc_status;
+	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+	for_each_ras_error(err_node, err_data) {
+		mc_umc_status = err_node->err_info.err_addr.err_status;
+		if (!mc_umc_status)	/* zero status: skip this node */
+			continue;
+
+		if (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status)) {
+			uint64_t mca_addr, err_addr, mca_ipid;
+			uint32_t InstanceIdLo;
+			struct amdgpu_smuio_mcm_config_info *mcm_info;
+
+			mcm_info = &err_node->err_info.mcm_info;
+			mca_addr = err_node->err_info.err_addr.err_addr;
+			mca_ipid = err_node->err_info.err_addr.err_ipid;
+
+			err_addr =  REG_GET_FIELD(mca_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+			InstanceIdLo = REG_GET_FIELD(mca_ipid, MCMP1_IPIDT0, InstanceIdLo);
+
+			dev_info(adev->dev, "UMC:IPID:0x%llx, aid:%d, inst:%d, ch:%d, err_addr:0x%llx\n",
+				mca_ipid,
+				mcm_info->die_id,
+				MCA_IPID_LO_2_UMC_INST(InstanceIdLo),
+				MCA_IPID_LO_2_UMC_CH(InstanceIdLo),
+				err_addr);
+
+			umc_v12_0_convert_error_address(adev,
+				err_data, err_addr,
+				MCA_IPID_LO_2_UMC_CH(InstanceIdLo),
+				MCA_IPID_LO_2_UMC_INST(InstanceIdLo),
+				mcm_info->die_id);
+
+			/* Zero this node's err_addr record once it has been handled */
+			memset(&err_node->err_info.err_addr,
+				0, sizeof(err_node->err_info.err_addr));
+		}
+	}
+}
+
 static void umc_v12_0_err_cnt_init(struct amdgpu_device *adev)
 {
 	amdgpu_umc_loop_channels(adev,
@@ -386,4 +450,6 @@ struct amdgpu_umc_ras umc_v12_0_ras = {
 	},
 	.err_cnt_init = umc_v12_0_err_cnt_init,
 	.query_ras_poison_mode = umc_v12_0_query_ras_poison_mode,
+	.ecc_info_query_ras_error_count = umc_v12_0_ecc_info_query_ras_error_count,
+	.ecc_info_query_ras_error_address = umc_v12_0_ecc_info_query_ras_error_address,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
index b34b1e358f8b..e8de3a92251a 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
@@ -117,8 +117,12 @@
 		(pa) |= (UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) << UMC_V12_0_PA_CH6_BIT); \
 	} while (0)
 
-bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status);
-bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status);
+#define MCA_IPID_LO_2_UMC_CH(_ipid_lo) (((((_ipid_lo) >> 20) & 0x1) * 4) + \
+			(((_ipid_lo) >> 12) & 0xF))
+#define MCA_IPID_LO_2_UMC_INST(_ipid_lo) (((_ipid_lo) >> 21) & 0x7)
+
+bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
+bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
 
 extern const uint32_t
 	umc_v12_0_channel_idx_tbl[]
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
index 530549314ce4..a3ee3c4c650f 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
@@ -64,7 +64,7 @@ static void umc_v6_7_query_error_status_helper(struct amdgpu_device *adev,
 	uint64_t reg_value;
 
 	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)
-		dev_info(adev->dev, "Deferred error, no user action is needed.\n");
+		dev_info(adev->dev, "Deferred error\n");
 
 	if (mc_umc_status)
 		dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset);
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 48bfcd0d558b..8ab01ae919d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -100,6 +100,31 @@ static int vcn_v4_0_early_init(void *handle)
 	return amdgpu_vcn_early_init(adev);
 }
 
+static int vcn_v4_0_fw_shared_init(struct amdgpu_device *adev, int inst_idx)
+{	/* (Re)write instance inst_idx's fw_shared fields; always returns 0 */
+	volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+
+	fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+	fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
+	fw_shared->sq.is_enabled = 1;
+
+	fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG);
+	fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ?
+		AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU;
+
+	if (amdgpu_ip_version(adev, VCN_HWIP, 0) ==
+	    IP_VERSION(4, 0, 2)) {	/* flag is stored little-endian like the others */
+		fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT);
+		fw_shared->drm_key_wa.method =
+			AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING;
+	}
+
+	if (amdgpu_vcnfw_log)
+		amdgpu_vcn_fwlog_init(&adev->vcn.inst[inst_idx]);
+
+	return 0;
+}
+
 /**
  * vcn_v4_0_sw_init - sw init for VCN block
  *
@@ -124,8 +149,6 @@ static int vcn_v4_0_sw_init(void *handle)
 		return r;
 
 	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
-		volatile struct amdgpu_vcn4_fw_shared *fw_shared;
-
 		if (adev->vcn.harvest_config & (1 << i))
 			continue;
 
@@ -161,23 +184,7 @@ static int vcn_v4_0_sw_init(void *handle)
 		if (r)
 			return r;
 
-		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
-		fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
-		fw_shared->sq.is_enabled = 1;
-
-		fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG);
-		fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ?
-			AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU;
-
-		if (amdgpu_ip_version(adev, VCN_HWIP, 0) ==
-		    IP_VERSION(4, 0, 2)) {
-			fw_shared->present_flag_0 |= AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT;
-			fw_shared->drm_key_wa.method =
-				AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING;
-		}
-
-		if (amdgpu_vcnfw_log)
-			amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
+		vcn_v4_0_fw_shared_init(adev, i);
 	}
 
 	if (amdgpu_sriov_vf(adev)) {
@@ -1273,6 +1280,9 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev)
 		if (adev->vcn.harvest_config & (1 << i))
 			continue;
 
+		// Must re/init fw_shared at beginning
+		vcn_v4_0_fw_shared_init(adev, i);
+
 		table_size = 0;
 
 		MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, i,
@@ -2008,22 +2018,6 @@ static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_sta
 }
 
 /**
- * vcn_v4_0_set_interrupt_state - set VCN block interrupt state
- *
- * @adev: amdgpu_device pointer
- * @source: interrupt sources
- * @type: interrupt types
- * @state: interrupt states
- *
- * Set VCN block interrupt state
- */
-static int vcn_v4_0_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
-      unsigned type, enum amdgpu_interrupt_state state)
-{
-	return 0;
-}
-
-/**
  * vcn_v4_0_set_ras_interrupt_state - set VCN block RAS interrupt state
  *
  * @adev: amdgpu_device pointer
@@ -2087,7 +2081,6 @@ static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_
 }
 
 static const struct amdgpu_irq_src_funcs vcn_v4_0_irq_funcs = {
-	.set = vcn_v4_0_set_interrupt_state,
 	.process = vcn_v4_0_process_interrupt,
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
index 2eda30e78f61..49e4c3c09aca 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
@@ -269,8 +269,6 @@ static int vcn_v4_0_5_hw_fini(void *handle)
 				vcn_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
 			}
 		}
-
-		amdgpu_irq_put(adev, &adev->vcn.inst[i].irq, 0);
 	}
 
 	return 0;
@@ -1669,22 +1667,6 @@ static int vcn_v4_0_5_set_powergating_state(void *handle, enum amd_powergating_s
 }
 
 /**
- * vcn_v4_0_5_set_interrupt_state - set VCN block interrupt state
- *
- * @adev: amdgpu_device pointer
- * @source: interrupt sources
- * @type: interrupt types
- * @state: interrupt states
- *
- * Set VCN block interrupt state
- */
-static int vcn_v4_0_5_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
-		unsigned type, enum amdgpu_interrupt_state state)
-{
-	return 0;
-}
-
-/**
  * vcn_v4_0_5_process_interrupt - process VCN block interrupt
  *
  * @adev: amdgpu_device pointer
@@ -1726,7 +1708,6 @@ static int vcn_v4_0_5_process_interrupt(struct amdgpu_device *adev, struct amdgp
 }
 
 static const struct amdgpu_irq_src_funcs vcn_v4_0_5_irq_funcs = {
-	.set = vcn_v4_0_5_set_interrupt_state,
 	.process = vcn_v4_0_5_process_interrupt,
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index d364c6dd152c..bf68e18e3824 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -373,6 +373,12 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev,
 	tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
 	WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
 
+	/* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+	 * can be detected.
+	 */
+	tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+	WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
 out:
 	return (wptr & ih->ptr_mask);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
index ddfc6941f9d5..db66e6cccaf2 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
@@ -421,6 +421,12 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev,
 	tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
 	WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
 
+	/* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+	 * can be detected.
+	 */
+	tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+	WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
 out:
 	return (wptr & ih->ptr_mask);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c
index 174f13eff575..d20060a51e05 100644
--- a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c
@@ -96,6 +96,10 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe)
 		adev->vpe.cmdbuf_cpu_addr[1] = f32_cntl;
 
 		amdgpu_vpe_psp_update_sram(adev);
+
+		/* Config DPM */
+		amdgpu_vpe_configure_dpm(vpe);
+
 		return 0;
 	}
 
@@ -128,6 +132,8 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe)
 	}
 
 	vpe_v6_1_halt(vpe, false);
+	/* Config DPM */
+	amdgpu_vpe_configure_dpm(vpe);
 
 	return 0;
 }
@@ -264,6 +270,15 @@ static int vpe_v6_1_set_regs(struct amdgpu_vpe *vpe)
 	vpe->regs.queue0_rb_wptr_hi = regVPEC_QUEUE0_RB_WPTR_HI;
 	vpe->regs.queue0_preempt = regVPEC_QUEUE0_PREEMPT;
 
+	vpe->regs.dpm_enable = regVPEC_PUB_DUMMY2;
+	vpe->regs.dpm_pratio = regVPEC_QUEUE6_DUMMY4;
+	vpe->regs.dpm_request_interval = regVPEC_QUEUE5_DUMMY3;
+	vpe->regs.dpm_decision_threshold = regVPEC_QUEUE5_DUMMY4;
+	vpe->regs.dpm_busy_clamp_threshold = regVPEC_QUEUE7_DUMMY2;
+	vpe->regs.dpm_idle_clamp_threshold = regVPEC_QUEUE7_DUMMY3;
+	vpe->regs.dpm_request_lv = regVPEC_QUEUE7_DUMMY1;
+	vpe->regs.context_indicator = regVPEC_QUEUE6_DUMMY3;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index d7cd5fa313ff..d1caaf0e6a7c 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -674,7 +674,7 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
 	0x86ea6a6a, 0x8f6e837a,
 	0xb96ee0c2, 0xbf800002,
 	0xb97a0002, 0xbf8a0000,
-	0xbe801f6c, 0xbf810000,
+	0xbe801f6c, 0xbf9b0000,
 };
 
 static const uint32_t cwsr_trap_nv1x_hex[] = {
@@ -1091,7 +1091,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
 	0xb9eef807, 0x876dff6d,
 	0x0000ffff, 0x87fe7e7e,
 	0x87ea6a6a, 0xb9faf802,
-	0xbe80226c, 0xbf810000,
+	0xbe80226c, 0xbf9b0000,
 	0xbf9f0000, 0xbf9f0000,
 	0xbf9f0000, 0xbf9f0000,
 	0xbf9f0000, 0x00000000,
@@ -1574,7 +1574,7 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
 	0x86ea6a6a, 0x8f6e837a,
 	0xb96ee0c2, 0xbf800002,
 	0xb97a0002, 0xbf8a0000,
-	0xbe801f6c, 0xbf810000,
+	0xbe801f6c, 0xbf9b0000,
 };
 
 static const uint32_t cwsr_trap_aldebaran_hex[] = {
@@ -2065,11 +2065,11 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
 	0x86ea6a6a, 0x8f6e837a,
 	0xb96ee0c2, 0xbf800002,
 	0xb97a0002, 0xbf8a0000,
-	0xbe801f6c, 0xbf810000,
+	0xbe801f6c, 0xbf9b0000,
 };
 
 static const uint32_t cwsr_trap_gfx10_hex[] = {
-	0xbf820001, 0xbf820220,
+	0xbf820001, 0xbf820221,
 	0xb0804004, 0xb978f802,
 	0x8a78ff78, 0x00020006,
 	0xb97bf803, 0x876eff78,
@@ -2118,391 +2118,391 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
 	0xbf900004, 0xbf8cc07f,
 	0x877aff7f, 0x04000000,
 	0x8f7a857a, 0x886d7a6d,
-	0xbefa037e, 0x877bff7f,
-	0x0000ffff, 0xbefe03c1,
-	0xbeff03c1, 0xdc5f8000,
-	0x007a0000, 0x7e000280,
-	0xbefe037a, 0xbeff037b,
-	0xb97b02dc, 0x8f7b997b,
-	0xb97a3a05, 0x807a817a,
-	0xbf0d997b, 0xbf850002,
-	0x8f7a897a, 0xbf820001,
-	0x8f7a8a7a, 0xb97b1e06,
-	0x8f7b8a7b, 0x807a7b7a,
+	0x7e008200, 0xbefa037e,
 	0x877bff7f, 0x0000ffff,
-	0x807aff7a, 0x00000200,
-	0x807a7e7a, 0x827b807b,
-	0xd7610000, 0x00010870,
-	0xd7610000, 0x00010a71,
-	0xd7610000, 0x00010c72,
-	0xd7610000, 0x00010e73,
-	0xd7610000, 0x00011074,
-	0xd7610000, 0x00011275,
-	0xd7610000, 0x00011476,
-	0xd7610000, 0x00011677,
-	0xd7610000, 0x00011a79,
-	0xd7610000, 0x00011c7e,
-	0xd7610000, 0x00011e7f,
-	0xbefe03ff, 0x00003fff,
-	0xbeff0380, 0xdc5f8040,
-	0x007a0000, 0xd760007a,
-	0x00011d00, 0xd760007b,
-	0x00011f00, 0xbefe037a,
-	0xbeff037b, 0xbef4037e,
-	0x8775ff7f, 0x0000ffff,
-	0x8875ff75, 0x00040000,
-	0xbef60380, 0xbef703ff,
-	0x10807fac, 0xbef1037c,
-	0xbef00380, 0xb97302dc,
-	0x8f739973, 0xbefe03c1,
-	0x907c9973, 0x877c817c,
-	0xbf06817c, 0xbf850002,
-	0xbeff0380, 0xbf820002,
-	0xbeff03c1, 0xbf820009,
+	0xbefe03c1, 0xbeff03c1,
+	0xdc5f8000, 0x007a0000,
+	0x7e000280, 0xbefe037a,
+	0xbeff037b, 0xb97b02dc,
+	0x8f7b997b, 0xb97a3a05,
+	0x807a817a, 0xbf0d997b,
+	0xbf850002, 0x8f7a897a,
+	0xbf820001, 0x8f7a8a7a,
+	0xb97b1e06, 0x8f7b8a7b,
+	0x807a7b7a, 0x877bff7f,
+	0x0000ffff, 0x807aff7a,
+	0x00000200, 0x807a7e7a,
+	0x827b807b, 0xd7610000,
+	0x00010870, 0xd7610000,
+	0x00010a71, 0xd7610000,
+	0x00010c72, 0xd7610000,
+	0x00010e73, 0xd7610000,
+	0x00011074, 0xd7610000,
+	0x00011275, 0xd7610000,
+	0x00011476, 0xd7610000,
+	0x00011677, 0xd7610000,
+	0x00011a79, 0xd7610000,
+	0x00011c7e, 0xd7610000,
+	0x00011e7f, 0xbefe03ff,
+	0x00003fff, 0xbeff0380,
+	0xdc5f8040, 0x007a0000,
+	0xd760007a, 0x00011d00,
+	0xd760007b, 0x00011f00,
+	0xbefe037a, 0xbeff037b,
+	0xbef4037e, 0x8775ff7f,
+	0x0000ffff, 0x8875ff75,
+	0x00040000, 0xbef60380,
+	0xbef703ff, 0x10807fac,
+	0xbef1037c, 0xbef00380,
+	0xb97302dc, 0x8f739973,
+	0xbefe03c1, 0x907c9973,
+	0x877c817c, 0xbf06817c,
+	0xbf850002, 0xbeff0380,
+	0xbf820002, 0xbeff03c1,
+	0xbf820009, 0xbef603ff,
+	0x01000000, 0xe0704080,
+	0x705d0100, 0xe0704100,
+	0x705d0200, 0xe0704180,
+	0x705d0300, 0xbf820008,
 	0xbef603ff, 0x01000000,
-	0xe0704080, 0x705d0100,
-	0xe0704100, 0x705d0200,
-	0xe0704180, 0x705d0300,
-	0xbf820008, 0xbef603ff,
-	0x01000000, 0xe0704100,
-	0x705d0100, 0xe0704200,
-	0x705d0200, 0xe0704300,
-	0x705d0300, 0xb9703a05,
-	0x80708170, 0xbf0d9973,
-	0xbf850002, 0x8f708970,
-	0xbf820001, 0x8f708a70,
-	0xb97a1e06, 0x8f7a8a7a,
-	0x80707a70, 0x8070ff70,
-	0x00000200, 0xbef603ff,
-	0x01000000, 0x7e000280,
-	0x7e020280, 0x7e040280,
-	0xbefc0380, 0xd7610002,
-	0x0000f871, 0x807c817c,
-	0xd7610002, 0x0000f86c,
-	0x807c817c, 0x8a7aff6d,
-	0x80000000, 0xd7610002,
-	0x0000f87a, 0x807c817c,
-	0xd7610002, 0x0000f86e,
-	0x807c817c, 0xd7610002,
-	0x0000f86f, 0x807c817c,
-	0xd7610002, 0x0000f878,
-	0x807c817c, 0xb97af803,
-	0xd7610002, 0x0000f87a,
-	0x807c817c, 0xd7610002,
-	0x0000f87b, 0x807c817c,
-	0xb971f801, 0xd7610002,
-	0x0000f871, 0x807c817c,
-	0xb971f814, 0xd7610002,
-	0x0000f871, 0x807c817c,
-	0xb971f815, 0xd7610002,
-	0x0000f871, 0x807c817c,
-	0xbefe03ff, 0x0000ffff,
-	0xbeff0380, 0xe0704000,
-	0x705d0200, 0xbefe03c1,
+	0xe0704100, 0x705d0100,
+	0xe0704200, 0x705d0200,
+	0xe0704300, 0x705d0300,
 	0xb9703a05, 0x80708170,
 	0xbf0d9973, 0xbf850002,
 	0x8f708970, 0xbf820001,
 	0x8f708a70, 0xb97a1e06,
 	0x8f7a8a7a, 0x80707a70,
+	0x8070ff70, 0x00000200,
 	0xbef603ff, 0x01000000,
-	0xbef90380, 0xbefc0380,
-	0xbf800000, 0xbe802f00,
-	0xbe822f02, 0xbe842f04,
-	0xbe862f06, 0xbe882f08,
-	0xbe8a2f0a, 0xbe8c2f0c,
-	0xbe8e2f0e, 0xd7610002,
-	0x0000f200, 0x80798179,
-	0xd7610002, 0x0000f201,
+	0x7e000280, 0x7e020280,
+	0x7e040280, 0xbefc0380,
+	0xd7610002, 0x0000f871,
+	0x807c817c, 0xd7610002,
+	0x0000f86c, 0x807c817c,
+	0x8a7aff6d, 0x80000000,
+	0xd7610002, 0x0000f87a,
+	0x807c817c, 0xd7610002,
+	0x0000f86e, 0x807c817c,
+	0xd7610002, 0x0000f86f,
+	0x807c817c, 0xd7610002,
+	0x0000f878, 0x807c817c,
+	0xb97af803, 0xd7610002,
+	0x0000f87a, 0x807c817c,
+	0xd7610002, 0x0000f87b,
+	0x807c817c, 0xb971f801,
+	0xd7610002, 0x0000f871,
+	0x807c817c, 0xb971f814,
+	0xd7610002, 0x0000f871,
+	0x807c817c, 0xb971f815,
+	0xd7610002, 0x0000f871,
+	0x807c817c, 0xbefe03ff,
+	0x0000ffff, 0xbeff0380,
+	0xe0704000, 0x705d0200,
+	0xbefe03c1, 0xb9703a05,
+	0x80708170, 0xbf0d9973,
+	0xbf850002, 0x8f708970,
+	0xbf820001, 0x8f708a70,
+	0xb97a1e06, 0x8f7a8a7a,
+	0x80707a70, 0xbef603ff,
+	0x01000000, 0xbef90380,
+	0xbefc0380, 0xbf800000,
+	0xbe802f00, 0xbe822f02,
+	0xbe842f04, 0xbe862f06,
+	0xbe882f08, 0xbe8a2f0a,
+	0xbe8c2f0c, 0xbe8e2f0e,
+	0xd7610002, 0x0000f200,
 	0x80798179, 0xd7610002,
-	0x0000f202, 0x80798179,
-	0xd7610002, 0x0000f203,
+	0x0000f201, 0x80798179,
+	0xd7610002, 0x0000f202,
 	0x80798179, 0xd7610002,
-	0x0000f204, 0x80798179,
-	0xd7610002, 0x0000f205,
+	0x0000f203, 0x80798179,
+	0xd7610002, 0x0000f204,
 	0x80798179, 0xd7610002,
-	0x0000f206, 0x80798179,
-	0xd7610002, 0x0000f207,
+	0x0000f205, 0x80798179,
+	0xd7610002, 0x0000f206,
 	0x80798179, 0xd7610002,
-	0x0000f208, 0x80798179,
-	0xd7610002, 0x0000f209,
+	0x0000f207, 0x80798179,
+	0xd7610002, 0x0000f208,
 	0x80798179, 0xd7610002,
-	0x0000f20a, 0x80798179,
-	0xd7610002, 0x0000f20b,
+	0x0000f209, 0x80798179,
+	0xd7610002, 0x0000f20a,
 	0x80798179, 0xd7610002,
-	0x0000f20c, 0x80798179,
-	0xd7610002, 0x0000f20d,
+	0x0000f20b, 0x80798179,
+	0xd7610002, 0x0000f20c,
 	0x80798179, 0xd7610002,
-	0x0000f20e, 0x80798179,
-	0xd7610002, 0x0000f20f,
-	0x80798179, 0xbf06a079,
-	0xbf840006, 0xe0704000,
-	0x705d0200, 0x8070ff70,
-	0x00000080, 0xbef90380,
-	0x7e040280, 0x807c907c,
-	0xbf0aff7c, 0x00000060,
-	0xbf85ffbc, 0xbe802f00,
-	0xbe822f02, 0xbe842f04,
-	0xbe862f06, 0xbe882f08,
-	0xbe8a2f0a, 0xd7610002,
-	0x0000f200, 0x80798179,
-	0xd7610002, 0x0000f201,
+	0x0000f20d, 0x80798179,
+	0xd7610002, 0x0000f20e,
 	0x80798179, 0xd7610002,
-	0x0000f202, 0x80798179,
-	0xd7610002, 0x0000f203,
+	0x0000f20f, 0x80798179,
+	0xbf06a079, 0xbf840006,
+	0xe0704000, 0x705d0200,
+	0x8070ff70, 0x00000080,
+	0xbef90380, 0x7e040280,
+	0x807c907c, 0xbf0aff7c,
+	0x00000060, 0xbf85ffbc,
+	0xbe802f00, 0xbe822f02,
+	0xbe842f04, 0xbe862f06,
+	0xbe882f08, 0xbe8a2f0a,
+	0xd7610002, 0x0000f200,
 	0x80798179, 0xd7610002,
-	0x0000f204, 0x80798179,
-	0xd7610002, 0x0000f205,
+	0x0000f201, 0x80798179,
+	0xd7610002, 0x0000f202,
 	0x80798179, 0xd7610002,
-	0x0000f206, 0x80798179,
-	0xd7610002, 0x0000f207,
+	0x0000f203, 0x80798179,
+	0xd7610002, 0x0000f204,
 	0x80798179, 0xd7610002,
-	0x0000f208, 0x80798179,
-	0xd7610002, 0x0000f209,
+	0x0000f205, 0x80798179,
+	0xd7610002, 0x0000f206,
 	0x80798179, 0xd7610002,
-	0x0000f20a, 0x80798179,
-	0xd7610002, 0x0000f20b,
-	0x80798179, 0xe0704000,
-	0x705d0200, 0xbefe03c1,
-	0x907c9973, 0x877c817c,
-	0xbf06817c, 0xbf850002,
-	0xbeff0380, 0xbf820001,
-	0xbeff03c1, 0xb97b4306,
-	0x877bc17b, 0xbf840044,
-	0xbf8a0000, 0x877aff6d,
-	0x80000000, 0xbf840040,
-	0x8f7b867b, 0x8f7b827b,
-	0xbef6037b, 0xb9703a05,
-	0x80708170, 0xbf0d9973,
-	0xbf850002, 0x8f708970,
-	0xbf820001, 0x8f708a70,
-	0xb97a1e06, 0x8f7a8a7a,
-	0x80707a70, 0x8070ff70,
-	0x00000200, 0x8070ff70,
-	0x00000080, 0xbef603ff,
-	0x01000000, 0xd7650000,
-	0x000100c1, 0xd7660000,
-	0x000200c1, 0x16000084,
-	0x907c9973, 0x877c817c,
-	0xbf06817c, 0xbefc0380,
-	0xbf850012, 0xbe8303ff,
-	0x00000080, 0xbf800000,
-	0xbf800000, 0xbf800000,
-	0xd8d80000, 0x01000000,
-	0xbf8c0000, 0xe0704000,
-	0x705d0100, 0x807c037c,
-	0x80700370, 0xd5250000,
-	0x0001ff00, 0x00000080,
-	0xbf0a7b7c, 0xbf85fff4,
-	0xbf820011, 0xbe8303ff,
-	0x00000100, 0xbf800000,
-	0xbf800000, 0xbf800000,
-	0xd8d80000, 0x01000000,
-	0xbf8c0000, 0xe0704000,
-	0x705d0100, 0x807c037c,
-	0x80700370, 0xd5250000,
-	0x0001ff00, 0x00000100,
-	0xbf0a7b7c, 0xbf85fff4,
+	0x0000f207, 0x80798179,
+	0xd7610002, 0x0000f208,
+	0x80798179, 0xd7610002,
+	0x0000f209, 0x80798179,
+	0xd7610002, 0x0000f20a,
+	0x80798179, 0xd7610002,
+	0x0000f20b, 0x80798179,
+	0xe0704000, 0x705d0200,
 	0xbefe03c1, 0x907c9973,
 	0x877c817c, 0xbf06817c,
-	0xbf850004, 0xbef003ff,
-	0x00000200, 0xbeff0380,
-	0xbf820003, 0xbef003ff,
-	0x00000400, 0xbeff03c1,
-	0xb97b3a05, 0x807b817b,
-	0x8f7b827b, 0x907c9973,
+	0xbf850002, 0xbeff0380,
+	0xbf820001, 0xbeff03c1,
+	0xb97b4306, 0x877bc17b,
+	0xbf840044, 0xbf8a0000,
+	0x877aff6d, 0x80000000,
+	0xbf840040, 0x8f7b867b,
+	0x8f7b827b, 0xbef6037b,
+	0xb9703a05, 0x80708170,
+	0xbf0d9973, 0xbf850002,
+	0x8f708970, 0xbf820001,
+	0x8f708a70, 0xb97a1e06,
+	0x8f7a8a7a, 0x80707a70,
+	0x8070ff70, 0x00000200,
+	0x8070ff70, 0x00000080,
+	0xbef603ff, 0x01000000,
+	0xd7650000, 0x000100c1,
+	0xd7660000, 0x000200c1,
+	0x16000084, 0x907c9973,
 	0x877c817c, 0xbf06817c,
-	0xbf850017, 0xbef603ff,
-	0x01000000, 0xbefc0384,
-	0xbf0a7b7c, 0xbf840037,
-	0x7e008700, 0x7e028701,
-	0x7e048702, 0x7e068703,
-	0xe0704000, 0x705d0000,
-	0xe0704080, 0x705d0100,
-	0xe0704100, 0x705d0200,
-	0xe0704180, 0x705d0300,
-	0x807c847c, 0x8070ff70,
-	0x00000200, 0xbf0a7b7c,
-	0xbf85ffef, 0xbf820025,
+	0xbefc0380, 0xbf850012,
+	0xbe8303ff, 0x00000080,
+	0xbf800000, 0xbf800000,
+	0xbf800000, 0xd8d80000,
+	0x01000000, 0xbf8c0000,
+	0xe0704000, 0x705d0100,
+	0x807c037c, 0x80700370,
+	0xd5250000, 0x0001ff00,
+	0x00000080, 0xbf0a7b7c,
+	0xbf85fff4, 0xbf820011,
+	0xbe8303ff, 0x00000100,
+	0xbf800000, 0xbf800000,
+	0xbf800000, 0xd8d80000,
+	0x01000000, 0xbf8c0000,
+	0xe0704000, 0x705d0100,
+	0x807c037c, 0x80700370,
+	0xd5250000, 0x0001ff00,
+	0x00000100, 0xbf0a7b7c,
+	0xbf85fff4, 0xbefe03c1,
+	0x907c9973, 0x877c817c,
+	0xbf06817c, 0xbf850004,
+	0xbef003ff, 0x00000200,
+	0xbeff0380, 0xbf820003,
+	0xbef003ff, 0x00000400,
+	0xbeff03c1, 0xb97b3a05,
+	0x807b817b, 0x8f7b827b,
+	0x907c9973, 0x877c817c,
+	0xbf06817c, 0xbf850017,
 	0xbef603ff, 0x01000000,
 	0xbefc0384, 0xbf0a7b7c,
-	0xbf840011, 0x7e008700,
+	0xbf840037, 0x7e008700,
 	0x7e028701, 0x7e048702,
 	0x7e068703, 0xe0704000,
-	0x705d0000, 0xe0704100,
-	0x705d0100, 0xe0704200,
-	0x705d0200, 0xe0704300,
+	0x705d0000, 0xe0704080,
+	0x705d0100, 0xe0704100,
+	0x705d0200, 0xe0704180,
 	0x705d0300, 0x807c847c,
-	0x8070ff70, 0x00000400,
+	0x8070ff70, 0x00000200,
 	0xbf0a7b7c, 0xbf85ffef,
-	0xb97b1e06, 0x877bc17b,
-	0xbf84000c, 0x8f7b837b,
-	0x807b7c7b, 0xbefe03c1,
-	0xbeff0380, 0x7e008700,
+	0xbf820025, 0xbef603ff,
+	0x01000000, 0xbefc0384,
+	0xbf0a7b7c, 0xbf840011,
+	0x7e008700, 0x7e028701,
+	0x7e048702, 0x7e068703,
 	0xe0704000, 0x705d0000,
-	0x807c817c, 0x8070ff70,
-	0x00000080, 0xbf0a7b7c,
-	0xbf85fff8, 0xbf82013b,
-	0xbef4037e, 0x8775ff7f,
-	0x0000ffff, 0x8875ff75,
-	0x00040000, 0xbef60380,
-	0xbef703ff, 0x10807fac,
-	0xb97202dc, 0x8f729972,
-	0x876eff7f, 0x04000000,
-	0xbf840034, 0xbefe03c1,
-	0x907c9972, 0x877c817c,
-	0xbf06817c, 0xbf850002,
-	0xbeff0380, 0xbf820001,
-	0xbeff03c1, 0xb96f4306,
-	0x876fc16f, 0xbf840029,
-	0x8f6f866f, 0x8f6f826f,
-	0xbef6036f, 0xb9783a05,
-	0x80788178, 0xbf0d9972,
-	0xbf850002, 0x8f788978,
-	0xbf820001, 0x8f788a78,
-	0xb96e1e06, 0x8f6e8a6e,
-	0x80786e78, 0x8078ff78,
-	0x00000200, 0x8078ff78,
-	0x00000080, 0xbef603ff,
-	0x01000000, 0x907c9972,
-	0x877c817c, 0xbf06817c,
-	0xbefc0380, 0xbf850009,
-	0xe0310000, 0x781d0000,
-	0x807cff7c, 0x00000080,
-	0x8078ff78, 0x00000080,
-	0xbf0a6f7c, 0xbf85fff8,
-	0xbf820008, 0xe0310000,
-	0x781d0000, 0x807cff7c,
-	0x00000100, 0x8078ff78,
-	0x00000100, 0xbf0a6f7c,
-	0xbf85fff8, 0xbef80380,
+	0xe0704100, 0x705d0100,
+	0xe0704200, 0x705d0200,
+	0xe0704300, 0x705d0300,
+	0x807c847c, 0x8070ff70,
+	0x00000400, 0xbf0a7b7c,
+	0xbf85ffef, 0xb97b1e06,
+	0x877bc17b, 0xbf84000c,
+	0x8f7b837b, 0x807b7c7b,
+	0xbefe03c1, 0xbeff0380,
+	0x7e008700, 0xe0704000,
+	0x705d0000, 0x807c817c,
+	0x8070ff70, 0x00000080,
+	0xbf0a7b7c, 0xbf85fff8,
+	0xbf82013b, 0xbef4037e,
+	0x8775ff7f, 0x0000ffff,
+	0x8875ff75, 0x00040000,
+	0xbef60380, 0xbef703ff,
+	0x10807fac, 0xb97202dc,
+	0x8f729972, 0x876eff7f,
+	0x04000000, 0xbf840034,
 	0xbefe03c1, 0x907c9972,
 	0x877c817c, 0xbf06817c,
 	0xbf850002, 0xbeff0380,
 	0xbf820001, 0xbeff03c1,
-	0xb96f3a05, 0x806f816f,
-	0x8f6f826f, 0x907c9972,
-	0x877c817c, 0xbf06817c,
-	0xbf850024, 0xbef603ff,
-	0x01000000, 0xbeee0378,
+	0xb96f4306, 0x876fc16f,
+	0xbf840029, 0x8f6f866f,
+	0x8f6f826f, 0xbef6036f,
+	0xb9783a05, 0x80788178,
+	0xbf0d9972, 0xbf850002,
+	0x8f788978, 0xbf820001,
+	0x8f788a78, 0xb96e1e06,
+	0x8f6e8a6e, 0x80786e78,
 	0x8078ff78, 0x00000200,
-	0xbefc0384, 0xbf0a6f7c,
-	0xbf840050, 0xe0304000,
-	0x785d0000, 0xe0304080,
-	0x785d0100, 0xe0304100,
-	0x785d0200, 0xe0304180,
-	0x785d0300, 0xbf8c3f70,
-	0x7e008500, 0x7e028501,
-	0x7e048502, 0x7e068503,
-	0x807c847c, 0x8078ff78,
-	0x00000200, 0xbf0a6f7c,
-	0xbf85ffee, 0xe0304000,
-	0x6e5d0000, 0xe0304080,
-	0x6e5d0100, 0xe0304100,
-	0x6e5d0200, 0xe0304180,
-	0x6e5d0300, 0xbf8c3f70,
-	0xbf820034, 0xbef603ff,
-	0x01000000, 0xbeee0378,
-	0x8078ff78, 0x00000400,
-	0xbefc0384, 0xbf0a6f7c,
-	0xbf840012, 0xe0304000,
-	0x785d0000, 0xe0304100,
-	0x785d0100, 0xe0304200,
-	0x785d0200, 0xe0304300,
-	0x785d0300, 0xbf8c3f70,
-	0x7e008500, 0x7e028501,
-	0x7e048502, 0x7e068503,
-	0x807c847c, 0x8078ff78,
-	0x00000400, 0xbf0a6f7c,
-	0xbf85ffee, 0xb96f1e06,
-	0x876fc16f, 0xbf84000e,
-	0x8f6f836f, 0x806f7c6f,
-	0xbefe03c1, 0xbeff0380,
+	0x8078ff78, 0x00000080,
+	0xbef603ff, 0x01000000,
+	0x907c9972, 0x877c817c,
+	0xbf06817c, 0xbefc0380,
+	0xbf850009, 0xe0310000,
+	0x781d0000, 0x807cff7c,
+	0x00000080, 0x8078ff78,
+	0x00000080, 0xbf0a6f7c,
+	0xbf85fff8, 0xbf820008,
+	0xe0310000, 0x781d0000,
+	0x807cff7c, 0x00000100,
+	0x8078ff78, 0x00000100,
+	0xbf0a6f7c, 0xbf85fff8,
+	0xbef80380, 0xbefe03c1,
+	0x907c9972, 0x877c817c,
+	0xbf06817c, 0xbf850002,
+	0xbeff0380, 0xbf820001,
+	0xbeff03c1, 0xb96f3a05,
+	0x806f816f, 0x8f6f826f,
+	0x907c9972, 0x877c817c,
+	0xbf06817c, 0xbf850024,
+	0xbef603ff, 0x01000000,
+	0xbeee0378, 0x8078ff78,
+	0x00000200, 0xbefc0384,
+	0xbf0a6f7c, 0xbf840050,
 	0xe0304000, 0x785d0000,
+	0xe0304080, 0x785d0100,
+	0xe0304100, 0x785d0200,
+	0xe0304180, 0x785d0300,
 	0xbf8c3f70, 0x7e008500,
-	0x807c817c, 0x8078ff78,
-	0x00000080, 0xbf0a6f7c,
-	0xbf85fff7, 0xbeff03c1,
+	0x7e028501, 0x7e048502,
+	0x7e068503, 0x807c847c,
+	0x8078ff78, 0x00000200,
+	0xbf0a6f7c, 0xbf85ffee,
 	0xe0304000, 0x6e5d0000,
-	0xe0304100, 0x6e5d0100,
-	0xe0304200, 0x6e5d0200,
-	0xe0304300, 0x6e5d0300,
-	0xbf8c3f70, 0xb9783a05,
-	0x80788178, 0xbf0d9972,
-	0xbf850002, 0x8f788978,
-	0xbf820001, 0x8f788a78,
-	0xb96e1e06, 0x8f6e8a6e,
-	0x80786e78, 0x8078ff78,
-	0x00000200, 0x80f8ff78,
-	0x00000050, 0xbef603ff,
-	0x01000000, 0xbefc03ff,
-	0x0000006c, 0x80f89078,
-	0xf429003a, 0xf0000000,
-	0xbf8cc07f, 0x80fc847c,
-	0xbf800000, 0xbe803100,
-	0xbe823102, 0x80f8a078,
-	0xf42d003a, 0xf0000000,
-	0xbf8cc07f, 0x80fc887c,
-	0xbf800000, 0xbe803100,
-	0xbe823102, 0xbe843104,
-	0xbe863106, 0x80f8c078,
-	0xf431003a, 0xf0000000,
-	0xbf8cc07f, 0x80fc907c,
-	0xbf800000, 0xbe803100,
-	0xbe823102, 0xbe843104,
-	0xbe863106, 0xbe883108,
-	0xbe8a310a, 0xbe8c310c,
-	0xbe8e310e, 0xbf06807c,
-	0xbf84fff0, 0xba80f801,
-	0x00000000, 0xbf8a0000,
+	0xe0304080, 0x6e5d0100,
+	0xe0304100, 0x6e5d0200,
+	0xe0304180, 0x6e5d0300,
+	0xbf8c3f70, 0xbf820034,
+	0xbef603ff, 0x01000000,
+	0xbeee0378, 0x8078ff78,
+	0x00000400, 0xbefc0384,
+	0xbf0a6f7c, 0xbf840012,
+	0xe0304000, 0x785d0000,
+	0xe0304100, 0x785d0100,
+	0xe0304200, 0x785d0200,
+	0xe0304300, 0x785d0300,
+	0xbf8c3f70, 0x7e008500,
+	0x7e028501, 0x7e048502,
+	0x7e068503, 0x807c847c,
+	0x8078ff78, 0x00000400,
+	0xbf0a6f7c, 0xbf85ffee,
+	0xb96f1e06, 0x876fc16f,
+	0xbf84000e, 0x8f6f836f,
+	0x806f7c6f, 0xbefe03c1,
+	0xbeff0380, 0xe0304000,
+	0x785d0000, 0xbf8c3f70,
+	0x7e008500, 0x807c817c,
+	0x8078ff78, 0x00000080,
+	0xbf0a6f7c, 0xbf85fff7,
+	0xbeff03c1, 0xe0304000,
+	0x6e5d0000, 0xe0304100,
+	0x6e5d0100, 0xe0304200,
+	0x6e5d0200, 0xe0304300,
+	0x6e5d0300, 0xbf8c3f70,
 	0xb9783a05, 0x80788178,
 	0xbf0d9972, 0xbf850002,
 	0x8f788978, 0xbf820001,
 	0x8f788a78, 0xb96e1e06,
 	0x8f6e8a6e, 0x80786e78,
 	0x8078ff78, 0x00000200,
+	0x80f8ff78, 0x00000050,
 	0xbef603ff, 0x01000000,
-	0xf4211bfa, 0xf0000000,
-	0x80788478, 0xf4211b3a,
+	0xbefc03ff, 0x0000006c,
+	0x80f89078, 0xf429003a,
+	0xf0000000, 0xbf8cc07f,
+	0x80fc847c, 0xbf800000,
+	0xbe803100, 0xbe823102,
+	0x80f8a078, 0xf42d003a,
+	0xf0000000, 0xbf8cc07f,
+	0x80fc887c, 0xbf800000,
+	0xbe803100, 0xbe823102,
+	0xbe843104, 0xbe863106,
+	0x80f8c078, 0xf431003a,
+	0xf0000000, 0xbf8cc07f,
+	0x80fc907c, 0xbf800000,
+	0xbe803100, 0xbe823102,
+	0xbe843104, 0xbe863106,
+	0xbe883108, 0xbe8a310a,
+	0xbe8c310c, 0xbe8e310e,
+	0xbf06807c, 0xbf84fff0,
+	0xba80f801, 0x00000000,
+	0xbf8a0000, 0xb9783a05,
+	0x80788178, 0xbf0d9972,
+	0xbf850002, 0x8f788978,
+	0xbf820001, 0x8f788a78,
+	0xb96e1e06, 0x8f6e8a6e,
+	0x80786e78, 0x8078ff78,
+	0x00000200, 0xbef603ff,
+	0x01000000, 0xf4211bfa,
 	0xf0000000, 0x80788478,
-	0xf4211b7a, 0xf0000000,
-	0x80788478, 0xf4211c3a,
+	0xf4211b3a, 0xf0000000,
+	0x80788478, 0xf4211b7a,
 	0xf0000000, 0x80788478,
-	0xf4211c7a, 0xf0000000,
-	0x80788478, 0xf4211eba,
+	0xf4211c3a, 0xf0000000,
+	0x80788478, 0xf4211c7a,
 	0xf0000000, 0x80788478,
-	0xf4211efa, 0xf0000000,
-	0x80788478, 0xf4211e7a,
+	0xf4211eba, 0xf0000000,
+	0x80788478, 0xf4211efa,
 	0xf0000000, 0x80788478,
-	0xf4211cfa, 0xf0000000,
-	0x80788478, 0xf4211bba,
+	0xf4211e7a, 0xf0000000,
+	0x80788478, 0xf4211cfa,
 	0xf0000000, 0x80788478,
-	0xbf8cc07f, 0xb9eef814,
 	0xf4211bba, 0xf0000000,
 	0x80788478, 0xbf8cc07f,
-	0xb9eef815, 0xbefc036f,
-	0xbefe0370, 0xbeff0371,
-	0x876f7bff, 0x000003ff,
-	0xb9ef4803, 0x876f7bff,
-	0xfffff800, 0x906f8b6f,
-	0xb9efa2c3, 0xb9f3f801,
-	0xb96e3a05, 0x806e816e,
-	0xbf0d9972, 0xbf850002,
-	0x8f6e896e, 0xbf820001,
-	0x8f6e8a6e, 0xb96f1e06,
-	0x8f6f8a6f, 0x806e6f6e,
-	0x806eff6e, 0x00000200,
-	0x806e746e, 0x826f8075,
-	0x876fff6f, 0x0000ffff,
-	0xf4091c37, 0xfa000050,
-	0xf4091d37, 0xfa000060,
-	0xf4011e77, 0xfa000074,
-	0xbf8cc07f, 0x876dff6d,
-	0x0000ffff, 0x87fe7e7e,
-	0x87ea6a6a, 0xb9faf802,
-	0xbe80226c, 0xbf810000,
+	0xb9eef814, 0xf4211bba,
+	0xf0000000, 0x80788478,
+	0xbf8cc07f, 0xb9eef815,
+	0xbefc036f, 0xbefe0370,
+	0xbeff0371, 0x876f7bff,
+	0x000003ff, 0xb9ef4803,
+	0x876f7bff, 0xfffff800,
+	0x906f8b6f, 0xb9efa2c3,
+	0xb9f3f801, 0xb96e3a05,
+	0x806e816e, 0xbf0d9972,
+	0xbf850002, 0x8f6e896e,
+	0xbf820001, 0x8f6e8a6e,
+	0xb96f1e06, 0x8f6f8a6f,
+	0x806e6f6e, 0x806eff6e,
+	0x00000200, 0x806e746e,
+	0x826f8075, 0x876fff6f,
+	0x0000ffff, 0xf4091c37,
+	0xfa000050, 0xf4091d37,
+	0xfa000060, 0xf4011e77,
+	0xfa000074, 0xbf8cc07f,
+	0x876dff6d, 0x0000ffff,
+	0x87fe7e7e, 0x87ea6a6a,
+	0xb9faf802, 0xbe80226c,
+	0xbf9b0000, 0xbf9f0000,
 	0xbf9f0000, 0xbf9f0000,
 	0xbf9f0000, 0xbf9f0000,
-	0xbf9f0000, 0x00000000,
 };
 
 static const uint32_t cwsr_trap_gfx11_hex[] = {
@@ -2944,7 +2944,7 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
 	0xb8eef802, 0xbf0d866e,
 	0xbfa20002, 0xb97af802,
 	0xbe80486c, 0xb97af802,
-	0xbe804a6c, 0xbfb00000,
+	0xbe804a6c, 0xbfb10000,
 	0xbf9f0000, 0xbf9f0000,
 	0xbf9f0000, 0xbf9f0000,
 	0xbf9f0000, 0x00000000,
@@ -3436,5 +3436,5 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
 	0x86ea6a6a, 0x8f6e837a,
 	0xb96ee0c2, 0xbf800002,
 	0xb97a0002, 0xbf8a0000,
-	0xbe801f6c, 0xbf810000,
+	0xbe801f6c, 0xbf9b0000,
 };
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
index fdab64624422..71b3dc0c7363 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
@@ -369,6 +369,12 @@ L_SLEEP:
 	s_or_b32	s_save_pc_hi, s_save_pc_hi, s_save_tmp
 
 #if NO_SQC_STORE
+#if ASIC_FAMILY <= CHIP_SIENNA_CICHLID
+	// gfx10: If there was a VALU exception, the exception state must be
+	// cleared before executing the VALU instructions below.
+	v_clrexcp
+#endif
+
 	// Trap temporaries must be saved via VGPR but all VGPRs are in use.
 	// There is no ttmp space to hold the resource constant for VGPR save.
 	// Save v0 by itself since it requires only two SGPRs.
@@ -1098,7 +1104,7 @@ L_RETURN_WITHOUT_PRIV:
 	s_rfe_b64	s_restore_pc_lo						//Return to the main shader program and resume execution
 
 L_END_PGM:
-	s_endpgm
+	s_endpgm_saved
 end
 
 function write_hwreg_to_mem(s, s_rsrc, s_mem_offset)
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
index e506411ad28a..bb26338204f4 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -921,7 +921,7 @@ L_RESTORE:
 /*			the END						  */
 /**************************************************************************/
 L_END_PGM:
-    s_endpgm
+    s_endpgm_saved
 
 end
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index f6d4748c1980..80e90fdef291 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1442,7 +1442,9 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
 			kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
 
 		/* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT */
-		amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv);
+		err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv);
+		if (err)
+			goto sync_memory_failed;
 	}
 
 	mutex_unlock(&p->mutex);
@@ -1564,16 +1566,11 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
 {
 	struct kfd_ioctl_import_dmabuf_args *args = data;
 	struct kfd_process_device *pdd;
-	struct dma_buf *dmabuf;
 	int idr_handle;
 	uint64_t size;
 	void *mem;
 	int r;
 
-	dmabuf = dma_buf_get(args->dmabuf_fd);
-	if (IS_ERR(dmabuf))
-		return PTR_ERR(dmabuf);
-
 	mutex_lock(&p->mutex);
 	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
 	if (!pdd) {
@@ -1587,10 +1584,10 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
 		goto err_unlock;
 	}
 
-	r = amdgpu_amdkfd_gpuvm_import_dmabuf(pdd->dev->adev, dmabuf,
-					      args->va_addr, pdd->drm_priv,
-					      (struct kgd_mem **)&mem, &size,
-					      NULL);
+	r = amdgpu_amdkfd_gpuvm_import_dmabuf_fd(pdd->dev->adev, args->dmabuf_fd,
+						 args->va_addr, pdd->drm_priv,
+						 (struct kgd_mem **)&mem, &size,
+						 NULL);
 	if (r)
 		goto err_unlock;
 
@@ -1601,7 +1598,6 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
 	}
 
 	mutex_unlock(&p->mutex);
-	dma_buf_put(dmabuf);
 
 	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
 
@@ -1612,7 +1608,6 @@ err_free:
 					       pdd->drm_priv, NULL);
 err_unlock:
 	mutex_unlock(&p->mutex);
-	dma_buf_put(dmabuf);
 	return r;
 }
 
@@ -1855,8 +1850,8 @@ static uint32_t get_process_num_bos(struct kfd_process *p)
 	return num_of_bos;
 }
 
-static int criu_get_prime_handle(struct kgd_mem *mem, int flags,
-				      u32 *shared_fd)
+static int criu_get_prime_handle(struct kgd_mem *mem,
+				 int flags, u32 *shared_fd)
 {
 	struct dma_buf *dmabuf;
 	int ret;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 0f58be65132f..739721254a5d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -880,6 +880,10 @@ static int copy_signaled_event_data(uint32_t num_events,
 				dst = &data[i].memory_exception_data;
 				src = &event->memory_exception_data;
 				size = sizeof(struct kfd_hsa_memory_exception_data);
+			} else if (event->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
+				dst = &data[i].memory_exception_data;
+				src = &event->hw_exception_data;
+				size = sizeof(struct kfd_hsa_hw_exception_data);
 			} else if (event->type == KFD_EVENT_TYPE_SIGNAL &&
 				waiter->event_age_enabled) {
 				dst = &data[i].signal_event_data.last_event_age;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 62b205dac63a..6604a3f99c5e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -330,12 +330,6 @@ static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id)
 	pdd->gpuvm_limit =
 		pdd->dev->kfd->shared_resources.gpuvm_size - 1;
 
-	/* dGPUs: the reserved space for kernel
-	 * before SVM
-	 */
-	pdd->qpd.cwsr_base = SVM_CWSR_BASE;
-	pdd->qpd.ib_base = SVM_IB_BASE;
-
 	pdd->scratch_base = MAKE_SCRATCH_APP_BASE_VI();
 	pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
 }
@@ -345,18 +339,18 @@ static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id)
 	pdd->lds_base = MAKE_LDS_APP_BASE_V9();
 	pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
 
-	pdd->gpuvm_base = PAGE_SIZE;
+        /* Raven needs SVM to support graphic handle, etc. Leave the small
+         * reserved space before SVM on Raven as well, even though we don't
+         * have to.
+         * Set gpuvm_base and gpuvm_limit to CANONICAL addresses so that they
+         * are used in Thunk to reserve SVM.
+         */
+        pdd->gpuvm_base = SVM_USER_BASE;
 	pdd->gpuvm_limit =
 		pdd->dev->kfd->shared_resources.gpuvm_size - 1;
 
 	pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9();
 	pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
-
-	/*
-	 * Place TBA/TMA on opposite side of VM hole to prevent
-	 * stray faults from triggering SVM on these pages.
-	 */
-	pdd->qpd.cwsr_base = pdd->dev->kfd->shared_resources.gpuvm_size;
 }
 
 int kfd_init_apertures(struct kfd_process *process)
@@ -413,6 +407,12 @@ int kfd_init_apertures(struct kfd_process *process)
 					return -EINVAL;
 				}
 			}
+
+                        /* dGPUs: the reserved space for kernel
+                         * before SVM
+                         */
+                        pdd->qpd.cwsr_base = SVM_CWSR_BASE;
+                        pdd->qpd.ib_base = SVM_IB_BASE;
 		}
 
 		dev_dbg(kfd_device, "node id %u\n", id);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 6c25dab051d5..bdc01ca9609a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -260,19 +260,6 @@ static void svm_migrate_put_sys_page(unsigned long addr)
 	put_page(page);
 }
 
-static unsigned long svm_migrate_successful_pages(struct migrate_vma *migrate)
-{
-	unsigned long cpages = 0;
-	unsigned long i;
-
-	for (i = 0; i < migrate->npages; i++) {
-		if (migrate->src[i] & MIGRATE_PFN_VALID &&
-		    migrate->src[i] & MIGRATE_PFN_MIGRATE)
-			cpages++;
-	}
-	return cpages;
-}
-
 static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate)
 {
 	unsigned long upages = 0;
@@ -402,6 +389,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
 	struct dma_fence *mfence = NULL;
 	struct migrate_vma migrate = { 0 };
 	unsigned long cpages = 0;
+	unsigned long mpages = 0;
 	dma_addr_t *scratch;
 	void *buf;
 	int r = -ENOMEM;
@@ -442,20 +430,21 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
 		goto out_free;
 	}
 	if (cpages != npages)
-		pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+		pr_debug("partial migration, 0x%lx/0x%llx pages collected\n",
 			 cpages, npages);
 	else
-		pr_debug("0x%lx pages migrated\n", cpages);
+		pr_debug("0x%lx pages collected\n", cpages);
 
 	r = svm_migrate_copy_to_vram(node, prange, &migrate, &mfence, scratch, ttm_res_offset);
 	migrate_vma_pages(&migrate);
 
-	pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
-		svm_migrate_successful_pages(&migrate), cpages, migrate.npages);
-
 	svm_migrate_copy_done(adev, mfence);
 	migrate_vma_finalize(&migrate);
 
+	mpages = cpages - svm_migrate_unsuccessful_pages(&migrate);
+	pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
+			 mpages, cpages, migrate.npages);
+
 	kfd_smi_event_migration_end(node, p->lead_thread->pid,
 				    start >> PAGE_SHIFT, end >> PAGE_SHIFT,
 				    0, node->id, trigger);
@@ -465,12 +454,12 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
 out_free:
 	kvfree(buf);
 out:
-	if (!r && cpages) {
+	if (!r && mpages) {
 		pdd = svm_range_get_pdd_by_node(prange, node);
 		if (pdd)
-			WRITE_ONCE(pdd->page_in, pdd->page_in + cpages);
+			WRITE_ONCE(pdd->page_in, pdd->page_in + mpages);
 
-		return cpages;
+		return mpages;
 	}
 	return r;
 }
@@ -479,6 +468,8 @@ out:
  * svm_migrate_ram_to_vram - migrate svm range from system to device
  * @prange: range structure
  * @best_loc: the device to migrate to
+ * @start_mgr: start page to migrate
+ * @last_mgr: last page to migrate
  * @mm: the process mm structure
  * @trigger: reason of migration
  *
@@ -489,19 +480,20 @@ out:
  */
 static int
 svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+			unsigned long start_mgr, unsigned long last_mgr,
 			struct mm_struct *mm, uint32_t trigger)
 {
 	unsigned long addr, start, end;
 	struct vm_area_struct *vma;
 	uint64_t ttm_res_offset;
 	struct kfd_node *node;
-	unsigned long cpages = 0;
+	unsigned long mpages = 0;
 	long r = 0;
 
-	if (prange->actual_loc == best_loc) {
-		pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
-			 prange->svms, prange->start, prange->last, best_loc);
-		return 0;
+	if (start_mgr < prange->start || last_mgr > prange->last) {
+		pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+			 start_mgr, last_mgr, prange->start, prange->last);
+		return -EFAULT;
 	}
 
 	node = svm_range_get_node_by_id(prange, best_loc);
@@ -510,18 +502,19 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
 		return -ENODEV;
 	}
 
-	pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
-		 prange->start, prange->last, best_loc);
+	pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n",
+		prange->svms, start_mgr, last_mgr, prange->start, prange->last,
+		best_loc);
 
-	start = prange->start << PAGE_SHIFT;
-	end = (prange->last + 1) << PAGE_SHIFT;
+	start = start_mgr << PAGE_SHIFT;
+	end = (last_mgr + 1) << PAGE_SHIFT;
 
 	r = svm_range_vram_node_new(node, prange, true);
 	if (r) {
 		dev_dbg(node->adev->dev, "fail %ld to alloc vram\n", r);
 		return r;
 	}
-	ttm_res_offset = prange->offset << PAGE_SHIFT;
+	ttm_res_offset = (start_mgr - prange->start + prange->offset) << PAGE_SHIFT;
 
 	for (addr = start; addr < end;) {
 		unsigned long next;
@@ -536,16 +529,19 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
 			pr_debug("failed %ld to migrate\n", r);
 			break;
 		} else {
-			cpages += r;
+			mpages += r;
 		}
 		ttm_res_offset += next - addr;
 		addr = next;
 	}
 
-	if (cpages) {
+	if (mpages) {
 		prange->actual_loc = best_loc;
-		svm_range_dma_unmap(prange);
-	} else {
+		prange->vram_pages += mpages;
+	} else if (!prange->actual_loc) {
+		/* If no page was migrated and all pages of prange are in
+		 * sys ram, drop the svm_bo obtained from svm_range_vram_node_new.
+		 */
 		svm_range_vram_node_free(prange);
 	}
 
@@ -578,7 +574,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
 	pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
 		 prange->last);
 
-	addr = prange->start << PAGE_SHIFT;
+	addr = migrate->start;
 
 	src = (uint64_t *)(scratch + npages);
 	dst = scratch;
@@ -663,9 +659,8 @@ out_oom:
  * Context: Process context, caller hold mmap read lock, prange->migrate_mutex
  *
  * Return:
- *   0 - success with all pages migrated
  *   negative values - indicate error
- *   positive values - partial migration, number of pages not migrated
+ *   positive values or zero - number of pages migrated
  */
 static long
 svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
@@ -676,6 +671,7 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
 	uint64_t npages = (end - start) >> PAGE_SHIFT;
 	unsigned long upages = npages;
 	unsigned long cpages = 0;
+	unsigned long mpages = 0;
 	struct amdgpu_device *adev = node->adev;
 	struct kfd_process_device *pdd;
 	struct dma_fence *mfence = NULL;
@@ -725,10 +721,10 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
 		goto out_free;
 	}
 	if (cpages != npages)
-		pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+		pr_debug("partial migration, 0x%lx/0x%llx pages collected\n",
 			 cpages, npages);
 	else
-		pr_debug("0x%lx pages migrated\n", cpages);
+		pr_debug("0x%lx pages collected\n", cpages);
 
 	r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
 				    scratch, npages);
@@ -751,17 +747,21 @@ out_free:
 	kvfree(buf);
 out:
 	if (!r && cpages) {
+		mpages = cpages - upages;
 		pdd = svm_range_get_pdd_by_node(prange, node);
 		if (pdd)
-			WRITE_ONCE(pdd->page_out, pdd->page_out + cpages);
+			WRITE_ONCE(pdd->page_out, pdd->page_out + mpages);
 	}
-	return r ? r : upages;
+
+	return r ? r : mpages;
 }
 
 /**
  * svm_migrate_vram_to_ram - migrate svm range from device to system
  * @prange: range structure
  * @mm: process mm, use current->mm if NULL
+ * @start_mgr: start page need be migrated to sys ram
+ * @last_mgr: last page need be migrated to sys ram
  * @trigger: reason of migration
  * @fault_page: is from vmf->page, svm_migrate_to_ram(), this is CPU page fault callback
  *
@@ -771,6 +771,7 @@ out:
  * 0 - OK, otherwise error code
  */
 int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
+			    unsigned long start_mgr, unsigned long last_mgr,
 			    uint32_t trigger, struct page *fault_page)
 {
 	struct kfd_node *node;
@@ -778,26 +779,33 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
 	unsigned long addr;
 	unsigned long start;
 	unsigned long end;
-	unsigned long upages = 0;
+	unsigned long mpages = 0;
 	long r = 0;
 
+	/* this prange has no vram pages to migrate to sys ram */
 	if (!prange->actual_loc) {
 		pr_debug("[0x%lx 0x%lx] already migrated to ram\n",
 			 prange->start, prange->last);
 		return 0;
 	}
 
+	if (start_mgr < prange->start || last_mgr > prange->last) {
+		pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+			 start_mgr, last_mgr, prange->start, prange->last);
+		return -EFAULT;
+	}
+
 	node = svm_range_get_node_by_id(prange, prange->actual_loc);
 	if (!node) {
 		pr_debug("failed to get kfd node by id 0x%x\n", prange->actual_loc);
 		return -ENODEV;
 	}
 	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu 0x%x to ram\n",
-		 prange->svms, prange, prange->start, prange->last,
+		 prange->svms, prange, start_mgr, last_mgr,
 		 prange->actual_loc);
 
-	start = prange->start << PAGE_SHIFT;
-	end = (prange->last + 1) << PAGE_SHIFT;
+	start = start_mgr << PAGE_SHIFT;
+	end = (last_mgr + 1) << PAGE_SHIFT;
 
 	for (addr = start; addr < end;) {
 		unsigned long next;
@@ -816,14 +824,21 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
 			pr_debug("failed %ld to migrate prange %p\n", r, prange);
 			break;
 		} else {
-			upages += r;
+			mpages += r;
 		}
 		addr = next;
 	}
 
-	if (r >= 0 && !upages) {
-		svm_range_vram_node_free(prange);
-		prange->actual_loc = 0;
+	if (r >= 0) {
+		prange->vram_pages -= mpages;
+
+		/* if prange has no vram pages left, set its actual_loc to
+		 * system and drop its svm_bo ref
+		 */
+		if (prange->vram_pages == 0 && prange->ttm_res) {
+			prange->actual_loc = 0;
+			svm_range_vram_node_free(prange);
+		}
 	}
 
 	return r < 0 ? r : 0;
@@ -833,17 +848,23 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
  * svm_migrate_vram_to_vram - migrate svm range from device to device
  * @prange: range structure
  * @best_loc: the device to migrate to
+ * @start: start page need be migrated to vram on best_loc
+ * @last: last page need be migrated to vram on best_loc
  * @mm: process mm, use current->mm if NULL
  * @trigger: reason of migration
  *
  * Context: Process context, caller hold mmap read lock, svms lock, prange lock
  *
+ * migrate all vram pages in prange to sys ram, then migrate
+ * [start, last] pages from sys ram to gpu node best_loc.
+ *
  * Return:
  * 0 - OK, otherwise error code
  */
 static int
 svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
-			 struct mm_struct *mm, uint32_t trigger)
+			unsigned long start, unsigned long last,
+			struct mm_struct *mm, uint32_t trigger)
 {
 	int r, retries = 3;
 
@@ -855,7 +876,8 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
 	pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);
 
 	do {
-		r = svm_migrate_vram_to_ram(prange, mm, trigger, NULL);
+		r = svm_migrate_vram_to_ram(prange, mm, prange->start, prange->last,
+					    trigger, NULL);
 		if (r)
 			return r;
 	} while (prange->actual_loc && --retries);
@@ -863,17 +885,21 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
 	if (prange->actual_loc)
 		return -EDEADLK;
 
-	return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
+	return svm_migrate_ram_to_vram(prange, best_loc, start, last, mm, trigger);
 }
 
 int
 svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
+		    unsigned long start, unsigned long last,
 		    struct mm_struct *mm, uint32_t trigger)
 {
-	if  (!prange->actual_loc)
-		return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
+	if  (!prange->actual_loc || prange->actual_loc == best_loc)
+		return svm_migrate_ram_to_vram(prange, best_loc, start, last,
+					       mm, trigger);
+
 	else
-		return svm_migrate_vram_to_vram(prange, best_loc, mm, trigger);
+		return svm_migrate_vram_to_vram(prange, best_loc, start, last,
+						mm, trigger);
 
 }
 
@@ -889,10 +915,9 @@ svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
  */
 static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
 {
+	unsigned long start, last, size;
 	unsigned long addr = vmf->address;
 	struct svm_range_bo *svm_bo;
-	enum svm_work_list_ops op;
-	struct svm_range *parent;
 	struct svm_range *prange;
 	struct kfd_process *p;
 	struct mm_struct *mm;
@@ -929,51 +954,31 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
 
 	mutex_lock(&p->svms.lock);
 
-	prange = svm_range_from_addr(&p->svms, addr, &parent);
+	prange = svm_range_from_addr(&p->svms, addr, NULL);
 	if (!prange) {
 		pr_debug("failed get range svms 0x%p addr 0x%lx\n", &p->svms, addr);
 		r = -EFAULT;
 		goto out_unlock_svms;
 	}
 
-	mutex_lock(&parent->migrate_mutex);
-	if (prange != parent)
-		mutex_lock_nested(&prange->migrate_mutex, 1);
+	mutex_lock(&prange->migrate_mutex);
 
 	if (!prange->actual_loc)
 		goto out_unlock_prange;
 
-	svm_range_lock(parent);
-	if (prange != parent)
-		mutex_lock_nested(&prange->lock, 1);
-	r = svm_range_split_by_granularity(p, mm, addr, parent, prange);
-	if (prange != parent)
-		mutex_unlock(&prange->lock);
-	svm_range_unlock(parent);
-	if (r) {
-		pr_debug("failed %d to split range by granularity\n", r);
-		goto out_unlock_prange;
-	}
+	/* Align migration range start and size to granularity size */
+	size = 1UL << prange->granularity;
+	start = max(ALIGN_DOWN(addr, size), prange->start);
+	last = min(ALIGN(addr + 1, size) - 1, prange->last);
 
-	r = svm_migrate_vram_to_ram(prange, vmf->vma->vm_mm,
-				    KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU,
-				    vmf->page);
+	r = svm_migrate_vram_to_ram(prange, vmf->vma->vm_mm, start, last,
+				    KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, vmf->page);
 	if (r)
 		pr_debug("failed %d migrate svms 0x%p range 0x%p [0x%lx 0x%lx]\n",
-			 r, prange->svms, prange, prange->start, prange->last);
-
-	/* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
-	if (p->xnack_enabled && parent == prange)
-		op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP;
-	else
-		op = SVM_OP_UPDATE_RANGE_NOTIFIER;
-	svm_range_add_list_work(&p->svms, parent, mm, op);
-	schedule_deferred_list_work(&p->svms);
+			r, prange->svms, prange, start, last);
 
 out_unlock_prange:
-	if (prange != parent)
-		mutex_unlock(&prange->migrate_mutex);
-	mutex_unlock(&parent->migrate_mutex);
+	mutex_unlock(&prange->migrate_mutex);
 out_unlock_svms:
 	mutex_unlock(&p->svms.lock);
 out_unref_process:
@@ -1021,7 +1026,7 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
 	} else {
 		res = devm_request_free_mem_region(adev->dev, &iomem_resource, size);
 		if (IS_ERR(res))
-			return -ENOMEM;
+			return PTR_ERR(res);
 		pgmap->range.start = res->start;
 		pgmap->range.end = res->end;
 		pgmap->type = MEMORY_DEVICE_PRIVATE;
@@ -1037,10 +1042,10 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
 	r = devm_memremap_pages(adev->dev, pgmap);
 	if (IS_ERR(r)) {
 		pr_err("failed to register HMM device memory\n");
-		/* Disable SVM support capability */
-		pgmap->type = 0;
 		if (pgmap->type == MEMORY_DEVICE_PRIVATE)
 			devm_release_mem_region(adev->dev, res->start, resource_size(res));
+		/* Disable SVM support capability */
+		pgmap->type = 0;
 		return PTR_ERR(r);
 	}
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
index 487f26368164..2eebf67f9c2c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
@@ -41,9 +41,13 @@ enum MIGRATION_COPY_DIR {
 };
 
 int svm_migrate_to_vram(struct svm_range *prange,  uint32_t best_loc,
+			unsigned long start, unsigned long last,
 			struct mm_struct *mm, uint32_t trigger);
+
 int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
+			    unsigned long start, unsigned long last,
 			    uint32_t trigger, struct page *fault_page);
+
 unsigned long
 svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index 8b7fed913526..22cbfa1bdadd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -170,6 +170,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
 	m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
 	m->cp_hqd_pq_control |=
 			ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
+	m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
 	pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
 
 	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
index 15277f1d5cf0..826bc4f6c8a7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
@@ -55,8 +55,8 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
 	m = get_mqd(mqd);
 
 	if (has_wa_flag) {
-		uint32_t wa_mask = minfo->update_flag == UPDATE_FLAG_DBG_WA_ENABLE ?
-						0xffff : 0xffffffff;
+		uint32_t wa_mask =
+			(minfo->update_flag & UPDATE_FLAG_DBG_WA_ENABLE) ? 0xffff : 0xffffffff;
 
 		m->compute_static_thread_mgmt_se0 = wa_mask;
 		m->compute_static_thread_mgmt_se1 = wa_mask;
@@ -224,6 +224,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
 	m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
 	m->cp_hqd_pq_control |=
 			ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
+	m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
 	pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
 
 	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 42d881809dc7..697b6d530d12 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -303,6 +303,15 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
 		update_cu_mask(mm, mqd, minfo, 0);
 	set_priority(m, q);
 
+	if (minfo && KFD_GC_VERSION(mm->dev) >= IP_VERSION(9, 4, 2)) {
+		if (minfo->update_flag & UPDATE_FLAG_IS_GWS)
+			m->compute_resource_limits |=
+				COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK;
+		else
+			m->compute_resource_limits &=
+				~COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK;
+	}
+
 	q->is_active = QUEUE_IS_ACTIVE(*q);
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 9cc32f577e38..80320b8603fc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -532,6 +532,7 @@ struct queue_properties {
 enum mqd_update_flag {
 	UPDATE_FLAG_DBG_WA_ENABLE = 1,
 	UPDATE_FLAG_DBG_WA_DISABLE = 2,
+	UPDATE_FLAG_IS_GWS = 4, /* quirk for gfx9 IP */
 };
 
 struct mqd_update_info {
@@ -748,7 +749,6 @@ struct kfd_process_device {
 	/* VM context for GPUVM allocations */
 	struct file *drm_file;
 	void *drm_priv;
-	atomic64_t tlb_seq;
 
 	/* GPUVM allocations storage */
 	struct idr alloc_idr;
@@ -918,7 +918,7 @@ struct kfd_process {
 	 * fence will be triggered during eviction and new one will be created
 	 * during restore
 	 */
-	struct dma_fence *ef;
+	struct dma_fence __rcu *ef;
 
 	/* Work items for evicting and restoring BOs */
 	struct delayed_work eviction_work;
@@ -971,7 +971,7 @@ struct kfd_process {
 	struct work_struct debug_event_workarea;
 
 	/* Tracks debug per-vmid request for debug flags */
-	bool dbg_flags;
+	u32 dbg_flags;
 
 	atomic_t poison;
 	/* Queues are in paused stated because we are in the process of doing a CRIU checkpoint */
@@ -1128,7 +1128,7 @@ static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev,
 	struct kfd_dev *dev = adev->kfd.dev;
 	uint32_t i;
 
-	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3))
+	if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3))
 		return dev->nodes[0];
 
 	for (i = 0; i < dev->num_nodes; i++)
@@ -1462,7 +1462,14 @@ void kfd_signal_reset_event(struct kfd_node *dev);
 
 void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid);
 
-void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
+static inline void kfd_flush_tlb(struct kfd_process_device *pdd,
+				 enum TLB_FLUSH_TYPE type)
+{
+	struct amdgpu_device *adev = pdd->dev->adev;
+	struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
+
+	amdgpu_vm_flush_compute_tlb(adev, vm, type, pdd->dev->xcc_mask);
+}
 
 static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
 {
@@ -1482,10 +1489,15 @@ void kfd_dec_compute_active(struct kfd_node *dev);
 
 /* Cgroup Support */
 /* Check with device cgroup if @kfd device is accessible */
-static inline int kfd_devcgroup_check_permission(struct kfd_node *kfd)
+static inline int kfd_devcgroup_check_permission(struct kfd_node *node)
 {
 #if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF)
-	struct drm_device *ddev = adev_to_drm(kfd->adev);
+	struct drm_device *ddev;
+
+	if (node->xcp)
+		ddev = node->xcp->ddev;
+	else
+		ddev = adev_to_drm(node->adev);
 
 	return devcgroup_check_permission(DEVCG_DEV_CHAR, DRM_MAJOR,
 					  ddev->render->index,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 7a33e06f5c90..717a60d7a4ea 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -664,7 +664,8 @@ int kfd_process_create_wq(void)
 	if (!kfd_process_wq)
 		kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
 	if (!kfd_restore_wq)
-		kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);
+		kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq",
+							 WQ_FREEZABLE);
 
 	if (!kfd_process_wq || !kfd_restore_wq) {
 		kfd_process_destroy_wq();
@@ -1109,6 +1110,7 @@ static void kfd_process_wq_release(struct work_struct *work)
 {
 	struct kfd_process *p = container_of(work, struct kfd_process,
 					     release_work);
+	struct dma_fence *ef;
 
 	kfd_process_dequeue_from_all_devices(p);
 	pqm_uninit(&p->pqm);
@@ -1117,7 +1119,9 @@ static void kfd_process_wq_release(struct work_struct *work)
 	 * destroyed. This allows any BOs to be freed without
 	 * triggering pointless evictions or waiting for fences.
 	 */
-	dma_fence_signal(p->ef);
+	synchronize_rcu();
+	ef = rcu_access_pointer(p->ef);
+	dma_fence_signal(ef);
 
 	kfd_process_remove_sysfs(p);
 
@@ -1126,7 +1130,7 @@ static void kfd_process_wq_release(struct work_struct *work)
 	svm_range_list_fini(p);
 
 	kfd_process_destroy_pdds(p);
-	dma_fence_put(p->ef);
+	dma_fence_put(ef);
 
 	kfd_event_free_process(p);
 
@@ -1642,6 +1646,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
 	struct amdgpu_fpriv *drv_priv;
 	struct amdgpu_vm *avm;
 	struct kfd_process *p;
+	struct dma_fence *ef;
 	struct kfd_node *dev;
 	int ret;
 
@@ -1661,13 +1666,13 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
 
 	ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, avm,
 						     &p->kgd_process_info,
-						     &p->ef);
+						     &ef);
 	if (ret) {
 		pr_err("Failed to create process VM object\n");
 		return ret;
 	}
+	RCU_INIT_POINTER(p->ef, ef);
 	pdd->drm_priv = drm_file->private_data;
-	atomic64_set(&pdd->tlb_seq, 0);
 
 	ret = kfd_process_device_reserve_ib_mem(pdd);
 	if (ret)
@@ -1909,6 +1914,21 @@ kfd_process_gpuid_from_node(struct kfd_process *p, struct kfd_node *node,
 	return -EINVAL;
 }
 
+static int signal_eviction_fence(struct kfd_process *p)
+{
+	struct dma_fence *ef;
+	int ret;
+
+	rcu_read_lock();
+	ef = dma_fence_get_rcu_safe(&p->ef);
+	rcu_read_unlock();
+
+	ret = dma_fence_signal(ef);
+	dma_fence_put(ef);
+
+	return ret;
+}
+
 static void evict_process_worker(struct work_struct *work)
 {
 	int ret;
@@ -1921,31 +1941,46 @@ static void evict_process_worker(struct work_struct *work)
 	 * lifetime of this thread, kfd_process p will be valid
 	 */
 	p = container_of(dwork, struct kfd_process, eviction_work);
-	WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
-		  "Eviction fence mismatch\n");
-
-	/* Narrow window of overlap between restore and evict work
-	 * item is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
-	 * unreserves KFD BOs, it is possible to evicted again. But
-	 * restore has few more steps of finish. So lets wait for any
-	 * previous restore work to complete
-	 */
-	flush_delayed_work(&p->restore_work);
 
 	pr_debug("Started evicting pasid 0x%x\n", p->pasid);
 	ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_TTM);
 	if (!ret) {
-		dma_fence_signal(p->ef);
-		dma_fence_put(p->ef);
-		p->ef = NULL;
-		queue_delayed_work(kfd_restore_wq, &p->restore_work,
+		/* If another thread already signaled the eviction fence,
+		 * they are responsible for stopping the queues and scheduling
+		 * the restore work.
+		 */
+		if (!signal_eviction_fence(p))
+			queue_delayed_work(kfd_restore_wq, &p->restore_work,
 				msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
+		else
+			kfd_process_restore_queues(p);
 
 		pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
 	} else
 		pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
 }
 
+static int restore_process_helper(struct kfd_process *p)
+{
+	int ret = 0;
+
+	/* VMs may not have been acquired yet during debugging. */
+	if (p->kgd_process_info) {
+		ret = amdgpu_amdkfd_gpuvm_restore_process_bos(
+			p->kgd_process_info, &p->ef);
+		if (ret)
+			return ret;
+	}
+
+	ret = kfd_process_restore_queues(p);
+	if (!ret)
+		pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
+	else
+		pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
+
+	return ret;
+}
+
 static void restore_process_worker(struct work_struct *work)
 {
 	struct delayed_work *dwork;
@@ -1971,24 +2006,15 @@ static void restore_process_worker(struct work_struct *work)
 	 */
 
 	p->last_restore_timestamp = get_jiffies_64();
-	/* VMs may not have been acquired yet during debugging. */
-	if (p->kgd_process_info)
-		ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
-							     &p->ef);
+
+	ret = restore_process_helper(p);
 	if (ret) {
 		pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
 			 p->pasid, PROCESS_BACK_OFF_TIME_MS);
 		ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
 				msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
 		WARN(!ret, "reschedule restore work failed\n");
-		return;
 	}
-
-	ret = kfd_process_restore_queues(p);
-	if (!ret)
-		pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
-	else
-		pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
 }
 
 void kfd_suspend_all_processes(void)
@@ -1999,14 +2025,9 @@ void kfd_suspend_all_processes(void)
 
 	WARN(debug_evictions, "Evicting all processes");
 	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
-		cancel_delayed_work_sync(&p->eviction_work);
-		flush_delayed_work(&p->restore_work);
-
 		if (kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND))
 			pr_err("Failed to suspend process 0x%x\n", p->pasid);
-		dma_fence_signal(p->ef);
-		dma_fence_put(p->ef);
-		p->ef = NULL;
+		signal_eviction_fence(p);
 	}
 	srcu_read_unlock(&kfd_processes_srcu, idx);
 }
@@ -2018,7 +2039,7 @@ int kfd_resume_all_processes(void)
 	int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);
 
 	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
-		if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
+		if (restore_process_helper(p)) {
 			pr_err("Restore process %d failed during resume\n",
 			       p->pasid);
 			ret = -EFAULT;
@@ -2059,36 +2080,6 @@ int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process,
 			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
 }
 
-void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
-{
-	struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
-	uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
-	struct kfd_node *dev = pdd->dev;
-	uint32_t xcc_mask = dev->xcc_mask;
-	int xcc = 0;
-
-	/*
-	 * It can be that we race and lose here, but that is extremely unlikely
-	 * and the worst thing which could happen is that we flush the changes
-	 * into the TLB once more which is harmless.
-	 */
-	if (atomic64_xchg(&pdd->tlb_seq, tlb_seq) == tlb_seq)
-		return;
-
-	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
-		/* Nothing to flush until a VMID is assigned, which
-		 * only happens when the first queue is created.
-		 */
-		if (pdd->qpd.vmid)
-			amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->adev,
-							pdd->qpd.vmid);
-	} else {
-		for_each_inst(xcc, xcc_mask)
-			amdgpu_amdkfd_flush_gpu_tlb_pasid(
-				dev->adev, pdd->process->pasid, type, xcc);
-	}
-}
-
 /* assumes caller holds process lock. */
 int kfd_process_drain_interrupts(struct kfd_process_device *pdd)
 {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 77649392e233..4858112f9a53 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -87,12 +87,15 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
 		return;
 
 	dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
+	if (dev->kfd->shared_resources.enable_mes)
+		amdgpu_mes_flush_shader_debugger(dev->adev, pdd->proc_ctx_gpu_addr);
 	pdd->already_dequeued = true;
 }
 
 int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
 			void *gws)
 {
+	struct mqd_update_info minfo = {0};
 	struct kfd_node *dev = NULL;
 	struct process_queue_node *pqn;
 	struct kfd_process_device *pdd;
@@ -144,9 +147,10 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
 	}
 
 	pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;
+	minfo.update_flag = gws ? UPDATE_FLAG_IS_GWS : 0;
 
 	return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
-							pqn->q, NULL);
+							pqn->q, &minfo);
 }
 
 void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
@@ -169,16 +173,43 @@ int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
 	return 0;
 }
 
+static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
+				     struct process_queue_node *pqn)
+{
+	struct kfd_node *dev;
+	struct kfd_process_device *pdd;
+
+	dev = pqn->q->device;
+
+	pdd = kfd_get_process_device_data(dev, pqm->process);
+	if (!pdd) {
+		pr_err("Process device data doesn't exist\n");
+		return;
+	}
+
+	if (pqn->q->gws) {
+		if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
+		    !dev->kfd->shared_resources.enable_mes)
+			amdgpu_amdkfd_remove_gws_from_process(
+				pqm->process->kgd_process_info, pqn->q->gws);
+		pdd->qpd.num_gws = 0;
+	}
+
+	if (dev->kfd->shared_resources.enable_mes) {
+		amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->gang_ctx_bo);
+		if (pqn->q->wptr_bo)
+			amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo);
+	}
+}
+
 void pqm_uninit(struct process_queue_manager *pqm)
 {
 	struct process_queue_node *pqn, *next;
 
 	list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
-		if (pqn->q && pqn->q->gws &&
-		    KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
-		    !pqn->q->device->kfd->shared_resources.enable_mes)
-			amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
-				pqn->q->gws);
+		if (pqn->q)
+			pqm_clean_queue_resource(pqm, pqn);
+
 		kfd_procfs_del_queue(pqn->q);
 		uninit_queue(pqn->q);
 		list_del(&pqn->process_queue_list);
@@ -461,22 +492,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
 				goto err_destroy_queue;
 		}
 
-		if (pqn->q->gws) {
-			if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
-			    !dev->kfd->shared_resources.enable_mes)
-				amdgpu_amdkfd_remove_gws_from_process(
-						pqm->process->kgd_process_info,
-						pqn->q->gws);
-			pdd->qpd.num_gws = 0;
-		}
-
-		if (dev->kfd->shared_resources.enable_mes) {
-			amdgpu_amdkfd_free_gtt_mem(dev->adev,
-						   pqn->q->gang_ctx_bo);
-			if (pqn->q->wptr_bo)
-				amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo);
-
-		}
+		pqm_clean_queue_resource(pqm, pqn);
 		uninit_queue(pqn->q);
 	}
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 76d9f14ccc7c..c50a0dc9c9c0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -198,6 +198,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 		pr_debug_ratelimited("dma mapping 0x%llx for page addr 0x%lx\n",
 				     addr[i] >> PAGE_SHIFT, page_to_pfn(page));
 	}
+
 	return 0;
 }
 
@@ -349,6 +350,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
 	INIT_LIST_HEAD(&prange->child_list);
 	atomic_set(&prange->invalid, 0);
 	prange->validate_timestamp = 0;
+	prange->vram_pages = 0;
 	mutex_init(&prange->migrate_mutex);
 	mutex_init(&prange->lock);
 
@@ -395,19 +397,16 @@ static void svm_range_bo_release(struct kref *kref)
 			 prange->start, prange->last);
 		mutex_lock(&prange->lock);
 		prange->svm_bo = NULL;
+		/* prange should not hold vram page now */
+		WARN_ONCE(prange->actual_loc, "prange should not hold vram page");
 		mutex_unlock(&prange->lock);
 
 		spin_lock(&svm_bo->list_lock);
 	}
 	spin_unlock(&svm_bo->list_lock);
-	if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) {
-		/* We're not in the eviction worker.
-		 * Signal the fence and synchronize with any
-		 * pending eviction work.
-		 */
+	if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
+		/* We're not in the eviction worker. Signal the fence. */
 		dma_fence_signal(&svm_bo->eviction_fence->base);
-		cancel_work_sync(&svm_bo->eviction_work);
-	}
 	dma_fence_put(&svm_bo->eviction_fence->base);
 	amdgpu_bo_unref(&svm_bo->bo);
 	kfree(svm_bo);
@@ -878,14 +877,29 @@ static void svm_range_debug_dump(struct svm_range_list *svms)
 
 static void *
 svm_range_copy_array(void *psrc, size_t size, uint64_t num_elements,
-		     uint64_t offset)
+		     uint64_t offset, uint64_t *vram_pages)
 {
+	unsigned char *src = (unsigned char *)psrc + offset;
 	unsigned char *dst;
+	uint64_t i;
 
 	dst = kvmalloc_array(num_elements, size, GFP_KERNEL);
 	if (!dst)
 		return NULL;
-	memcpy(dst, (unsigned char *)psrc + offset, num_elements * size);
+
+	if (!vram_pages) {
+		memcpy(dst, src, num_elements * size);
+		return (void *)dst;
+	}
+
+	*vram_pages = 0;
+	for (i = 0; i < num_elements; i++) {
+		dma_addr_t *temp;
+		temp = (dma_addr_t *)dst + i;
+		*temp = *((dma_addr_t *)src + i);
+		if (*temp&SVM_RANGE_VRAM_DOMAIN)
+			(*vram_pages)++;
+	}
 
 	return (void *)dst;
 }
@@ -899,7 +913,7 @@ svm_range_copy_dma_addrs(struct svm_range *dst, struct svm_range *src)
 		if (!src->dma_addr[i])
 			continue;
 		dst->dma_addr[i] = svm_range_copy_array(src->dma_addr[i],
-					sizeof(*src->dma_addr[i]), src->npages, 0);
+					sizeof(*src->dma_addr[i]), src->npages, 0, NULL);
 		if (!dst->dma_addr[i])
 			return -ENOMEM;
 	}
@@ -910,7 +924,7 @@ svm_range_copy_dma_addrs(struct svm_range *dst, struct svm_range *src)
 static int
 svm_range_split_array(void *ppnew, void *ppold, size_t size,
 		      uint64_t old_start, uint64_t old_n,
-		      uint64_t new_start, uint64_t new_n)
+		      uint64_t new_start, uint64_t new_n, uint64_t *new_vram_pages)
 {
 	unsigned char *new, *old, *pold;
 	uint64_t d;
@@ -922,11 +936,12 @@ svm_range_split_array(void *ppnew, void *ppold, size_t size,
 		return 0;
 
 	d = (new_start - old_start) * size;
-	new = svm_range_copy_array(pold, size, new_n, d);
+	/* get dma addr array for new range and calculate its vram page number */
+	new = svm_range_copy_array(pold, size, new_n, d, new_vram_pages);
 	if (!new)
 		return -ENOMEM;
 	d = (new_start == old_start) ? new_n * size : 0;
-	old = svm_range_copy_array(pold, size, old_n, d);
+	old = svm_range_copy_array(pold, size, old_n, d, NULL);
 	if (!old) {
 		kvfree(new);
 		return -ENOMEM;
@@ -948,10 +963,13 @@ svm_range_split_pages(struct svm_range *new, struct svm_range *old,
 	for (i = 0; i < MAX_GPU_INSTANCE; i++) {
 		r = svm_range_split_array(&new->dma_addr[i], &old->dma_addr[i],
 					  sizeof(*old->dma_addr[i]), old->start,
-					  npages, new->start, new->npages);
+					  npages, new->start, new->npages,
+					  old->actual_loc ? &new->vram_pages : NULL);
 		if (r)
 			return r;
 	}
+	if (old->actual_loc)
+		old->vram_pages -= new->vram_pages;
 
 	return 0;
 }
@@ -1097,7 +1115,7 @@ static int
 svm_range_split_tail(struct svm_range *prange, uint64_t new_last,
 		     struct list_head *insert_list, struct list_head *remap_list)
 {
-	struct svm_range *tail;
+	struct svm_range *tail = NULL;
 	int r = svm_range_split(prange, prange->start, new_last, &tail);
 
 	if (!r) {
@@ -1112,7 +1130,7 @@ static int
 svm_range_split_head(struct svm_range *prange, uint64_t new_start,
 		     struct list_head *insert_list, struct list_head *remap_list)
 {
-	struct svm_range *head;
+	struct svm_range *head = NULL;
 	int r = svm_range_split(prange, new_start, prange->last, &head);
 
 	if (!r) {
@@ -1135,66 +1153,6 @@ svm_range_add_child(struct svm_range *prange, struct mm_struct *mm,
 	list_add_tail(&pchild->child_list, &prange->child_list);
 }
 
-/**
- * svm_range_split_by_granularity - collect ranges within granularity boundary
- *
- * @p: the process with svms list
- * @mm: mm structure
- * @addr: the vm fault address in pages, to split the prange
- * @parent: parent range if prange is from child list
- * @prange: prange to split
- *
- * Trims @prange to be a single aligned block of prange->granularity if
- * possible. The head and tail are added to the child_list in @parent.
- *
- * Context: caller must hold mmap_read_lock and prange->lock
- *
- * Return:
- * 0 - OK, otherwise error code
- */
-int
-svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
-			       unsigned long addr, struct svm_range *parent,
-			       struct svm_range *prange)
-{
-	struct svm_range *head, *tail;
-	unsigned long start, last, size;
-	int r;
-
-	/* Align splited range start and size to granularity size, then a single
-	 * PTE will be used for whole range, this reduces the number of PTE
-	 * updated and the L1 TLB space used for translation.
-	 */
-	size = 1UL << prange->granularity;
-	start = ALIGN_DOWN(addr, size);
-	last = ALIGN(addr + 1, size) - 1;
-
-	pr_debug("svms 0x%p split [0x%lx 0x%lx] to [0x%lx 0x%lx] size 0x%lx\n",
-		 prange->svms, prange->start, prange->last, start, last, size);
-
-	if (start > prange->start) {
-		r = svm_range_split(prange, start, prange->last, &head);
-		if (r)
-			return r;
-		svm_range_add_child(parent, mm, head, SVM_OP_ADD_RANGE);
-	}
-
-	if (last < prange->last) {
-		r = svm_range_split(prange, prange->start, last, &tail);
-		if (r)
-			return r;
-		svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE);
-	}
-
-	/* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
-	if (p->xnack_enabled && prange->work_item.op == SVM_OP_ADD_RANGE) {
-		prange->work_item.op = SVM_OP_ADD_RANGE_AND_MAP;
-		pr_debug("change prange 0x%p [0x%lx 0x%lx] op %d\n",
-			 prange, prange->start, prange->last,
-			 SVM_OP_ADD_RANGE_AND_MAP);
-	}
-	return 0;
-}
 static bool
 svm_nodes_in_same_hive(struct kfd_node *node_a, struct kfd_node *node_b)
 {
@@ -1614,6 +1572,7 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
  * 5. Release page table (and SVM BO) reservation
  */
 static int svm_range_validate_and_map(struct mm_struct *mm,
+				      unsigned long map_start, unsigned long map_last,
 				      struct svm_range *prange, int32_t gpuidx,
 				      bool intr, bool wait, bool flush_tlb)
 {
@@ -1653,18 +1612,24 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 			if (test_bit(gpuidx, prange->bitmap_access))
 				bitmap_set(ctx->bitmap, gpuidx, 1);
 		}
+
+		/*
+		 * If prange is already mapped or has the always-mapped flag set,
+		 * update the mapping on GPUs with the ACCESS attribute
+		 */
+		if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
+			if (prange->mapped_to_gpu ||
+			    prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)
+				bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
+		}
 	} else {
 		bitmap_or(ctx->bitmap, prange->bitmap_access,
 			  prange->bitmap_aip, MAX_GPU_INSTANCE);
 	}
 
 	if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
-		bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
-		if (!prange->mapped_to_gpu ||
-		    bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
-			r = 0;
-			goto free_ctx;
-		}
+		r = 0;
+		goto free_ctx;
 	}
 
 	if (prange->actual_loc && !prange->ttm_res) {
@@ -1688,10 +1653,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 		}
 	}
 
-	start = prange->start << PAGE_SHIFT;
-	end = (prange->last + 1) << PAGE_SHIFT;
+	start = map_start << PAGE_SHIFT;
+	end = (map_last + 1) << PAGE_SHIFT;
 	for (addr = start; !r && addr < end; ) {
 		struct hmm_range *hmm_range;
+		unsigned long map_start_vma;
+		unsigned long map_last_vma;
 		struct vm_area_struct *vma;
 		unsigned long next = 0;
 		unsigned long offset;
@@ -1719,7 +1686,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 		}
 
 		if (!r) {
-			offset = (addr - start) >> PAGE_SHIFT;
+			offset = (addr >> PAGE_SHIFT) - prange->start;
 			r = svm_range_dma_map(prange, ctx->bitmap, offset, npages,
 					      hmm_range->hmm_pfns);
 			if (r)
@@ -1737,9 +1704,16 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 			r = -EAGAIN;
 		}
 
-		if (!r)
-			r = svm_range_map_to_gpus(prange, offset, npages, readonly,
-						  ctx->bitmap, wait, flush_tlb);
+		if (!r) {
+			map_start_vma = max(map_start, prange->start + offset);
+			map_last_vma = min(map_last, prange->start + offset + npages - 1);
+			if (map_start_vma <= map_last_vma) {
+				offset = map_start_vma - prange->start;
+				npages = map_last_vma - map_start_vma + 1;
+				r = svm_range_map_to_gpus(prange, offset, npages, readonly,
+							  ctx->bitmap, wait, flush_tlb);
+			}
+		}
 
 		if (!r && next == end)
 			prange->mapped_to_gpu = true;
@@ -1832,8 +1806,8 @@ static void svm_range_restore_work(struct work_struct *work)
 		 */
 		mutex_lock(&prange->migrate_mutex);
 
-		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
-					       false, true, false);
+		r = svm_range_validate_and_map(mm, prange->start, prange->last, prange,
+					       MAX_GPU_INSTANCE, false, true, false);
 		if (r)
 			pr_debug("failed %d to map 0x%lx to gpus\n", r,
 				 prange->start);
@@ -1870,7 +1844,7 @@ out_reschedule:
 	/* If validation failed, reschedule another attempt */
 	if (evicted_ranges) {
 		pr_debug("reschedule to restore svm range\n");
-		schedule_delayed_work(&svms->restore_work,
+		queue_delayed_work(system_freezable_wq, &svms->restore_work,
 			msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
 
 		kfd_smi_event_queue_restore_rescheduled(mm);
@@ -1946,7 +1920,7 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
 			pr_debug("failed to quiesce KFD\n");
 
 		pr_debug("schedule to restore svm %p ranges\n", svms);
-		schedule_delayed_work(&svms->restore_work,
+		queue_delayed_work(system_freezable_wq, &svms->restore_work,
 			msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
 	} else {
 		unsigned long s, l;
@@ -2001,6 +1975,7 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
 	new->actual_loc = old->actual_loc;
 	new->granularity = old->granularity;
 	new->mapped_to_gpu = old->mapped_to_gpu;
+	new->vram_pages = old->vram_pages;
 	bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
 	bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
 
@@ -2365,8 +2340,10 @@ retry:
 		mutex_unlock(&svms->lock);
 		mmap_write_unlock(mm);
 
-		/* Pairs with mmget in svm_range_add_list_work */
-		mmput(mm);
+		/* Pairs with mmget in svm_range_add_list_work. If dropping the
+		 * last mm refcount, schedule release work to avoid circular locking
+		 */
+		mmput_async(mm);
 
 		spin_lock(&svms->deferred_list_lock);
 	}
@@ -2677,6 +2654,7 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
 {
 	struct vm_area_struct *vma;
 	struct interval_tree_node *node;
+	struct rb_node *rb_node;
 	unsigned long start_limit, end_limit;
 
 	vma = vma_lookup(p->mm, addr << PAGE_SHIFT);
@@ -2696,16 +2674,15 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
 	if (node) {
 		end_limit = min(end_limit, node->start);
 		/* Last range that ends before the fault address */
-		node = container_of(rb_prev(&node->rb),
-				    struct interval_tree_node, rb);
+		rb_node = rb_prev(&node->rb);
 	} else {
 		/* Last range must end before addr because
 		 * there was no range after addr
 		 */
-		node = container_of(rb_last(&p->svms.objects.rb_root),
-				    struct interval_tree_node, rb);
+		rb_node = rb_last(&p->svms.objects.rb_root);
 	}
-	if (node) {
+	if (rb_node) {
+		node = container_of(rb_node, struct interval_tree_node, rb);
 		if (node->last >= addr) {
 			WARN(1, "Overlap with prev node and page fault addr\n");
 			return -EFAULT;
@@ -2908,6 +2885,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 			uint32_t vmid, uint32_t node_id,
 			uint64_t addr, bool write_fault)
 {
+	unsigned long start, last, size;
 	struct mm_struct *mm = NULL;
 	struct svm_range_list *svms;
 	struct svm_range *prange;
@@ -3043,40 +3021,44 @@ retry_write_locked:
 	kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr,
 				       write_fault, timestamp);
 
-	if (prange->actual_loc != best_loc) {
+	/* Align migration range start and size to granularity size */
+	size = 1UL << prange->granularity;
+	start = max_t(unsigned long, ALIGN_DOWN(addr, size), prange->start);
+	last = min_t(unsigned long, ALIGN(addr + 1, size) - 1, prange->last);
+	if (prange->actual_loc != 0 || best_loc != 0) {
 		migration = true;
+
 		if (best_loc) {
-			r = svm_migrate_to_vram(prange, best_loc, mm,
-					KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
+			r = svm_migrate_to_vram(prange, best_loc, start, last,
+					mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
 			if (r) {
 				pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n",
 					 r, addr);
 				/* Fallback to system memory if migration to
 				 * VRAM failed
 				 */
-				if (prange->actual_loc)
-					r = svm_migrate_vram_to_ram(prange, mm,
-					   KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU,
-					   NULL);
+				if (prange->actual_loc && prange->actual_loc != best_loc)
+					r = svm_migrate_vram_to_ram(prange, mm, start, last,
+						KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, NULL);
 				else
 					r = 0;
 			}
 		} else {
-			r = svm_migrate_vram_to_ram(prange, mm,
-					KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU,
-					NULL);
+			r = svm_migrate_vram_to_ram(prange, mm, start, last,
+					KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, NULL);
 		}
 		if (r) {
 			pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
-				 r, svms, prange->start, prange->last);
+				 r, svms, start, last);
 			goto out_unlock_range;
 		}
 	}
 
-	r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);
+	r = svm_range_validate_and_map(mm, start, last, prange, gpuidx, false,
+				       false, false);
 	if (r)
 		pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
-			 r, svms, prange->start, prange->last);
+			 r, svms, start, last);
 
 	kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
 				     migration);
@@ -3422,18 +3404,24 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
 	*migrated = false;
 	best_loc = svm_range_best_prefetch_location(prange);
 
-	if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
-	    best_loc == prange->actual_loc)
+	/* when best_loc is a gpu node and same as prange->actual_loc
+	 * we still need to do migration as prange->actual_loc != 0 does
+	 * not mean all pages in prange are in vram. hmm migrate will pick
+	 * up the right pages during migration.
+	 */
+	if ((best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED) ||
+	    (best_loc == 0 && prange->actual_loc == 0))
 		return 0;
 
 	if (!best_loc) {
-		r = svm_migrate_vram_to_ram(prange, mm,
+		r = svm_migrate_vram_to_ram(prange, mm, prange->start, prange->last,
 					KFD_MIGRATE_TRIGGER_PREFETCH, NULL);
 		*migrated = !r;
 		return r;
 	}
 
-	r = svm_migrate_to_vram(prange, best_loc, mm, KFD_MIGRATE_TRIGGER_PREFETCH);
+	r = svm_migrate_to_vram(prange, best_loc, prange->start, prange->last,
+				mm, KFD_MIGRATE_TRIGGER_PREFETCH);
 	*migrated = !r;
 
 	return r;
@@ -3441,13 +3429,14 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
 
 int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence)
 {
-	if (!fence)
-		return -EINVAL;
-
-	if (dma_fence_is_signaled(&fence->base))
-		return 0;
-
-	if (fence->svm_bo) {
+	/* Dereferencing fence->svm_bo is safe here because the fence hasn't
+	 * signaled yet and we're under the protection of the fence->lock.
+	 * After the fence is signaled in svm_range_bo_release, we cannot get
+	 * here any more.
+	 *
+	 * Reference is dropped in svm_range_evict_svm_bo_worker.
+	 */
+	if (svm_bo_ref_unless_zero(fence->svm_bo)) {
 		WRITE_ONCE(fence->svm_bo->evicting, 1);
 		schedule_work(&fence->svm_bo->eviction_work);
 	}
@@ -3462,8 +3451,6 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
 	int r = 0;
 
 	svm_bo = container_of(work, struct svm_range_bo, eviction_work);
-	if (!svm_bo_ref_unless_zero(svm_bo))
-		return; /* svm_bo was freed while eviction was pending */
 
 	if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
 		mm = svm_bo->eviction_fence->mm;
@@ -3488,7 +3475,11 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
 
 		mutex_lock(&prange->migrate_mutex);
 		do {
+			/* migrate all vram pages in this prange to sys ram;
+			 * after that, prange->actual_loc should be zero
+			 */
 			r = svm_migrate_vram_to_ram(prange, mm,
+					prange->start, prange->last,
 					KFD_MIGRATE_TRIGGER_TTM_EVICTION, NULL);
 		} while (!r && prange->actual_loc && --retries);
 
@@ -3612,8 +3603,8 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
 
 		flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu;
 
-		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
-					       true, true, flush_tlb);
+		r = svm_range_validate_and_map(mm, prange->start, prange->last, prange,
+					       MAX_GPU_INSTANCE, true, true, flush_tlb);
 		if (r)
 			pr_debug("failed %d to map svm range\n", r);
 
@@ -3627,8 +3618,8 @@ out_unlock_range:
 		pr_debug("Remapping prange 0x%p [0x%lx 0x%lx]\n",
 			 prange, prange->start, prange->last);
 		mutex_lock(&prange->migrate_mutex);
-		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
-					       true, true, prange->mapped_to_gpu);
+		r = svm_range_validate_and_map(mm,  prange->start, prange->last, prange,
+					       MAX_GPU_INSTANCE, true, true, prange->mapped_to_gpu);
 		if (r)
 			pr_debug("failed %d on remap svm range\n", r);
 		mutex_unlock(&prange->migrate_mutex);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index c528df1d0ba2..026863a0abcd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -78,6 +78,7 @@ struct svm_work_list_item {
  * @update_list:link list node used to add to update_list
  * @mapping:    bo_va mapping structure to create and update GPU page table
  * @npages:     number of pages
+ * @vram_pages: number of vram pages in this svm_range
  * @dma_addr:   dma mapping address on each GPU for system memory physical page
  * @ttm_res:    vram ttm resource map
  * @offset:     range start offset within mm_nodes
@@ -88,7 +89,9 @@ struct svm_work_list_item {
  * @flags:      flags defined as KFD_IOCTL_SVM_FLAG_*
  * @perferred_loc: perferred location, 0 for CPU, or GPU id
  * @perfetch_loc: last prefetch location, 0 for CPU, or GPU id
- * @actual_loc: the actual location, 0 for CPU, or GPU id
+ * @actual_loc: this svm_range location. 0: all pages are from sys ram;
+ *              GPU id: this svm_range may include vram pages from GPU with
+ *              id actual_loc.
  * @granularity:migration granularity, log2 num pages
  * @invalid:    not 0 means cpu page table is invalidated
  * @validate_timestamp: system timestamp when range is validated
@@ -112,6 +115,7 @@ struct svm_range {
 	struct list_head		list;
 	struct list_head		update_list;
 	uint64_t			npages;
+	uint64_t			vram_pages;
 	dma_addr_t			*dma_addr[MAX_GPU_INSTANCE];
 	struct ttm_resource		*ttm_res;
 	uint64_t			offset;
@@ -168,9 +172,6 @@ struct kfd_node *svm_range_get_node_by_id(struct svm_range *prange,
 int svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
 			    bool clear);
 void svm_range_vram_node_free(struct svm_range *prange);
-int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
-			       unsigned long addr, struct svm_range *parent,
-			       struct svm_range *prange);
 int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 			    uint32_t vmid, uint32_t node_id, uint64_t addr,
 			    bool write_fault);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 057284bf50bb..6ed2ec381aaa 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1342,10 +1342,11 @@ static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int g
 		num_cpu++;
 	}
 
+	if (list_empty(&kdev->io_link_props))
+		return -ENODATA;
+
 	gpu_link = list_first_entry(&kdev->io_link_props,
-					struct kfd_iolink_properties, list);
-	if (!gpu_link)
-		return -ENOMEM;
+				    struct kfd_iolink_properties, list);
 
 	for (i = 0; i < num_cpu; i++) {
 		/* CPU <--> GPU */
@@ -1423,15 +1424,17 @@ static int kfd_add_peer_prop(struct kfd_topology_device *kdev,
 				peer->gpu->adev))
 		return ret;
 
+	if (list_empty(&kdev->io_link_props))
+		return -ENODATA;
+
 	iolink1 = list_first_entry(&kdev->io_link_props,
-							struct kfd_iolink_properties, list);
-	if (!iolink1)
-		return -ENOMEM;
+				   struct kfd_iolink_properties, list);
+
+	if (list_empty(&peer->io_link_props))
+		return -ENODATA;
 
 	iolink2 = list_first_entry(&peer->io_link_props,
-							struct kfd_iolink_properties, list);
-	if (!iolink2)
-		return -ENOMEM;
+				   struct kfd_iolink_properties, list);
 
 	props = kfd_alloc_struct(props);
 	if (!props)
@@ -1449,17 +1452,19 @@ static int kfd_add_peer_prop(struct kfd_topology_device *kdev,
 		/* CPU->CPU  link*/
 		cpu_dev = kfd_topology_device_by_proximity_domain(iolink1->node_to);
 		if (cpu_dev) {
-			list_for_each_entry(iolink3, &cpu_dev->io_link_props, list)
-				if (iolink3->node_to == iolink2->node_to)
-					break;
-
-			props->weight += iolink3->weight;
-			props->min_latency += iolink3->min_latency;
-			props->max_latency += iolink3->max_latency;
-			props->min_bandwidth = min(props->min_bandwidth,
-							iolink3->min_bandwidth);
-			props->max_bandwidth = min(props->max_bandwidth,
-							iolink3->max_bandwidth);
+			list_for_each_entry(iolink3, &cpu_dev->io_link_props, list) {
+				if (iolink3->node_to != iolink2->node_to)
+					continue;
+
+				props->weight += iolink3->weight;
+				props->min_latency += iolink3->min_latency;
+				props->max_latency += iolink3->max_latency;
+				props->min_bandwidth = min(props->min_bandwidth,
+							   iolink3->min_bandwidth);
+				props->max_bandwidth = min(props->max_bandwidth,
+							   iolink3->max_bandwidth);
+				break;
+			}
 		} else {
 			WARN(1, "CPU node not found");
 		}
@@ -1633,12 +1638,10 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
 		else
 			mode = UNKNOWN_MEMORY_PARTITION_MODE;
 
-		if (pcache->cache_level == 2)
-			pcache->cache_size = pcache_info[cache_type].cache_size * num_xcc;
-		else if (mode)
-			pcache->cache_size = pcache_info[cache_type].cache_size / mode;
-		else
-			pcache->cache_size = pcache_info[cache_type].cache_size;
+		pcache->cache_size = pcache_info[cache_type].cache_size;
+		/* Partition mode only affects L3 cache size */
+		if (mode && pcache->cache_level == 3)
+			pcache->cache_size /= mode;
 
 		if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE)
 			pcache->cache_type |= HSA_CACHE_TYPE_DATA;
diff --git a/drivers/gpu/drm/amd/display/Makefile b/drivers/gpu/drm/amd/display/Makefile
index af17ab8027df..92a5c5efcf92 100644
--- a/drivers/gpu/drm/amd/display/Makefile
+++ b/drivers/gpu/drm/amd/display/Makefile
@@ -30,6 +30,9 @@ subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/inc/
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/inc/hw
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/clk_mgr
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/hwss
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/resource
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dsc
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/optc
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/inc
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/freesync
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/color
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
index 8bf94920d23e..ab2a97e354da 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
@@ -25,22 +25,25 @@
 
 
 
+ifneq ($(CONFIG_DRM_AMD_DC),)
 AMDGPUDM = \
 	amdgpu_dm.o \
 	amdgpu_dm_plane.o \
 	amdgpu_dm_crtc.o \
 	amdgpu_dm_irq.o \
 	amdgpu_dm_mst_types.o \
-	amdgpu_dm_color.o
+	amdgpu_dm_color.o \
+	amdgpu_dm_services.o \
+	amdgpu_dm_helpers.o \
+	amdgpu_dm_pp_smu.o \
+	amdgpu_dm_psr.o \
+	amdgpu_dm_replay.o \
+	amdgpu_dm_wb.o
 
 ifdef CONFIG_DRM_AMD_DC_FP
 AMDGPUDM += dc_fpu.o
 endif
 
-ifneq ($(CONFIG_DRM_AMD_DC),)
-AMDGPUDM += amdgpu_dm_services.o amdgpu_dm_helpers.o amdgpu_dm_pp_smu.o amdgpu_dm_psr.o amdgpu_dm_replay.o
-endif
-
 AMDGPUDM += amdgpu_dm_hdcp.o
 
 ifneq ($(CONFIG_DEBUG_FS),)
@@ -52,3 +55,4 @@ subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc
 AMDGPU_DM = $(addprefix $(AMDDALPATH)/amdgpu_dm/,$(AMDGPUDM))
 
 AMD_DISPLAY_FILES += $(AMDGPU_DM)
+endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index b8c3a9b104a4..5853cf022917 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -37,6 +37,7 @@
 #include "dc/dc_dmub_srv.h"
 #include "dc/dc_edid_parser.h"
 #include "dc/dc_stat.h"
+#include "dc/dc_state.h"
 #include "amdgpu_dm_trace.h"
 #include "dpcd_defs.h"
 #include "link/protocols/link_dpcd.h"
@@ -54,6 +55,7 @@
 #include "amdgpu_dm_crtc.h"
 #include "amdgpu_dm_hdcp.h"
 #include <drm/display/drm_hdcp_helper.h>
+#include "amdgpu_dm_wb.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_atombios.h"
 
@@ -65,7 +67,6 @@
 #include "amdgpu_dm_debugfs.h"
 #endif
 #include "amdgpu_dm_psr.h"
-#include "amdgpu_dm_replay.h"
 
 #include "ivsrcid/ivsrcid_vislands30.h"
 
@@ -271,6 +272,7 @@ static int dm_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
 {
 	u32 v_blank_start, v_blank_end, h_position, v_position;
 	struct amdgpu_crtc *acrtc = NULL;
+	struct dc *dc = adev->dm.dc;
 
 	if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc))
 		return -EINVAL;
@@ -283,6 +285,9 @@ static int dm_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
 		return 0;
 	}
 
+	if (dc && dc->caps.ips_support && dc->idle_optimizations_allowed)
+		dc_allow_idle_optimizations(dc, false);
+
 	/*
 	 * TODO rework base driver to use values directly.
 	 * for now parse it back into reg-format
@@ -576,6 +581,7 @@ static void dm_crtc_high_irq(void *interrupt_params)
 {
 	struct common_irq_params *irq_params = interrupt_params;
 	struct amdgpu_device *adev = irq_params->adev;
+	struct drm_writeback_job *job;
 	struct amdgpu_crtc *acrtc;
 	unsigned long flags;
 	int vrr_active;
@@ -584,6 +590,33 @@ static void dm_crtc_high_irq(void *interrupt_params)
 	if (!acrtc)
 		return;
 
+	if (acrtc->wb_pending) {
+		if (acrtc->wb_conn) {
+			spin_lock_irqsave(&acrtc->wb_conn->job_lock, flags);
+			job = list_first_entry_or_null(&acrtc->wb_conn->job_queue,
+						       struct drm_writeback_job,
+						       list_entry);
+			spin_unlock_irqrestore(&acrtc->wb_conn->job_lock, flags);
+
+			if (job) {
+				unsigned int v_total, refresh_hz;
+				struct dc_stream_state *stream = acrtc->dm_irq_params.stream;
+
+				v_total = stream->adjust.v_total_max ?
+					  stream->adjust.v_total_max : stream->timing.v_total;
+				refresh_hz = div_u64((uint64_t) stream->timing.pix_clk_100hz *
+					     100LL, (v_total * stream->timing.h_total));
+				mdelay(1000 / refresh_hz);
+
+				drm_writeback_signal_completion(acrtc->wb_conn, 0);
+				dc_stream_fc_disable_writeback(adev->dm.dc,
+							       acrtc->dm_irq_params.stream, 0);
+			}
+		} else
+			DRM_ERROR("%s: no amdgpu_crtc wb_conn\n", __func__);
+		acrtc->wb_pending = false;
+	}
+
 	vrr_active = amdgpu_dm_crtc_vrr_active_irq(acrtc);
 
 	drm_dbg_vbl(adev_to_drm(adev),
@@ -726,6 +759,10 @@ static void dmub_hpd_callback(struct amdgpu_device *adev,
 
 	drm_connector_list_iter_begin(dev, &iter);
 	drm_for_each_connector_iter(connector, &iter) {
+
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 		aconnector = to_amdgpu_dm_connector(connector);
 		if (link && aconnector->dc_link == link) {
 			if (notify->type == DMUB_NOTIFICATION_HPD)
@@ -895,8 +932,7 @@ static int dm_early_init(void *handle);
 /* Allocate memory for FBC compressed data  */
 static void amdgpu_dm_fbc_init(struct drm_connector *connector)
 {
-	struct drm_device *dev = connector->dev;
-	struct amdgpu_device *adev = drm_to_adev(dev);
+	struct amdgpu_device *adev = drm_to_adev(connector->dev);
 	struct dm_compressor_info *compressor = &adev->dm.compressor;
 	struct amdgpu_dm_connector *aconn = to_amdgpu_dm_connector(connector);
 	struct drm_display_mode *mode;
@@ -950,6 +986,10 @@ static int amdgpu_dm_audio_component_get_eld(struct device *kdev, int port,
 
 	drm_connector_list_iter_begin(dev, &conn_iter);
 	drm_for_each_connector_iter(connector, &conn_iter) {
+
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 		aconnector = to_amdgpu_dm_connector(connector);
 		if (aconnector->audio_inst != port)
 			continue;
@@ -990,8 +1030,7 @@ static int amdgpu_dm_audio_component_bind(struct device *kdev,
 static void amdgpu_dm_audio_component_unbind(struct device *kdev,
 					  struct device *hda_kdev, void *data)
 {
-	struct drm_device *dev = dev_get_drvdata(kdev);
-	struct amdgpu_device *adev = drm_to_adev(dev);
+	struct amdgpu_device *adev = drm_to_adev(dev_get_drvdata(kdev));
 	struct drm_audio_component *acomp = data;
 
 	acomp->ops = NULL;
@@ -1259,7 +1298,9 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_
 	/* AGP aperture is disabled */
 	if (agp_bot > agp_top) {
 		logical_addr_low = adev->gmc.fb_start >> 18;
-		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+		if (adev->apu_flags & (AMD_APU_IS_RAVEN2 |
+				       AMD_APU_IS_RENOIR |
+				       AMD_APU_IS_GREEN_SARDINE))
 			/*
 			 * Raven2 has a HW issue that it is unable to use the vram which
 			 * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the
@@ -1271,7 +1312,9 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_
 			logical_addr_high = adev->gmc.fb_end >> 18;
 	} else {
 		logical_addr_low = min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18;
-		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+		if (adev->apu_flags & (AMD_APU_IS_RAVEN2 |
+				       AMD_APU_IS_RENOIR |
+				       AMD_APU_IS_GREEN_SARDINE))
 			/*
 			 * Raven2 has a HW issue that it is unable to use the vram which
 			 * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the
@@ -1676,6 +1719,15 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 	init_data.nbio_reg_offsets = adev->reg_offset[NBIO_HWIP][0];
 	init_data.clk_reg_offsets = adev->reg_offset[CLK_HWIP][0];
 
+	if (amdgpu_dc_debug_mask & DC_DISABLE_IPS)
+		init_data.flags.disable_ips = DMUB_IPS_DISABLE_ALL;
+
+	init_data.flags.disable_ips_in_vpb = 1;
+
+	/* Enable DWB for tested platforms only */
+	if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0))
+		init_data.num_virtual_links = 1;
+
 	INIT_LIST_HEAD(&adev->dm.da_list);
 
 	retrieve_dmi_info(&adev->dm);
@@ -1718,23 +1770,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 	/* TODO: Remove after DP2 receiver gets proper support of Cable ID feature */
 	adev->dm.dc->debug.ignore_cable_id = true;
 
-	/* TODO: There is a new drm mst change where the freedom of
-	 * vc_next_start_slot update is revoked/moved into drm, instead of in
-	 * driver. This forces us to make sure to get vc_next_start_slot updated
-	 * in drm function each time without considering if mst_state is active
-	 * or not. Otherwise, next time hotplug will give wrong start_slot
-	 * number. We are implementing a temporary solution to even notify drm
-	 * mst deallocation when link is no longer of MST type when uncommitting
-	 * the stream so we will have more time to work on a proper solution.
-	 * Ideally when dm_helpers_dp_mst_stop_top_mgr message is triggered, we
-	 * should notify drm to do a complete "reset" of its states and stop
-	 * calling further drm mst functions when link is no longer of an MST
-	 * type. This could happen when we unplug an MST hubs/displays. When
-	 * uncommit stream comes later after unplug, we should just reset
-	 * hardware states only.
-	 */
-	adev->dm.dc->debug.temp_mst_deallocation_sequence = true;
-
 	if (adev->dm.dc->caps.dp_hdmi21_pcon_support)
 		DRM_INFO("DP-HDMI FRL PCON supported\n");
 
@@ -1808,21 +1843,12 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 			DRM_ERROR("amdgpu: fail to register dmub aux callback");
 			goto error;
 		}
-		if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD, dmub_hpd_callback, true)) {
-			DRM_ERROR("amdgpu: fail to register dmub hpd callback");
-			goto error;
-		}
-		if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ, dmub_hpd_callback, true)) {
-			DRM_ERROR("amdgpu: fail to register dmub hpd callback");
-			goto error;
-		}
-	}
-
-	/* Enable outbox notification only after IRQ handlers are registered and DMUB is alive.
-	 * It is expected that DMUB will resend any pending notifications at this point, for
-	 * example HPD from DPIA.
-	 */
-	if (dc_is_dmub_outbox_supported(adev->dm.dc)) {
+		/* Enable outbox notification only after IRQ handlers are registered and DMUB is alive.
+		 * It is expected that DMUB will resend any pending notifications at this point. Note
+		 * that hpd and hpd_irq handler registration are deferred to register_hpd_handlers() to
+		 * align with the legacy interface initialization sequence. Connection status will be
+		 * proactively detected once in amdgpu_dm_initialize_drm_device().
+		 */
 		dc_enable_dmub_outbox(adev->dm.dc);
 
 		/* DPIA trace goes to dmesg logs only if outbox is enabled */
@@ -1921,7 +1947,7 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
 				      &adev->dm.dmub_bo_gpu_addr,
 				      &adev->dm.dmub_bo_cpu_addr);
 
-	if (adev->dm.hpd_rx_offload_wq) {
+	if (adev->dm.hpd_rx_offload_wq && adev->dm.dc) {
 		for (i = 0; i < adev->dm.dc->caps.max_links; i++) {
 			if (adev->dm.hpd_rx_offload_wq[i].wq) {
 				destroy_workqueue(adev->dm.hpd_rx_offload_wq[i].wq);
@@ -2252,6 +2278,7 @@ static int dm_sw_fini(void *handle)
 
 	if (adev->dm.dmub_srv) {
 		dmub_srv_destroy(adev->dm.dmub_srv);
+		kfree(adev->dm.dmub_srv);
 		adev->dm.dmub_srv = NULL;
 	}
 
@@ -2270,6 +2297,10 @@ static int detect_mst_link_for_all_connectors(struct drm_device *dev)
 
 	drm_connector_list_iter_begin(dev, &iter);
 	drm_for_each_connector_iter(connector, &iter) {
+
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 		aconnector = to_amdgpu_dm_connector(connector);
 		if (aconnector->dc_link->type == dc_connection_mst_branch &&
 		    aconnector->mst_mgr.aux) {
@@ -2398,6 +2429,10 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend)
 
 	drm_connector_list_iter_begin(dev, &iter);
 	drm_for_each_connector_iter(connector, &iter) {
+
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 		aconnector = to_amdgpu_dm_connector(connector);
 		if (aconnector->dc_link->type != dc_connection_mst_branch ||
 		    aconnector->mst_root)
@@ -2577,12 +2612,10 @@ static enum dc_status amdgpu_dm_commit_zero_streams(struct dc *dc)
 
 	memset(del_streams, 0, sizeof(del_streams));
 
-	context = dc_create_state(dc);
+	context = dc_state_create_current_copy(dc);
 	if (context == NULL)
 		goto context_alloc_fail;
 
-	dc_resource_state_copy_construct_current(dc, context);
-
 	/* First remove from context all streams */
 	for (i = 0; i < context->stream_count; i++) {
 		struct dc_stream_state *stream = context->streams[i];
@@ -2592,12 +2625,12 @@ static enum dc_status amdgpu_dm_commit_zero_streams(struct dc *dc)
 
 	/* Remove all planes for removed streams and then remove the streams */
 	for (i = 0; i < del_streams_count; i++) {
-		if (!dc_rem_all_planes_for_stream(dc, del_streams[i], context)) {
+		if (!dc_state_rem_all_planes_for_stream(dc, del_streams[i], context)) {
 			res = DC_FAIL_DETACH_SURFACES;
 			goto fail;
 		}
 
-		res = dc_remove_stream_from_ctx(dc, context, del_streams[i]);
+		res = dc_state_remove_stream(dc, context, del_streams[i]);
 		if (res != DC_OK)
 			goto fail;
 	}
@@ -2605,7 +2638,7 @@ static enum dc_status amdgpu_dm_commit_zero_streams(struct dc *dc)
 	res = dc_commit_streams(dc, context->streams, context->stream_count);
 
 fail:
-	dc_release_state(context);
+	dc_state_release(context);
 
 context_alloc_fail:
 	return res;
@@ -2632,7 +2665,7 @@ static int dm_suspend(void *handle)
 
 		dc_allow_idle_optimizations(adev->dm.dc, false);
 
-		dm->cached_dc_state = dc_copy_state(dm->dc->current_state);
+		dm->cached_dc_state = dc_state_create_copy(dm->dc->current_state);
 
 		dm_gpureset_toggle_interrupts(adev, dm->cached_dc_state, false);
 
@@ -2657,11 +2690,12 @@ static int dm_suspend(void *handle)
 	hpd_rx_irq_work_suspend(dm);
 
 	dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3);
+	dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D3);
 
 	return 0;
 }
 
-struct amdgpu_dm_connector *
+struct drm_connector *
 amdgpu_dm_find_first_crtc_matching_connector(struct drm_atomic_state *state,
 					     struct drm_crtc *crtc)
 {
@@ -2674,7 +2708,7 @@ amdgpu_dm_find_first_crtc_matching_connector(struct drm_atomic_state *state,
 		crtc_from_state = new_con_state->crtc;
 
 		if (crtc_from_state == crtc)
-			return to_amdgpu_dm_connector(connector);
+			return connector;
 	}
 
 	return NULL;
@@ -2825,7 +2859,7 @@ static int dm_resume(void *handle)
 	bool need_hotplug = false;
 
 	if (dm->dc->caps.ips_support) {
-		dc_dmub_srv_exit_low_power_state(dm->dc);
+		dc_dmub_srv_apply_idle_power_optimizations(dm->dc, false);
 	}
 
 	if (amdgpu_in_reset(adev)) {
@@ -2852,6 +2886,7 @@ static int dm_resume(void *handle)
 		if (r)
 			DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r);
 
+		dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D0);
 		dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
 
 		dc_resume(dm->dc);
@@ -2877,7 +2912,7 @@ static int dm_resume(void *handle)
 
 		dm_gpureset_toggle_interrupts(adev, dm->cached_dc_state, true);
 
-		dc_release_state(dm->cached_dc_state);
+		dc_state_release(dm->cached_dc_state);
 		dm->cached_dc_state = NULL;
 
 		amdgpu_dm_irq_resume_late(adev);
@@ -2887,10 +2922,9 @@ static int dm_resume(void *handle)
 		return 0;
 	}
 	/* Recreate dc_state - DC invalidates it when setting power state to S3. */
-	dc_release_state(dm_state->context);
-	dm_state->context = dc_create_state(dm->dc);
+	dc_state_release(dm_state->context);
+	dm_state->context = dc_state_create(dm->dc);
 	/* TODO: Remove dc_state->dccg, use dc->dccg directly. */
-	dc_resource_state_construct(dm->dc, dm_state->context);
 
 	/* Before powering on DC we need to re-initialize DMUB. */
 	dm_dmub_hw_resume(adev);
@@ -2902,6 +2936,7 @@ static int dm_resume(void *handle)
 	}
 
 	/* power on hardware */
+	dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D0);
 	dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
 
 	/* program HPD filter */
@@ -2919,6 +2954,10 @@ static int dm_resume(void *handle)
 	/* Do detection*/
 	drm_connector_list_iter_begin(ddev, &iter);
 	drm_for_each_connector_iter(connector, &iter) {
+
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 		aconnector = to_amdgpu_dm_connector(connector);
 
 		if (!aconnector->dc_link)
@@ -3489,9 +3528,20 @@ static void register_hpd_handlers(struct amdgpu_device *adev)
 	int_params.requested_polarity = INTERRUPT_POLARITY_DEFAULT;
 	int_params.current_polarity = INTERRUPT_POLARITY_DEFAULT;
 
+	if (dc_is_dmub_outbox_supported(adev->dm.dc)) {
+		if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD, dmub_hpd_callback, true))
+			DRM_ERROR("amdgpu: fail to register dmub hpd callback");
+
+		if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ, dmub_hpd_callback, true))
+			DRM_ERROR("amdgpu: fail to register dmub hpd callback");
+	}
+
 	list_for_each_entry(connector,
 			&dev->mode_config.connector_list, head)	{
 
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 		aconnector = to_amdgpu_dm_connector(connector);
 		dc_link = aconnector->dc_link;
 
@@ -3514,10 +3564,6 @@ static void register_hpd_handlers(struct amdgpu_device *adev)
 					handle_hpd_rx_irq,
 					(void *) aconnector);
 		}
-
-		if (adev->dm.hpd_rx_offload_wq)
-			adev->dm.hpd_rx_offload_wq[connector->index].aconnector =
-				aconnector;
 	}
 }
 
@@ -3958,7 +4004,7 @@ dm_atomic_duplicate_state(struct drm_private_obj *obj)
 	old_state = to_dm_atomic_state(obj->state);
 
 	if (old_state && old_state->context)
-		new_state->context = dc_copy_state(old_state->context);
+		new_state->context = dc_state_create_copy(old_state->context);
 
 	if (!new_state->context) {
 		kfree(new_state);
@@ -3974,7 +4020,7 @@ static void dm_atomic_destroy_state(struct drm_private_obj *obj,
 	struct dm_atomic_state *dm_state = to_dm_atomic_state(state);
 
 	if (dm_state && dm_state->context)
-		dc_release_state(dm_state->context);
+		dc_state_release(dm_state->context);
 
 	kfree(dm_state);
 }
@@ -4010,14 +4056,12 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
 	if (!state)
 		return -ENOMEM;
 
-	state->context = dc_create_state(adev->dm.dc);
+	state->context = dc_state_create_current_copy(adev->dm.dc);
 	if (!state->context) {
 		kfree(state);
 		return -ENOMEM;
 	}
 
-	dc_resource_state_copy_construct_current(adev->dm.dc, state->context);
-
 	drm_atomic_private_obj_init(adev_to_drm(adev),
 				    &adev->dm.atomic_obj,
 				    &state->base,
@@ -4025,14 +4069,23 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
 
 	r = amdgpu_display_modeset_create_props(adev);
 	if (r) {
-		dc_release_state(state->context);
+		dc_state_release(state->context);
 		kfree(state);
 		return r;
 	}
 
+#ifdef AMD_PRIVATE_COLOR
+	/* Unwind like the neighbouring error paths so the DM state is not leaked. */
+	if (amdgpu_dm_create_color_properties(adev)) {
+		dc_state_release(state->context);
+		kfree(state);
+		return -ENOMEM;
+	}
+#endif
+
 	r = amdgpu_dm_audio_init(adev);
 	if (r) {
-		dc_release_state(state->context);
+		dc_state_release(state->context);
 		kfree(state);
 		return r;
 	}
@@ -4346,7 +4395,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
 	enum dc_connection_type new_connection_type = dc_connection_none;
 	const struct dc_plane_cap *plane;
 	bool psr_feature_enabled = false;
-	bool replay_feature_enabled = false;
 	int max_overlay = dm->dc->caps.max_slave_planes;
 
 	dm->display_indexes_num = dm->dc->caps.max_streams;
@@ -4458,20 +4506,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
 		}
 	}
 
-	if (!(amdgpu_dc_debug_mask & DC_DISABLE_REPLAY)) {
-		switch (adev->ip_versions[DCE_HWIP][0]) {
-		case IP_VERSION(3, 1, 4):
-		case IP_VERSION(3, 1, 5):
-		case IP_VERSION(3, 1, 6):
-		case IP_VERSION(3, 2, 0):
-		case IP_VERSION(3, 2, 1):
-			replay_feature_enabled = true;
-			break;
-		default:
-			replay_feature_enabled = amdgpu_dc_feature_mask & DC_REPLAY_MASK;
-			break;
-		}
-	}
 	/* loops over all connectors on the board */
 	for (i = 0; i < link_cnt; i++) {
 		struct dc_link *link = NULL;
@@ -4483,6 +4517,28 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
 			continue;
 		}
 
+		link = dc_get_link_at_index(dm->dc, i);
+
+		if (link->connector_signal == SIGNAL_TYPE_VIRTUAL) {
+			struct amdgpu_dm_wb_connector *wbcon = kzalloc(sizeof(*wbcon), GFP_KERNEL);
+
+			if (!wbcon) {
+				DRM_ERROR("KMS: Failed to allocate writeback connector\n");
+				continue;
+			}
+
+			if (amdgpu_dm_wb_connector_init(dm, wbcon, i)) {
+				DRM_ERROR("KMS: Failed to initialize writeback connector\n");
+				kfree(wbcon);
+				continue;
+			}
+
+			link->psr_settings.psr_feature_enabled = false;
+			link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED;
+
+			continue;
+		}
+
 		aconnector = kzalloc(sizeof(*aconnector), GFP_KERNEL);
 		if (!aconnector)
 			goto fail;
@@ -4501,7 +4557,9 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
 			goto fail;
 		}
 
-		link = dc_get_link_at_index(dm->dc, i);
+		if (dm->hpd_rx_offload_wq)
+			dm->hpd_rx_offload_wq[aconnector->base.index].aconnector =
+				aconnector;
 
 		if (!dc_link_detect_connection_type(link, &new_connection_type))
 			DRM_ERROR("KMS: Failed to detect connector\n");
@@ -4520,12 +4578,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
 				amdgpu_dm_update_connector_after_detect(aconnector);
 				setup_backlight_device(dm, aconnector);
 
-				/*
-				 * Disable psr if replay can be enabled
-				 */
-				if (replay_feature_enabled && amdgpu_dm_setup_replay(link, aconnector))
-					psr_feature_enabled = false;
-
 				if (psr_feature_enabled)
 					amdgpu_dm_set_psr_caps(link);
 
@@ -5107,7 +5159,9 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
 	 * Always set input transfer function, since plane state is refreshed
 	 * every time.
 	 */
-	ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state, dc_plane_state);
+	ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state,
+						plane_state,
+						dc_plane_state);
 	if (ret)
 		return ret;
 
@@ -5165,6 +5219,7 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
 				struct drm_plane_state *new_plane_state,
 				struct drm_crtc_state *crtc_state,
 				struct dc_flip_addrs *flip_addrs,
+				bool is_psr_su,
 				bool *dirty_regions_changed)
 {
 	struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state);
@@ -5183,9 +5238,16 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
 	if (plane->type == DRM_PLANE_TYPE_CURSOR)
 		return;
 
+	if (new_plane_state->rotation != DRM_MODE_ROTATE_0)
+		goto ffu;
+
 	num_clips = drm_plane_get_damage_clips_count(new_plane_state);
 	clips = drm_plane_get_damage_clips(new_plane_state);
 
+	if (num_clips && (!amdgpu_damage_clips || (amdgpu_damage_clips < 0 &&
+						   is_psr_su)))
+		goto ffu;
+
 	if (!dm_crtc_state->mpo_requested) {
 		if (!num_clips || num_clips > DC_MAX_DIRTY_RECTS)
 			goto ffu;
@@ -5509,10 +5571,13 @@ static void fill_stream_properties_from_drm_display_mode(
 {
 	struct dc_crtc_timing *timing_out = &stream->timing;
 	const struct drm_display_info *info = &connector->display_info;
-	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+	struct amdgpu_dm_connector *aconnector = NULL;
 	struct hdmi_vendor_infoframe hv_frame;
 	struct hdmi_avi_infoframe avi_frame;
 
+	if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK)
+		aconnector = to_amdgpu_dm_connector(connector);
+
 	memset(&hv_frame, 0, sizeof(hv_frame));
 	memset(&avi_frame, 0, sizeof(avi_frame));
 
@@ -5525,6 +5590,7 @@ static void fill_stream_properties_from_drm_display_mode(
 			&& stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
 		timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR420;
 	else if (drm_mode_is_420_also(info, mode_in)
+			&& aconnector
 			&& aconnector->force_yuv420_output)
 		timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR420;
 	else if ((connector->display_info.color_formats & DRM_COLOR_FORMAT_YCBCR444)
@@ -5560,7 +5626,7 @@ static void fill_stream_properties_from_drm_display_mode(
 		timing_out->hdmi_vic = hv_frame.vic;
 	}
 
-	if (is_freesync_video_mode(mode_in, aconnector)) {
+	if (aconnector && is_freesync_video_mode(mode_in, aconnector)) {
 		timing_out->h_addressable = mode_in->hdisplay;
 		timing_out->h_total = mode_in->htotal;
 		timing_out->h_sync_width = mode_in->hsync_end - mode_in->hsync_start;
@@ -5681,13 +5747,13 @@ decide_crtc_timing_for_drm_display_mode(struct drm_display_mode *drm_mode,
 }
 
 static struct dc_sink *
-create_fake_sink(struct amdgpu_dm_connector *aconnector)
+create_fake_sink(struct dc_link *link)
 {
 	struct dc_sink_init_data sink_init_data = { 0 };
 	struct dc_sink *sink = NULL;
 
-	sink_init_data.link = aconnector->dc_link;
-	sink_init_data.sink_signal = aconnector->dc_link->connector_signal;
+	sink_init_data.link = link;
+	sink_init_data.sink_signal = link->connector_signal;
 
 	sink = dc_sink_create(&sink_init_data);
 	if (!sink) {
@@ -6037,14 +6103,14 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector,
 }
 
 static struct dc_stream_state *
-create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
+create_stream_for_sink(struct drm_connector *connector,
 		       const struct drm_display_mode *drm_mode,
 		       const struct dm_connector_state *dm_state,
 		       const struct dc_stream_state *old_stream,
 		       int requested_bpc)
 {
+	struct amdgpu_dm_connector *aconnector = NULL;
 	struct drm_display_mode *preferred_mode = NULL;
-	struct drm_connector *drm_connector;
 	const struct drm_connector_state *con_state = &dm_state->base;
 	struct dc_stream_state *stream = NULL;
 	struct drm_display_mode mode;
@@ -6058,22 +6124,35 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 	enum color_transfer_func tf = TRANSFER_FUNC_UNKNOWN;
 	struct dsc_dec_dpcd_caps dsc_caps;
 
+	struct dc_link *link = NULL;
 	struct dc_sink *sink = NULL;
 
 	drm_mode_init(&mode, drm_mode);
 	memset(&saved_mode, 0, sizeof(saved_mode));
 
-	if (aconnector == NULL) {
-		DRM_ERROR("aconnector is NULL!\n");
+	if (connector == NULL) {
+		DRM_ERROR("connector is NULL!\n");
 		return stream;
 	}
 
-	drm_connector = &aconnector->base;
+	if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK) {
+		aconnector = NULL;
+		aconnector = to_amdgpu_dm_connector(connector);
+		link = aconnector->dc_link;
+	} else {
+		struct drm_writeback_connector *wbcon = NULL;
+		struct amdgpu_dm_wb_connector *dm_wbcon = NULL;
 
-	if (!aconnector->dc_sink) {
-		sink = create_fake_sink(aconnector);
+		wbcon = drm_connector_to_writeback(connector);
+		dm_wbcon = to_amdgpu_dm_wb_connector(wbcon);
+		link = dm_wbcon->link;
+	}
+
+	if (!aconnector || !aconnector->dc_sink) {
+		sink = create_fake_sink(link);
 		if (!sink)
 			return stream;
+
 	} else {
 		sink = aconnector->dc_sink;
 		dc_sink_retain(sink);
@@ -6086,12 +6165,13 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 		goto finish;
 	}
 
+	/* We leave this NULL for writeback connectors */
 	stream->dm_stream_context = aconnector;
 
 	stream->timing.flags.LTE_340MCSC_SCRAMBLE =
-		drm_connector->display_info.hdmi.scdc.scrambling.low_rates;
+		connector->display_info.hdmi.scdc.scrambling.low_rates;
 
-	list_for_each_entry(preferred_mode, &aconnector->base.modes, head) {
+	list_for_each_entry(preferred_mode, &connector->modes, head) {
 		/* Search for preferred mode */
 		if (preferred_mode->type & DRM_MODE_TYPE_PREFERRED) {
 			native_mode_found = true;
@@ -6100,7 +6180,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 	}
 	if (!native_mode_found)
 		preferred_mode = list_first_entry_or_null(
-				&aconnector->base.modes,
+				&connector->modes,
 				struct drm_display_mode,
 				head);
 
@@ -6114,12 +6194,14 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 		 * and the modelist may not be filled in time.
 		 */
 		DRM_DEBUG_DRIVER("No preferred mode found\n");
-	} else {
+	} else if (aconnector) {
 		recalculate_timing = is_freesync_video_mode(&mode, aconnector);
 		if (recalculate_timing) {
 			freesync_mode = get_highest_refresh_rate_mode(aconnector, false);
 			drm_mode_copy(&saved_mode, &mode);
+			saved_mode.picture_aspect_ratio = mode.picture_aspect_ratio;
 			drm_mode_copy(&mode, freesync_mode);
+			mode.picture_aspect_ratio = saved_mode.picture_aspect_ratio;
 		} else {
 			decide_crtc_timing_for_drm_display_mode(
 					&mode, preferred_mode, scale);
@@ -6137,13 +6219,17 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 	 */
 	if (!scale || mode_refresh != preferred_refresh)
 		fill_stream_properties_from_drm_display_mode(
-			stream, &mode, &aconnector->base, con_state, NULL,
+			stream, &mode, connector, con_state, NULL,
 			requested_bpc);
 	else
 		fill_stream_properties_from_drm_display_mode(
-			stream, &mode, &aconnector->base, con_state, old_stream,
+			stream, &mode, connector, con_state, old_stream,
 			requested_bpc);
 
+	/* The rest isn't needed for writeback connectors */
+	if (!aconnector)
+		goto finish;
+
 	if (aconnector->timing_changed) {
 		drm_dbg(aconnector->base.dev,
 			"overriding timing for automated test, bpc %d, changing to %d\n",
@@ -6161,15 +6247,16 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 
 	fill_audio_info(
 		&stream->audio_info,
-		drm_connector,
+		connector,
 		sink);
 
 	update_stream_signal(stream, sink);
 
 	if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
 		mod_build_hf_vsif_infopacket(stream, &stream->vsp_infopacket);
-
-	if (stream->link->psr_settings.psr_feature_enabled || stream->link->replay_settings.replay_feature_enabled) {
+	else if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT ||
+			 stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST ||
+			 stream->signal == SIGNAL_TYPE_EDP) {
 		//
 		// should decide stream support vsc sdp colorimetry capability
 		// before building vsc info packet
@@ -6185,8 +6272,9 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 		if (stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22)
 			tf = TRANSFER_FUNC_GAMMA_22;
 		mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space, tf);
-		aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY;
 
+		if (stream->link->psr_settings.psr_feature_enabled)
+			aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY;
 	}
 finish:
 	dc_sink_release(sink);
@@ -6268,7 +6356,7 @@ int amdgpu_dm_connector_atomic_set_property(struct drm_connector *connector,
 		dm_new_state->underscan_enable = val;
 		ret = 0;
 	} else if (property == adev->mode_info.abm_level_property) {
-		dm_new_state->abm_level = val;
+		dm_new_state->abm_level = val ?: ABM_LEVEL_IMMEDIATE_DISABLE;
 		ret = 0;
 	}
 
@@ -6313,7 +6401,8 @@ int amdgpu_dm_connector_atomic_get_property(struct drm_connector *connector,
 		*val = dm_state->underscan_enable;
 		ret = 0;
 	} else if (property == adev->mode_info.abm_level_property) {
-		*val = dm_state->abm_level;
+		*val = (dm_state->abm_level != ABM_LEVEL_IMMEDIATE_DISABLE) ?
+			dm_state->abm_level : 0;
 		ret = 0;
 	}
 
@@ -6386,7 +6475,8 @@ void amdgpu_dm_connector_funcs_reset(struct drm_connector *connector)
 		state->pbn = 0;
 
 		if (connector->connector_type == DRM_MODE_CONNECTOR_eDP)
-			state->abm_level = amdgpu_dm_abm_level;
+			state->abm_level = amdgpu_dm_abm_level ?:
+				ABM_LEVEL_IMMEDIATE_DISABLE;
 
 		__drm_atomic_helper_connector_reset(connector, &state->base);
 	}
@@ -6444,10 +6534,15 @@ amdgpu_dm_connector_late_register(struct drm_connector *connector)
 static void amdgpu_dm_connector_funcs_force(struct drm_connector *connector)
 {
 	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
-	struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
 	struct dc_link *dc_link = aconnector->dc_link;
 	struct dc_sink *dc_em_sink = aconnector->dc_em_sink;
 	struct edid *edid;
+	struct i2c_adapter *ddc;
+
+	if (dc_link->aux_mode)
+		ddc = &aconnector->dm_dp_aux.aux.ddc;
+	else
+		ddc = &aconnector->i2c->base;
 
 	/*
 	 * Note: drm_get_edid gets edid in the following order:
@@ -6455,7 +6550,7 @@ static void amdgpu_dm_connector_funcs_force(struct drm_connector *connector)
 	 * 2) firmware EDID if set via edid_firmware module parameter
 	 * 3) regular DDC read.
 	 */
-	edid = drm_get_edid(connector, &amdgpu_connector->ddc_bus->aux.ddc);
+	edid = drm_get_edid(connector, ddc);
 	if (!edid) {
 		DRM_ERROR("No EDID found on connector: %s.\n", connector->name);
 		return;
@@ -6496,12 +6591,18 @@ static int get_modes(struct drm_connector *connector)
 static void create_eml_sink(struct amdgpu_dm_connector *aconnector)
 {
 	struct drm_connector *connector = &aconnector->base;
-	struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(&aconnector->base);
+	struct dc_link *dc_link = aconnector->dc_link;
 	struct dc_sink_init_data init_params = {
 			.link = aconnector->dc_link,
 			.sink_signal = SIGNAL_TYPE_VIRTUAL
 	};
 	struct edid *edid;
+	struct i2c_adapter *ddc;
+
+	if (dc_link->aux_mode)
+		ddc = &aconnector->dm_dp_aux.aux.ddc;
+	else
+		ddc = &aconnector->i2c->base;
 
 	/*
 	 * Note: drm_get_edid gets edid in the following order:
@@ -6509,7 +6610,7 @@ static void create_eml_sink(struct amdgpu_dm_connector *aconnector)
 	 * 2) firmware EDID if set via edid_firmware module parameter
 	 * 3) regular DDC read.
 	 */
-	edid = drm_get_edid(connector, &amdgpu_connector->ddc_bus->aux.ddc);
+	edid = drm_get_edid(connector, ddc);
 	if (!edid) {
 		DRM_ERROR("No EDID found on connector: %s.\n", connector->name);
 		return;
@@ -6564,7 +6665,7 @@ static enum dc_status dm_validate_stream_and_context(struct dc *dc,
 	if (!dc_plane_state)
 		goto cleanup;
 
-	dc_state = dc_create_state(dc);
+	dc_state = dc_state_create(dc);
 	if (!dc_state)
 		goto cleanup;
 
@@ -6591,9 +6692,9 @@ static enum dc_status dm_validate_stream_and_context(struct dc *dc,
 		dc_result = dc_validate_plane(dc, dc_plane_state);
 
 	if (dc_result == DC_OK)
-		dc_result = dc_add_stream_to_ctx(dc, dc_state, stream);
+		dc_result = dc_state_add_stream(dc, dc_state, stream);
 
-	if (dc_result == DC_OK && !dc_add_plane_to_context(
+	if (dc_result == DC_OK && !dc_state_add_plane(
 						dc,
 						stream,
 						dc_plane_state,
@@ -6605,7 +6706,7 @@ static enum dc_status dm_validate_stream_and_context(struct dc *dc,
 
 cleanup:
 	if (dc_state)
-		dc_release_state(dc_state);
+		dc_state_release(dc_state);
 
 	if (dc_plane_state)
 		dc_plane_state_release(dc_plane_state);
@@ -6627,7 +6728,7 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 	enum dc_status dc_result = DC_OK;
 
 	do {
-		stream = create_stream_for_sink(aconnector, drm_mode,
+		stream = create_stream_for_sink(connector, drm_mode,
 						dm_state, old_stream,
 						requested_bpc);
 		if (stream == NULL) {
@@ -6635,6 +6736,9 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 			break;
 		}
 
+		if (aconnector->base.connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			return stream;
+
 		dc_result = dc_validate_stream(adev->dm.dc, stream);
 		if (dc_result == DC_OK && stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
 			dc_result = dm_dp_mst_is_port_support_mode(aconnector, stream);
@@ -6910,8 +7014,7 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder,
 	if (IS_ERR(mst_state))
 		return PTR_ERR(mst_state);
 
-	if (!mst_state->pbn_div.full)
-		mst_state->pbn_div.full = dfixed_const(dm_mst_get_pbn_divider(aconnector->mst_root->dc_link));
+	mst_state->pbn_div.full = dfixed_const(dm_mst_get_pbn_divider(aconnector->mst_root->dc_link));
 
 	if (!state->duplicated) {
 		int max_bpc = conn_state->max_requested_bpc;
@@ -6955,6 +7058,9 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state,
 
 	for_each_new_connector_in_state(state, connector, new_con_state, i) {
 
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 		aconnector = to_amdgpu_dm_connector(connector);
 
 		if (!aconnector->mst_output_port)
@@ -7534,7 +7640,6 @@ create_i2c(struct ddc_service *ddc_service,
 	if (!i2c)
 		return NULL;
 	i2c->base.owner = THIS_MODULE;
-	i2c->base.class = I2C_CLASS_DDC;
 	i2c->base.dev.parent = &adev->pdev->dev;
 	i2c->base.algo = &amdgpu_dm_i2c_algo;
 	snprintf(i2c->base.name, sizeof(i2c->base.name), "AMDGPU DM i2c hw bus %d", link_index);
@@ -7560,6 +7665,7 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm,
 	struct dc_link *link = dc_get_link_at_index(dc, link_index);
 	struct amdgpu_i2c_adapter *i2c;
 
+	/* Not needed for writeback connector */
 	link->priv = aconnector;
 
 
@@ -8170,6 +8276,10 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 			bundle->surface_updates[planes_count].gamma = dc_plane->gamma_correction;
 			bundle->surface_updates[planes_count].in_transfer_func = dc_plane->in_transfer_func;
 			bundle->surface_updates[planes_count].gamut_remap_matrix = &dc_plane->gamut_remap_matrix;
+			bundle->surface_updates[planes_count].hdr_mult = dc_plane->hdr_mult;
+			bundle->surface_updates[planes_count].func_shaper = dc_plane->in_shaper_func;
+			bundle->surface_updates[planes_count].lut3d_func = dc_plane->lut3d_func;
+			bundle->surface_updates[planes_count].blend_tf = dc_plane->blend_tf;
 		}
 
 		amdgpu_dm_plane_fill_dc_scaling_info(dm->adev, new_plane_state,
@@ -8206,6 +8316,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 			fill_dc_dirty_rects(plane, old_plane_state,
 					    new_plane_state, new_crtc_state,
 					    &bundle->flip_addrs[planes_count],
+					    acrtc_state->stream->link->psr_settings.psr_version ==
+					    DC_PSR_VERSION_SU_1,
 					    &dirty_rects_changed);
 
 			/*
@@ -8381,6 +8493,10 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 				&acrtc_state->stream->csc_color_matrix;
 			bundle->stream_update.out_transfer_func =
 				acrtc_state->stream->out_transfer_func;
+			bundle->stream_update.lut3d_func =
+				(struct dc_3dlut *) acrtc_state->stream->lut3d_func;
+			bundle->stream_update.func_shaper =
+				(struct dc_transfer_func *) acrtc_state->stream->func_shaper;
 		}
 
 		acrtc_state->stream->abm_level = acrtc_state->abm_level;
@@ -8514,6 +8630,9 @@ static void amdgpu_dm_commit_audio(struct drm_device *dev,
 		if (!drm_atomic_crtc_needs_modeset(new_crtc_state))
 			continue;
 
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 notify:
 		aconnector = to_amdgpu_dm_connector(connector);
 
@@ -8547,6 +8666,9 @@ notify:
 		if (!status)
 			continue;
 
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 		aconnector = to_amdgpu_dm_connector(connector);
 
 		mutex_lock(&adev->dm.audio_lock);
@@ -8572,6 +8694,12 @@ static void amdgpu_dm_crtc_copy_transient_flags(struct drm_crtc_state *crtc_stat
 	stream_state->mode_changed = drm_atomic_crtc_needs_modeset(crtc_state);
 }
 
+/* Ask DC to remove writeback 0 (presumably the DWB pipe instance) from @crtc_state's stream. */
+static void dm_clear_writeback(struct amdgpu_display_manager *dm, struct dm_crtc_state *crtc_state)
+{
+	dc_stream_remove_writeback(dm->dc, crtc_state->stream, 0);
+}
+
 static void amdgpu_dm_commit_streams(struct drm_atomic_state *state,
 					struct dc_state *dc_state)
 {
@@ -8581,9 +8709,38 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state,
 	struct drm_crtc *crtc;
 	struct drm_crtc_state *old_crtc_state, *new_crtc_state;
 	struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state;
+	struct drm_connector_state *old_con_state;
+	struct drm_connector *connector;
 	bool mode_set_reset_required = false;
 	u32 i;
 
+	/* Disable writeback */
+	for_each_old_connector_in_state(state, connector, old_con_state, i) {
+		struct dm_connector_state *dm_old_con_state;
+		struct amdgpu_crtc *acrtc;
+
+		if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
+		old_crtc_state = NULL;
+
+		dm_old_con_state = to_dm_connector_state(old_con_state);
+		if (!dm_old_con_state->base.crtc)
+			continue;
+
+		acrtc = to_amdgpu_crtc(dm_old_con_state->base.crtc);
+		if (acrtc)
+			old_crtc_state = drm_atomic_get_old_crtc_state(state, &acrtc->base);
+
+		if (!acrtc->wb_enabled)
+			continue;
+
+		dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
+
+		dm_clear_writeback(dm, dm_old_crtc_state);
+		acrtc->wb_enabled = false;
+	}
+
 	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state,
 				      new_crtc_state, i) {
 		struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
@@ -8708,7 +8865,7 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state,
 					dc_stream_get_status(dm_new_crtc_state->stream);
 
 			if (!status)
-				status = dc_stream_get_status_from_state(dc_state,
+				status = dc_state_get_stream_status(dc_state,
 									 dm_new_crtc_state->stream);
 			if (!status)
 				drm_err(dev,
@@ -8720,6 +8877,128 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state,
 	}
 }
 
+/**
+ * dm_set_writeback() - add a writeback capture to a CRTC's stream
+ * @dm: display manager; supplies the dc instance and the amdgpu device
+ * @crtc_state: DM CRTC state whose stream the writeback is added to
+ * @connector: writeback connector the job belongs to
+ * @new_con_state: new connector state; its writeback_job provides the target fb
+ *
+ * Fills a dc_writeback_info with fixed DWB settings, the CRTC mode size and the
+ * address/pitches of the job's framebuffer, hands it to
+ * dc_stream_add_writeback() and queues the job on the writeback connector.
+ * If the allocation or a lookup fails, an error is logged and the job is not queued.
+ */
+static void dm_set_writeback(struct amdgpu_display_manager *dm,
+			      struct dm_crtc_state *crtc_state,
+			      struct drm_connector *connector,
+			      struct drm_connector_state *new_con_state)
+{
+	struct drm_writeback_connector *wb_conn = drm_connector_to_writeback(connector);
+	struct amdgpu_device *adev = dm->adev;
+	struct amdgpu_crtc *acrtc;
+	struct dc_writeback_info *wb_info;
+	struct pipe_ctx *pipe = NULL;
+	struct amdgpu_framebuffer *afb;
+	int i = 0;
+
+	wb_info = kzalloc(sizeof(*wb_info), GFP_KERNEL);
+	if (!wb_info) {
+		DRM_ERROR("Failed to allocate wb_info\n");
+		return;
+	}
+
+	acrtc = to_amdgpu_crtc(wb_conn->encoder.crtc);
+	if (!acrtc) {
+		DRM_ERROR("no amdgpu_crtc found\n");
+		goto out_free;
+	}
+
+	afb = to_amdgpu_framebuffer(new_con_state->writeback_job->fb);
+	if (!afb) {
+		DRM_ERROR("No amdgpu_framebuffer found\n");
+		goto out_free;
+	}
+
+	for (i = 0; i < MAX_PIPES; i++) {
+		if (dm->dc->current_state->res_ctx.pipe_ctx[i].stream == crtc_state->stream) {
+			pipe = &dm->dc->current_state->res_ctx.pipe_ctx[i];
+			break;
+		}
+	}
+
+	/* pipe->plane_state is read below; bail out if no pipe carries this stream */
+	if (!pipe) {
+		DRM_ERROR("No pipe found for writeback stream\n");
+		goto out_free;
+	}
+
+	/* fill in wb_info */
+	wb_info->wb_enabled = true;
+
+	wb_info->dwb_pipe_inst = 0;
+	wb_info->dwb_params.dwbscl_black_color = 0;
+	wb_info->dwb_params.hdr_mult = 0x1F000;
+	wb_info->dwb_params.csc_params.gamut_adjust_type = CM_GAMUT_ADJUST_TYPE_BYPASS;
+	wb_info->dwb_params.csc_params.gamut_coef_format = CM_GAMUT_REMAP_COEF_FORMAT_S2_13;
+	wb_info->dwb_params.output_depth = DWB_OUTPUT_PIXEL_DEPTH_10BPC;
+	wb_info->dwb_params.cnv_params.cnv_out_bpc = DWB_CNV_OUT_BPC_10BPC;
+
+	/* width & height from crtc */
+	wb_info->dwb_params.cnv_params.src_width = acrtc->base.mode.crtc_hdisplay;
+	wb_info->dwb_params.cnv_params.src_height = acrtc->base.mode.crtc_vdisplay;
+	wb_info->dwb_params.dest_width = acrtc->base.mode.crtc_hdisplay;
+	wb_info->dwb_params.dest_height = acrtc->base.mode.crtc_vdisplay;
+
+	wb_info->dwb_params.cnv_params.crop_en = false;
+	wb_info->dwb_params.stereo_params.stereo_enabled = false;
+
+	wb_info->dwb_params.cnv_params.out_max_pix_val = 0x3ff;	// 10 bits
+	wb_info->dwb_params.cnv_params.out_min_pix_val = 0;
+	wb_info->dwb_params.cnv_params.fc_out_format = DWB_OUT_FORMAT_32BPP_ARGB;
+	wb_info->dwb_params.cnv_params.out_denorm_mode = DWB_OUT_DENORM_BYPASS;
+
+	wb_info->dwb_params.out_format = dwb_scaler_mode_bypass444;
+
+	wb_info->dwb_params.capture_rate = dwb_capture_rate_0;
+
+	wb_info->dwb_params.scaler_taps.h_taps = 4;
+	wb_info->dwb_params.scaler_taps.v_taps = 4;
+	wb_info->dwb_params.scaler_taps.h_taps_c = 2;
+	wb_info->dwb_params.scaler_taps.v_taps_c = 2;
+	wb_info->dwb_params.subsample_position = DWB_INTERSTITIAL_SUBSAMPLING;
+
+	wb_info->mcif_buf_params.luma_pitch = afb->base.pitches[0];
+	wb_info->mcif_buf_params.chroma_pitch = afb->base.pitches[1];
+
+	for (i = 0; i < DWB_MCIF_BUF_COUNT; i++) {
+		wb_info->mcif_buf_params.luma_address[i] = afb->address;
+		wb_info->mcif_buf_params.chroma_address[i] = 0;
+	}
+
+	wb_info->mcif_buf_params.p_vmid = 1;
+	if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0)) {
+		wb_info->mcif_warmup_params.start_address.quad_part = afb->address;
+		wb_info->mcif_warmup_params.region_size =
+			wb_info->mcif_buf_params.luma_pitch * wb_info->dwb_params.dest_height;
+	}
+	wb_info->mcif_warmup_params.p_vmid = 1;
+	wb_info->writeback_source_plane = pipe->plane_state;
+
+	dc_stream_add_writeback(dm->dc, crtc_state->stream, wb_info);
+
+	acrtc->wb_pending = true;
+	acrtc->wb_conn = wb_conn;
+	drm_writeback_queue_job(wb_conn, new_con_state);
+
+out_free:
+	/*
+	 * wb_info is not referenced by this function after this point.
+	 * NOTE(review): assumes dc_stream_add_writeback() copies *wb_info - confirm.
+	 */
+	kfree(wb_info);
+}
+
 /**
  * amdgpu_dm_atomic_commit_tail() - AMDgpu DM's commit tail implementation.
  * @state: The atomic state to commit
@@ -8747,16 +9003,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 
 	trace_amdgpu_dm_atomic_commit_tail_begin(state);
 
-	if (dm->dc->caps.ips_support) {
-		for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) {
-			if (new_con_state->crtc &&
-				new_con_state->crtc->state->active &&
-				drm_atomic_crtc_needs_modeset(new_con_state->crtc->state)) {
-				dc_dmub_srv_exit_low_power_state(dm->dc);
-				break;
-			}
-		}
-	}
+	if (dm->dc->caps.ips_support && dm->dc->idle_optimizations_allowed)
+		dc_allow_idle_optimizations(dm->dc, false);
 
 	drm_atomic_helper_update_legacy_modeset_state(dev, state);
 	drm_dp_mst_atomic_wait_for_dependencies(state);
@@ -8770,7 +9018,12 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 	for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) {
 		struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state);
 		struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc);
-		struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+		struct amdgpu_dm_connector *aconnector;
+
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
+		aconnector = to_amdgpu_dm_connector(connector);
 
 		if (!adev->dm.hdcp_workqueue)
 			continue;
@@ -8954,6 +9207,10 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 		 * To fix this, DC should permit updating only stream properties.
 		 */
 		dummy_updates = kzalloc(sizeof(struct dc_surface_update) * MAX_SURFACES, GFP_ATOMIC);
+		if (!dummy_updates) {
+			DRM_ERROR("Failed to allocate memory for dummy_updates.\n");
+			continue;
+		}
 		for (j = 0; j < status->plane_count; j++)
 			dummy_updates[j].surface = status->plane_states[0];
 
@@ -9047,6 +9304,31 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 			amdgpu_dm_commit_planes(state, dev, dm, crtc, wait_for_vblank);
 	}
 
+	/* Enable writeback */
+	for_each_new_connector_in_state(state, connector, new_con_state, i) {
+		struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state);
+		struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc);
+
+		if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
+		if (!new_con_state->writeback_job)
+			continue;
+
+		new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base);
+
+		if (!new_crtc_state)
+			continue;
+
+		if (acrtc->wb_enabled)
+			continue;
+
+		dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
+
+		dm_set_writeback(dm, dm_new_crtc_state, connector, new_con_state);
+		acrtc->wb_enabled = true;
+	}
+
 	/* Update audio instances for each connector. */
 	amdgpu_dm_commit_audio(dev, state);
 
@@ -9164,10 +9446,15 @@ out:
 void dm_restore_drm_connector_state(struct drm_device *dev,
 				    struct drm_connector *connector)
 {
-	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+	struct amdgpu_dm_connector *aconnector;
 	struct amdgpu_crtc *disconnected_acrtc;
 	struct dm_crtc_state *acrtc_state;
 
+	if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+		return;
+
+	aconnector = to_amdgpu_dm_connector(connector);
+
 	if (!aconnector->dc_sink || !connector->state || !connector->encoder)
 		return;
 
@@ -9244,12 +9531,16 @@ static void get_freesync_config_for_crtc(
 	struct dm_connector_state *new_con_state)
 {
 	struct mod_freesync_config config = {0};
-	struct amdgpu_dm_connector *aconnector =
-			to_amdgpu_dm_connector(new_con_state->base.connector);
+	struct amdgpu_dm_connector *aconnector;
 	struct drm_display_mode *mode = &new_crtc_state->base.mode;
 	int vrefresh = drm_mode_vrefresh(mode);
 	bool fs_vid_mode = false;
 
+	if (new_con_state->base.connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+		return;
+
+	aconnector = to_amdgpu_dm_connector(new_con_state->base.connector);
+
 	new_crtc_state->vrr_supported = new_con_state->freesync_capable &&
 					vrefresh >= aconnector->min_vfreq &&
 					vrefresh <= aconnector->max_vfreq;
@@ -9349,6 +9640,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
 	 * update changed items
 	 */
 	struct amdgpu_crtc *acrtc = NULL;
+	struct drm_connector *connector = NULL;
 	struct amdgpu_dm_connector *aconnector = NULL;
 	struct drm_connector_state *drm_new_conn_state = NULL, *drm_old_conn_state = NULL;
 	struct dm_connector_state *dm_new_conn_state = NULL, *dm_old_conn_state = NULL;
@@ -9358,15 +9650,17 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
 	dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
 	dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
 	acrtc = to_amdgpu_crtc(crtc);
-	aconnector = amdgpu_dm_find_first_crtc_matching_connector(state, crtc);
+	connector = amdgpu_dm_find_first_crtc_matching_connector(state, crtc);
+	if (connector)
+		aconnector = to_amdgpu_dm_connector(connector);
 
 	/* TODO This hack should go away */
-	if (aconnector && enable) {
+	if (connector && enable) {
 		/* Make sure fake sink is created in plug-in scenario */
 		drm_new_conn_state = drm_atomic_get_new_connector_state(state,
-							    &aconnector->base);
+									connector);
 		drm_old_conn_state = drm_atomic_get_old_connector_state(state,
-							    &aconnector->base);
+									connector);
 
 		if (IS_ERR(drm_new_conn_state)) {
 			ret = PTR_ERR_OR_ZERO(drm_new_conn_state);
@@ -9492,7 +9786,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
 				crtc->base.id);
 
 		/* i.e. reset mode */
-		if (dc_remove_stream_from_ctx(
+		if (dc_state_remove_stream(
 				dm->dc,
 				dm_state->context,
 				dm_old_crtc_state->stream) != DC_OK) {
@@ -9513,7 +9807,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
 		 * added MST connectors not found in existing crtc_state in the chained mode
 		 * TODO: need to dig out the root cause of that
 		 */
-		if (!aconnector)
+		if (!connector)
 			goto skip_modeset;
 
 		if (modereset_required(new_crtc_state))
@@ -9535,7 +9829,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
 			DRM_DEBUG_ATOMIC("Enabling DRM crtc: %d\n",
 					 crtc->base.id);
 
-			if (dc_add_stream_to_ctx(
+			if (dc_state_add_stream(
 					dm->dc,
 					dm_state->context,
 					dm_new_crtc_state->stream) != DC_OK) {
@@ -9556,7 +9850,7 @@ skip_modeset:
 	 * We want to do dc stream updates that do not require a
 	 * full modeset below.
 	 */
-	if (!(enable && aconnector && new_crtc_state->active))
+	if (!(enable && connector && new_crtc_state->active))
 		return 0;
 	/*
 	 * Given above conditions, the dc state cannot be NULL because:
@@ -9582,6 +9876,7 @@ skip_modeset:
 	 * when a modeset is needed, to ensure it gets reprogrammed.
 	 */
 	if (dm_new_crtc_state->base.color_mgmt_changed ||
+	    dm_old_crtc_state->regamma_tf != dm_new_crtc_state->regamma_tf ||
 	    drm_atomic_crtc_needs_modeset(new_crtc_state)) {
 		ret = amdgpu_dm_update_crtc_color_mgmt(dm_new_crtc_state);
 		if (ret)
@@ -9615,7 +9910,8 @@ static bool should_reset_plane(struct drm_atomic_state *state,
 	 * TODO: Remove this hack for all asics once it proves that the
 	 * fast updates works fine on DCN3.2+.
 	 */
-	if (adev->ip_versions[DCE_HWIP][0] < IP_VERSION(3, 2, 0) && state->allow_modeset)
+	if (amdgpu_ip_version(adev, DCE_HWIP, 0) < IP_VERSION(3, 2, 0) &&
+	    state->allow_modeset)
 		return true;
 
 	/* Exit early if we know that we're adding or removing the plane. */
@@ -9649,6 +9945,10 @@ static bool should_reset_plane(struct drm_atomic_state *state,
 	 */
 	for_each_oldnew_plane_in_state(state, other, old_other_state, new_other_state, i) {
 		struct amdgpu_framebuffer *old_afb, *new_afb;
+		struct dm_plane_state *dm_new_other_state, *dm_old_other_state;
+
+		dm_new_other_state = to_dm_plane_state(new_other_state);
+		dm_old_other_state = to_dm_plane_state(old_other_state);
 
 		if (other->type == DRM_PLANE_TYPE_CURSOR)
 			continue;
@@ -9685,6 +9985,18 @@ static bool should_reset_plane(struct drm_atomic_state *state,
 		    old_other_state->color_encoding != new_other_state->color_encoding)
 			return true;
 
+		/* HDR/Transfer Function changes. */
+		if (dm_old_other_state->degamma_tf != dm_new_other_state->degamma_tf ||
+		    dm_old_other_state->degamma_lut != dm_new_other_state->degamma_lut ||
+		    dm_old_other_state->hdr_mult != dm_new_other_state->hdr_mult ||
+		    dm_old_other_state->ctm != dm_new_other_state->ctm ||
+		    dm_old_other_state->shaper_lut != dm_new_other_state->shaper_lut ||
+		    dm_old_other_state->shaper_tf != dm_new_other_state->shaper_tf ||
+		    dm_old_other_state->lut3d != dm_new_other_state->lut3d ||
+		    dm_old_other_state->blend_lut != dm_new_other_state->blend_lut ||
+		    dm_old_other_state->blend_tf != dm_new_other_state->blend_tf)
+			return true;
+
 		/* Framebuffer checks fall at the end. */
 		if (!old_other_state->fb || !new_other_state->fb)
 			continue;
@@ -9839,7 +10151,7 @@ static int dm_update_plane_state(struct dc *dc,
 		if (ret)
 			return ret;
 
-		if (!dc_remove_plane_from_context(
+		if (!dc_state_remove_plane(
 				dc,
 				dm_old_crtc_state->stream,
 				dm_old_plane_state->dc_state,
@@ -9917,7 +10229,7 @@ static int dm_update_plane_state(struct dc *dc,
 		 * state. It'll be released when the atomic state is
 		 * cleaned.
 		 */
-		if (!dc_add_plane_to_context(
+		if (!dc_state_add_plane(
 				dc,
 				dm_new_crtc_state->stream,
 				dc_new_plane_state,
@@ -10079,6 +10391,9 @@ static int add_affected_mst_dsc_crtcs(struct drm_atomic_state *state, struct drm
 		if (conn_state->crtc != crtc)
 			continue;
 
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 		aconnector = to_amdgpu_dm_connector(connector);
 		if (!aconnector->mst_output_port || !aconnector->mst_root)
 			aconnector = NULL;
@@ -10436,11 +10751,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
 			goto fail;
 		}
 
-		ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars);
-		if (ret) {
-			DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n");
-			ret = -EINVAL;
-			goto fail;
+		if (dc_resource_is_dsc_encoding_supported(dc)) {
+			ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars);
+			if (ret) {
+				DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n");
+				ret = -EINVAL;
+				goto fail;
+			}
 		}
 
 		ret = dm_update_mst_vcpi_slots_for_dsc(state, dm_state->context, vars);
@@ -10598,7 +10915,7 @@ static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm,
 	input->cea_total_length = total_length;
 	memcpy(input->payload, data, length);
 
-	res = dm_execute_dmub_cmd(dm->dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
+	res = dc_wake_and_execute_dmub_cmd(dm->dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
 	if (!res) {
 		DRM_ERROR("EDID CEA parser failed\n");
 		return false;
@@ -10789,8 +11106,7 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
 	struct dm_connector_state *dm_con_state = NULL;
 	struct dc_sink *sink;
 
-	struct drm_device *dev = connector->dev;
-	struct amdgpu_device *adev = drm_to_adev(dev);
+	struct amdgpu_device *adev = drm_to_adev(connector->dev);
 	struct amdgpu_hdmi_vsdb_info vsdb_info = {0};
 	bool freesync_capable = false;
 	enum adaptive_sync_type as_type = ADAPTIVE_SYNC_TYPE_NONE;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 3d480be802cb..9c1871b866cc 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -32,6 +32,7 @@
 #include <drm/drm_crtc.h>
 #include <drm/drm_plane.h>
 #include "link_service_types.h"
+#include <drm/drm_writeback.h>
 
 /*
  * This file contains the definition for amdgpu_display_manager
@@ -54,6 +55,9 @@
 #define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_IEEE_REGISTRATION_ID 0x00001A
 #define AMD_VSDB_VERSION_3_FEATURECAP_REPLAYMODE 0x40
 #define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_VERSION_3 0x3
+
+#define AMDGPU_HDR_MULT_DEFAULT (0x100000000LL)
+
 /*
 #include "include/amdgpu_dal_power_if.h"
 #include "amdgpu_dm_irq.h"
@@ -714,11 +718,107 @@ static inline void amdgpu_dm_set_mst_status(uint8_t *status,
 
 #define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base)
 
+struct amdgpu_dm_wb_connector {
+	struct drm_writeback_connector base;
+	struct dc_link *link;
+};
+
+#define to_amdgpu_dm_wb_connector(x) container_of(x, struct amdgpu_dm_wb_connector, base)
+
 extern const struct amdgpu_ip_block_version dm_ip_block;
 
+/* enum amdgpu_transfer_function: pre-defined transfer function supported by AMD.
+ *
+ * It includes standardized transfer functions and pure power functions. The
+ * transfer function coefficients are available at modules/color/color_gamma.c
+ */
+enum amdgpu_transfer_function {
+	AMDGPU_TRANSFER_FUNCTION_DEFAULT,
+	AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF,
+	AMDGPU_TRANSFER_FUNCTION_PQ_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_IDENTITY,
+	AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_BT709_OETF,
+	AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_COUNT
+};
+
 struct dm_plane_state {
 	struct drm_plane_state base;
 	struct dc_plane_state *dc_state;
+
+	/* Plane color mgmt */
+	/**
+	 * @degamma_lut:
+	 *
+	 * 1D LUT for mapping framebuffer/plane pixel data before sampling or
+	 * blending operations. It's usually applied to linearize input space.
+	 * The blob (if not NULL) is an array of &struct drm_color_lut.
+	 */
+	struct drm_property_blob *degamma_lut;
+	/**
+	 * @degamma_tf:
+	 *
+	 * Predefined transfer function to tell DC driver the input space to
+	 * linearize.
+	 */
+	enum amdgpu_transfer_function degamma_tf;
+	/**
+	 * @hdr_mult:
+	 *
+	 * Multiplier to 'gain' the plane.  When PQ is decoded using the fixed
+	 * func transfer function to the internal FP16 fb, 1.0 -> 80 nits (on
+	 * AMD at least). When sRGB is decoded, 1.0 -> 1.0, obviously.
+	 * Therefore, 1.0 multiplier = 80 nits for SDR content.  So if you
+	 * want 203 nits for SDR content, pass in (203.0 / 80.0).  Format is
+	 * S31.32 sign-magnitude.
+	 *
+	 * HDR multiplier can have a wide range beyond [0.0, 1.0]. This means that
+	 * PQ TF is needed for any subsequent linear-to-non-linear transforms.
+	 */
+	__u64 hdr_mult;
+	/**
+	 * @ctm:
+	 *
+	 * Color transformation matrix. The blob (if not NULL) is a &struct
+	 * drm_color_ctm_3x4.
+	 */
+	struct drm_property_blob *ctm;
+	/**
+	 * @shaper_lut: shaper lookup table blob. The blob (if not NULL) is an
+	 * array of &struct drm_color_lut.
+	 */
+	struct drm_property_blob *shaper_lut;
+	/**
+	 * @shaper_tf:
+	 *
+	 * Predefined transfer function to delinearize color space.
+	 */
+	enum amdgpu_transfer_function shaper_tf;
+	/**
+	 * @lut3d: 3D lookup table blob. The blob (if not NULL) is an array of
+	 * &struct drm_color_lut.
+	 */
+	struct drm_property_blob *lut3d;
+	/**
+	 * @blend_lut: blend lut lookup table blob. The blob (if not NULL) is an
+	 * array of &struct drm_color_lut.
+	 */
+	struct drm_property_blob *blend_lut;
+	/**
+	 * @blend_tf:
+	 *
+	 * Pre-defined transfer function for converting plane pixel data before
+	 * applying blend LUT.
+	 */
+	enum amdgpu_transfer_function blend_tf;
 };
 
 struct dm_crtc_state {
@@ -743,6 +843,14 @@ struct dm_crtc_state {
 	struct dc_info_packet vrr_infopacket;
 
 	int abm_level;
+
+	/**
+	 * @regamma_tf:
+	 *
+	 * Pre-defined transfer function for converting internal FB -> wire
+	 * encoding.
+	 */
+	enum amdgpu_transfer_function regamma_tf;
 };
 
 #define to_dm_crtc_state(x) container_of(x, struct dm_crtc_state, base)
@@ -804,14 +912,22 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
 
 void amdgpu_dm_trigger_timing_sync(struct drm_device *dev);
 
+/* 3D LUT max size is 17x17x17 (4913 entries) */
+#define MAX_COLOR_3DLUT_SIZE 17
+#define MAX_COLOR_3DLUT_BITDEPTH 12
+int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev,
+				struct drm_plane_state *plane_state);
+/* 1D LUT size */
 #define MAX_COLOR_LUT_ENTRIES 4096
 /* Legacy gamm LUT users such as X doesn't like large LUT sizes */
 #define MAX_COLOR_LEGACY_LUT_ENTRIES 256
 
 void amdgpu_dm_init_color_mod(void);
+int amdgpu_dm_create_color_properties(struct amdgpu_device *adev);
 int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state);
 int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc);
 int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
+				      struct drm_plane_state *plane_state,
 				      struct dc_plane_state *dc_plane_state);
 
 void amdgpu_dm_update_connector_after_detect(
@@ -834,7 +950,7 @@ struct dc_stream_state *
 int dm_atomic_get_state(struct drm_atomic_state *state,
 			struct dm_atomic_state **dm_state);
 
-struct amdgpu_dm_connector *
+struct drm_connector *
 amdgpu_dm_find_first_crtc_matching_connector(struct drm_atomic_state *state,
 					     struct drm_crtc *crtc);
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index a4cb23d059bd..c87b64e464ed 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -72,6 +72,7 @@
  */
 
 #define MAX_DRM_LUT_VALUE 0xFFFF
+#define SDR_WHITE_LEVEL_INIT_VALUE 80
 
 /**
  * amdgpu_dm_init_color_mod - Initialize the color module.
@@ -84,6 +85,247 @@ void amdgpu_dm_init_color_mod(void)
 	setup_x_points_distribution();
 }
 
+static inline struct fixed31_32 amdgpu_dm_fixpt_from_s3132(__u64 x)
+{
+	struct fixed31_32 val;
+
+	/* If negative, convert to 2's complement. */
+	if (x & (1ULL << 63))
+		x = -(x & ~(1ULL << 63));
+
+	val.value = x;
+	return val;
+}
+
+#ifdef AMD_PRIVATE_COLOR
+/* Pre-defined Transfer Functions (TF)
+ *
+ * AMD driver supports pre-defined mathematical functions for transferring
+ * between encoded values and optical/linear space. Depending on HW color caps,
+ * ROMs and curves built by the AMD color module support these transforms.
+ *
+ * The driver-specific color implementation exposes properties for pre-blending
+ * degamma TF, shaper TF (before 3D LUT), and blend(dpp.ogam) TF and
+ * post-blending regamma (mpc.ogam) TF. However, only pre-blending degamma
+ * supports ROM curves. AMD color module uses pre-defined coefficients to build
+ * curves for the other blocks. What can be done by each color block is
+ * described by struct dpp_color_caps and struct mpc_color_caps.
+ *
+ * AMD driver-specific color API exposes the following pre-defined transfer
+ * functions:
+ *
+ * - Identity: linear/identity relationship between pixel value and
+ *   luminance value;
+ * - Gamma 2.2, Gamma 2.4, Gamma 2.6: pure power functions;
+ * - sRGB: 2.4: The piece-wise transfer function from IEC 61966-2-1:1999;
+ * - BT.709: has a linear segment in the bottom part and then a power function
+ *   with a 0.45 (~1/2.22) gamma for the rest of the range; standardized by
+ *   ITU-R BT.709-6;
+ * - PQ (Perceptual Quantizer): used for HDR display, allows luminance range
+ *   capability of 0 to 10,000 nits; standardized by SMPTE ST 2084.
+ *
+ * The AMD color model is designed with an assumption that SDR (sRGB, BT.709,
+ * Gamma 2.2, etc.) peak white maps (normalized to 1.0 FP) to 80 nits in the PQ
+ * system. This has the implication that PQ EOTF (non-linear to linear) maps to
+ * [0.0..125.0] where 125.0 = 10,000 nits / 80 nits.
+ *
+ * Non-linear and linear forms are described in the table below:
+ *
+ * ┌───────────┬─────────────────────┬──────────────────────┐
+ * │           │     Non-linear      │   Linear             │
+ * ├───────────┼─────────────────────┼──────────────────────┤
+ * │      sRGB │ UNORM or [0.0, 1.0] │ [0.0, 1.0]           │
+ * ├───────────┼─────────────────────┼──────────────────────┤
+ * │     BT709 │ UNORM or [0.0, 1.0] │ [0.0, 1.0]           │
+ * ├───────────┼─────────────────────┼──────────────────────┤
+ * │ Gamma 2.x │ UNORM or [0.0, 1.0] │ [0.0, 1.0]           │
+ * ├───────────┼─────────────────────┼──────────────────────┤
+ * │        PQ │ UNORM or FP16 CCCS* │ [0.0, 125.0]         │
+ * ├───────────┼─────────────────────┼──────────────────────┤
+ * │  Identity │ UNORM or FP16 CCCS* │ [0.0, 1.0] or CCCS** │
+ * └───────────┴─────────────────────┴──────────────────────┘
+ * * CCCS: Windows canonical composition color space
+ * ** Respectively
+ *
+ * In the driver-specific API, color block names attached to TF properties
+ * suggest the intention regarding non-linear encoding pixel's luminance
+ * values. As some newer encodings don't use gamma curve, we make encoding and
+ * decoding explicit by defining an enum list of transfer functions supported
+ * in terms of EOTF and inverse EOTF, where:
+ *
+ * - EOTF (electro-optical transfer function): is the transfer function to go
+ *   from the encoded value to an optical (linear) value. De-gamma functions
+ *   traditionally do this.
+ * - Inverse EOTF (simply the inverse of the EOTF): is usually intended to go
+ *   from an optical/linear space (which might have been used for blending)
+ *   back to the encoded values. Gamma functions traditionally do this.
+ */
+static const char * const
+amdgpu_transfer_function_names[] = {
+	[AMDGPU_TRANSFER_FUNCTION_DEFAULT]		= "Default",
+	[AMDGPU_TRANSFER_FUNCTION_IDENTITY]		= "Identity",
+	[AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF]		= "sRGB EOTF",
+	[AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF]	= "BT.709 inv_OETF",
+	[AMDGPU_TRANSFER_FUNCTION_PQ_EOTF]		= "PQ EOTF",
+	[AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF]		= "Gamma 2.2 EOTF",
+	[AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF]		= "Gamma 2.4 EOTF",
+	[AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF]		= "Gamma 2.6 EOTF",
+	[AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF]	= "sRGB inv_EOTF",
+	[AMDGPU_TRANSFER_FUNCTION_BT709_OETF]		= "BT.709 OETF",
+	[AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF]		= "PQ inv_EOTF",
+	[AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF]	= "Gamma 2.2 inv_EOTF",
+	[AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF]	= "Gamma 2.4 inv_EOTF",
+	[AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF]	= "Gamma 2.6 inv_EOTF",
+};
+
+static const u32 amdgpu_eotf =
+	BIT(AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_PQ_EOTF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF);
+
+static const u32 amdgpu_inv_eotf =
+	BIT(AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_BT709_OETF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF);
+
+static struct drm_property *
+amdgpu_create_tf_property(struct drm_device *dev,
+			  const char *name,
+			  u32 supported_tf)
+{
+	u32 transfer_functions = supported_tf |
+				 BIT(AMDGPU_TRANSFER_FUNCTION_DEFAULT) |
+				 BIT(AMDGPU_TRANSFER_FUNCTION_IDENTITY);
+	struct drm_prop_enum_list enum_list[AMDGPU_TRANSFER_FUNCTION_COUNT];
+	int i, len;
+
+	len = 0;
+	for (i = 0; i < AMDGPU_TRANSFER_FUNCTION_COUNT; i++) {
+		if ((transfer_functions & BIT(i)) == 0)
+			continue;
+
+		enum_list[len].type = i;
+		enum_list[len].name = amdgpu_transfer_function_names[i];
+		len++;
+	}
+
+	return drm_property_create_enum(dev, DRM_MODE_PROP_ENUM,
+					name, enum_list, len);
+}
+
+int
+amdgpu_dm_create_color_properties(struct amdgpu_device *adev)
+{
+	struct drm_property *prop;
+
+	prop = drm_property_create(adev_to_drm(adev),
+				   DRM_MODE_PROP_BLOB,
+				   "AMD_PLANE_DEGAMMA_LUT", 0);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_degamma_lut_property = prop;
+
+	prop = drm_property_create_range(adev_to_drm(adev),
+					 DRM_MODE_PROP_IMMUTABLE,
+					 "AMD_PLANE_DEGAMMA_LUT_SIZE",
+					 0, UINT_MAX);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_degamma_lut_size_property = prop;
+
+	prop = amdgpu_create_tf_property(adev_to_drm(adev),
+					 "AMD_PLANE_DEGAMMA_TF",
+					 amdgpu_eotf);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_degamma_tf_property = prop;
+
+	prop = drm_property_create_range(adev_to_drm(adev),
+					 0, "AMD_PLANE_HDR_MULT", 0, U64_MAX);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_hdr_mult_property = prop;
+
+	prop = drm_property_create(adev_to_drm(adev),
+				   DRM_MODE_PROP_BLOB,
+				   "AMD_PLANE_CTM", 0);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_ctm_property = prop;
+
+	prop = drm_property_create(adev_to_drm(adev),
+				   DRM_MODE_PROP_BLOB,
+				   "AMD_PLANE_SHAPER_LUT", 0);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_shaper_lut_property = prop;
+
+	prop = drm_property_create_range(adev_to_drm(adev),
+					 DRM_MODE_PROP_IMMUTABLE,
+					 "AMD_PLANE_SHAPER_LUT_SIZE", 0, UINT_MAX);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_shaper_lut_size_property = prop;
+
+	prop = amdgpu_create_tf_property(adev_to_drm(adev),
+					 "AMD_PLANE_SHAPER_TF",
+					 amdgpu_inv_eotf);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_shaper_tf_property = prop;
+
+	prop = drm_property_create(adev_to_drm(adev),
+				   DRM_MODE_PROP_BLOB,
+				   "AMD_PLANE_LUT3D", 0);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_lut3d_property = prop;
+
+	prop = drm_property_create_range(adev_to_drm(adev),
+					 DRM_MODE_PROP_IMMUTABLE,
+					 "AMD_PLANE_LUT3D_SIZE", 0, UINT_MAX);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_lut3d_size_property = prop;
+
+	prop = drm_property_create(adev_to_drm(adev),
+				   DRM_MODE_PROP_BLOB,
+				   "AMD_PLANE_BLEND_LUT", 0);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_blend_lut_property = prop;
+
+	prop = drm_property_create_range(adev_to_drm(adev),
+					 DRM_MODE_PROP_IMMUTABLE,
+					 "AMD_PLANE_BLEND_LUT_SIZE", 0, UINT_MAX);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_blend_lut_size_property = prop;
+
+	prop = amdgpu_create_tf_property(adev_to_drm(adev),
+					 "AMD_PLANE_BLEND_TF",
+					 amdgpu_eotf);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_blend_tf_property = prop;
+
+	prop = amdgpu_create_tf_property(adev_to_drm(adev),
+					 "AMD_CRTC_REGAMMA_TF",
+					 amdgpu_inv_eotf);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.regamma_tf_property = prop;
+
+	return 0;
+}
+#endif
+
 /**
  * __extract_blob_lut - Extracts the DRM lut and lut size from a blob.
  * @blob: DRM color mgmt property blob
@@ -182,7 +424,6 @@ static void __drm_lut_to_dc_gamma(const struct drm_color_lut *lut,
 static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm,
 				   struct fixed31_32 *matrix)
 {
-	int64_t val;
 	int i;
 
 	/*
@@ -201,12 +442,29 @@ static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm,
 		}
 
 		/* gamut_remap_matrix[i] = ctm[i - floor(i/4)] */
-		val = ctm->matrix[i - (i / 4)];
-		/* If negative, convert to 2's complement. */
-		if (val & (1ULL << 63))
-			val = -(val & ~(1ULL << 63));
+		matrix[i] = amdgpu_dm_fixpt_from_s3132(ctm->matrix[i - (i / 4)]);
+	}
+}
 
-		matrix[i].value = val;
+/**
+ * __drm_ctm_3x4_to_dc_matrix - converts a DRM CTM 3x4 to a DC CSC float matrix
+ * @ctm: DRM color transformation matrix with 3x4 dimensions
+ * @matrix: DC CSC float matrix
+ *
+ * The matrix needs to be a 3x4 (12 entry) matrix.
+ */
+static void __drm_ctm_3x4_to_dc_matrix(const struct drm_color_ctm_3x4 *ctm,
+				       struct fixed31_32 *matrix)
+{
+	int i;
+
+	/* The format provided is S31.32, using signed-magnitude representation.
+	 * Our fixed31_32 is also S31.32, but is using 2's complement. We have
+	 * to convert from signed-magnitude to 2's complement.
+	 */
+	for (i = 0; i < 12; i++) {
+		/* Same 3x4 layout on both sides: matrix[i] = ctm->matrix[i] */
+		matrix[i] = amdgpu_dm_fixpt_from_s3132(ctm->matrix[i]);
 	}
 }
 
@@ -268,16 +526,18 @@ static int __set_output_tf(struct dc_transfer_func *func,
 	struct calculate_buffer cal_buffer = {0};
 	bool res;
 
-	ASSERT(lut && lut_size == MAX_COLOR_LUT_ENTRIES);
-
 	cal_buffer.buffer_index = -1;
 
-	gamma = dc_create_gamma();
-	if (!gamma)
-		return -ENOMEM;
+	if (lut_size) {
+		ASSERT(lut && lut_size == MAX_COLOR_LUT_ENTRIES);
 
-	gamma->num_entries = lut_size;
-	__drm_lut_to_dc_gamma(lut, gamma, false);
+		gamma = dc_create_gamma();
+		if (!gamma)
+			return -ENOMEM;
+
+		gamma->num_entries = lut_size;
+		__drm_lut_to_dc_gamma(lut, gamma, false);
+	}
 
 	if (func->tf == TRANSFER_FUNCTION_LINEAR) {
 		/*
@@ -285,27 +545,68 @@ static int __set_output_tf(struct dc_transfer_func *func,
 		 * on top of a linear input. But degamma params can be used
 		 * instead to simulate this.
 		 */
-		gamma->type = GAMMA_CUSTOM;
+		if (gamma)
+			gamma->type = GAMMA_CUSTOM;
 		res = mod_color_calculate_degamma_params(NULL, func,
-							gamma, true);
+							 gamma, gamma != NULL);
 	} else {
 		/*
 		 * Assume sRGB. The actual mapping will depend on whether the
 		 * input was legacy or not.
 		 */
-		gamma->type = GAMMA_CS_TFM_1D;
-		res = mod_color_calculate_regamma_params(func, gamma, false,
+		if (gamma)
+			gamma->type = GAMMA_CS_TFM_1D;
+		res = mod_color_calculate_regamma_params(func, gamma, gamma != NULL,
 							 has_rom, NULL, &cal_buffer);
 	}
 
-	dc_gamma_release(&gamma);
+	if (gamma)
+		dc_gamma_release(&gamma);
 
 	return res ? 0 : -ENOMEM;
 }
 
+static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream,
+					const struct drm_color_lut *regamma_lut,
+					uint32_t regamma_size, bool has_rom,
+					enum dc_transfer_func_predefined tf)
+{
+	struct dc_transfer_func *out_tf = stream->out_transfer_func;
+	int ret = 0;
+
+	if (regamma_size || tf != TRANSFER_FUNCTION_LINEAR) {
+		/*
+		 * CRTC RGM goes into RGM LUT.
+		 *
+		 * Note: there is no implicit sRGB regamma here. We are using
+		 * degamma calculation from color module to calculate the curve
+		 * from a linear base if gamma TF is not set. However, if gamma
+		 * TF (!= Linear) and LUT are set at the same time, we will use
+		 * regamma calculation, and the color module will combine the
+		 * pre-defined TF and the custom LUT values into the LUT that's
+		 * actually programmed.
+		 */
+		out_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
+		out_tf->tf = tf;
+		out_tf->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
+
+		ret = __set_output_tf(out_tf, regamma_lut, regamma_size, has_rom);
+	} else {
+		/*
+		 * No CRTC RGM means we can just put the block into bypass
+		 * since we don't have any plane level adjustments using it.
+		 */
+		out_tf->type = TF_TYPE_BYPASS;
+		out_tf->tf = TRANSFER_FUNCTION_LINEAR;
+	}
+
+	return ret;
+}
+
 /**
  * __set_input_tf - calculates the input transfer function based on expected
  * input space.
+ * @caps: dc color capabilities
  * @func: transfer function
  * @lut: lookup table that defines the color space
  * @lut_size: size of respective lut.
@@ -313,27 +614,240 @@ static int __set_output_tf(struct dc_transfer_func *func,
  * Returns:
  * 0 in case of success. -ENOMEM if fails.
  */
-static int __set_input_tf(struct dc_transfer_func *func,
+static int __set_input_tf(struct dc_color_caps *caps, struct dc_transfer_func *func,
 			  const struct drm_color_lut *lut, uint32_t lut_size)
 {
 	struct dc_gamma *gamma = NULL;
 	bool res;
 
-	gamma = dc_create_gamma();
-	if (!gamma)
-		return -ENOMEM;
+	if (lut_size) {
+		gamma = dc_create_gamma();
+		if (!gamma)
+			return -ENOMEM;
 
-	gamma->type = GAMMA_CUSTOM;
-	gamma->num_entries = lut_size;
+		gamma->type = GAMMA_CUSTOM;
+		gamma->num_entries = lut_size;
+
+		__drm_lut_to_dc_gamma(lut, gamma, false);
+	}
 
-	__drm_lut_to_dc_gamma(lut, gamma, false);
+	res = mod_color_calculate_degamma_params(caps, func, gamma, gamma != NULL);
 
-	res = mod_color_calculate_degamma_params(NULL, func, gamma, true);
-	dc_gamma_release(&gamma);
+	if (gamma)
+		dc_gamma_release(&gamma);
 
 	return res ? 0 : -ENOMEM;
 }
 
+/* Map an amdgpu_transfer_function value onto DC's predefined TF enum. */
+static enum dc_transfer_func_predefined
+amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf)
+{
+	switch (tf) {
+	case AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF:
+	case AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF:
+		return TRANSFER_FUNCTION_SRGB;
+	case AMDGPU_TRANSFER_FUNCTION_BT709_OETF:
+	case AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF:
+		return TRANSFER_FUNCTION_BT709;
+	case AMDGPU_TRANSFER_FUNCTION_PQ_EOTF:
+	case AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF:
+		return TRANSFER_FUNCTION_PQ;
+	case AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF:
+	case AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF:
+		return TRANSFER_FUNCTION_GAMMA22;
+	case AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF:
+	case AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF:
+		return TRANSFER_FUNCTION_GAMMA24;
+	case AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF:
+	case AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF:
+		return TRANSFER_FUNCTION_GAMMA26;
+	/* DEFAULT, IDENTITY and any other value are treated as linear. */
+	default:
+		return TRANSFER_FUNCTION_LINEAR;
+	}
+}
+
+static void __to_dc_lut3d_color(struct dc_rgb *rgb,
+				const struct drm_color_lut lut,
+				int bit_precision)
+{
+	rgb->red = drm_color_lut_extract(lut.red, bit_precision);
+	rgb->green = drm_color_lut_extract(lut.green, bit_precision);
+	rgb->blue = drm_color_lut_extract(lut.blue, bit_precision);
+}
+
+/* Spread the flat DRM 3D LUT round-robin over the four DC arrays lut0-3 of
+ * the tetrahedral_9 or tetrahedral_17 layout, selected by use_tetrahedral_9.
+ */
+static void __drm_3dlut_to_dc_3dlut(const struct drm_color_lut *lut,
+				    uint32_t lut3d_size,
+				    struct tetrahedral_params *params,
+				    bool use_tetrahedral_9,
+				    int bit_depth)
+{
+	struct dc_rgb *lut0, *lut1, *lut2, *lut3;
+	uint32_t lut_i, i;
+
+	if (use_tetrahedral_9) {
+		lut0 = params->tetrahedral_9.lut0;
+		lut1 = params->tetrahedral_9.lut1;
+		lut2 = params->tetrahedral_9.lut2;
+		lut3 = params->tetrahedral_9.lut3;
+	} else {
+		lut0 = params->tetrahedral_17.lut0;
+		lut1 = params->tetrahedral_17.lut1;
+		lut2 = params->tetrahedral_17.lut2;
+		lut3 = params->tetrahedral_17.lut3;
+	}
+
+	/* i + 4 < size (not i < size - 4): unsigned size - 4 wraps below 4. */
+	for (lut_i = 0, i = 0; i + 4 < lut3d_size; lut_i++, i += 4) {
+		/*
+		 * We should consider the 3D LUT RGB values are distributed
+		 * along four arrays lut0-3 where the first holds 1229 entries
+		 * and the others 1228. The bit depth supported for 3dlut
+		 * channel is 12-bit, but DC also supports 10-bit.
+		 *
+		 * TODO: improve color pipeline API to enable the userspace set
+		 * bit depth and 3D LUT size/stride, as specified by VA-API.
+		 */
+		__to_dc_lut3d_color(&lut0[lut_i], lut[i], bit_depth);
+		__to_dc_lut3d_color(&lut1[lut_i], lut[i + 1], bit_depth);
+		__to_dc_lut3d_color(&lut2[lut_i], lut[i + 2], bit_depth);
+		__to_dc_lut3d_color(&lut3[lut_i], lut[i + 3], bit_depth);
+	}
+	/* lut0 has 1229 points (lut_size/4 + 1) */
+	__to_dc_lut3d_color(&lut0[lut_i], lut[i], bit_depth);
+}
+
+/* amdgpu_dm_atomic_lut3d - map DRM 3D LUT to DC 3D LUT
+ * @drm_lut3d: user 3D LUT
+ * @drm_lut3d_size: size of 3D LUT
+ * @lut: DC 3D LUT
+ *
+ * Map user 3D LUT data to DC 3D LUT and set the bits needed to program it on
+ * DCN; a zero @drm_lut3d_size only marks the DC 3D LUT as not initialized.
+ */
+static void amdgpu_dm_atomic_lut3d(const struct drm_color_lut *drm_lut3d,
+				   uint32_t drm_lut3d_size,
+				   struct dc_3dlut *lut)
+{
+	if (!drm_lut3d_size) {
+		lut->state.bits.initialized = 0;
+	} else {
+		/* Stride and bit depth are not programmable by API yet.
+		 * Therefore, only supports 17x17x17 3D LUT (12-bit).
+		 */
+		lut->lut_3d.use_tetrahedral_9 = false;
+		lut->lut_3d.use_12bits = true;
+		lut->state.bits.initialized = 1;
+		__drm_3dlut_to_dc_3dlut(drm_lut3d, drm_lut3d_size, &lut->lut_3d,
+					lut->lut_3d.use_tetrahedral_9,
+					MAX_COLOR_3DLUT_BITDEPTH);
+	}
+}
+
+/* Set the shaper transfer function from the user LUT and/or predefined TF,
+ * or put it in bypass when neither is requested.
+ */
+static int amdgpu_dm_atomic_shaper_lut(const struct drm_color_lut *shaper_lut,
+				       bool has_rom,
+				       enum dc_transfer_func_predefined tf,
+				       uint32_t shaper_size,
+				       struct dc_transfer_func *func_shaper)
+{
+	if (!shaper_size && tf == TRANSFER_FUNCTION_LINEAR) {
+		func_shaper->type = TF_TYPE_BYPASS;
+		func_shaper->tf = TRANSFER_FUNCTION_LINEAR;
+		return 0;
+	}
+
+	/*
+	 * If user shaper LUT is set, we assume a linear color space
+	 * (linearized by degamma 1D LUT or not).
+	 */
+	func_shaper->type = TF_TYPE_DISTRIBUTED_POINTS;
+	func_shaper->tf = tf;
+	func_shaper->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
+
+	return __set_output_tf(func_shaper, shaper_lut, shaper_size, has_rom);
+}
+
+/* Set the blend transfer function from the user LUT and/or predefined TF,
+ * or put it in bypass when neither is requested.
+ */
+static int amdgpu_dm_atomic_blend_lut(const struct drm_color_lut *blend_lut,
+				      bool has_rom,
+				      enum dc_transfer_func_predefined tf,
+				      uint32_t blend_size,
+				      struct dc_transfer_func *func_blend)
+{
+	if (!blend_size && tf == TRANSFER_FUNCTION_LINEAR) {
+		func_blend->type = TF_TYPE_BYPASS;
+		func_blend->tf = TRANSFER_FUNCTION_LINEAR;
+		return 0;
+	}
+
+	/*
+	 * DRM plane gamma LUT or TF means we are linearizing color
+	 * space before blending (similar to degamma programming). As
+	 * we don't have hardcoded curve support, we use the AMD color
+	 * module to fill the parameters that will be translated to HW
+	 * points.
+	 */
+	func_blend->type = TF_TYPE_DISTRIBUTED_POINTS;
+	func_blend->tf = tf;
+	func_blend->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
+
+	return __set_input_tf(NULL, func_blend, blend_lut, blend_size);
+}
+
+/**
+ * amdgpu_dm_verify_lut3d_size - verifies if 3D LUT is supported and if user
+ * shaper and 3D LUTs match the hw supported size
+ * @adev: amdgpu device
+ * @plane_state: the DRM plane state
+ *
+ * Verifies if pre-blending (DPP) 3D LUT is supported by the HW (DCN 2.0 or
+ * newer) and if the user shaper and 3D LUTs match the supported size.
+ *
+ * Returns:
+ * 0 on success. -EINVAL if a shaper or 3D LUT size is invalid.
+ */
+int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev,
+				struct drm_plane_state *plane_state)
+{
+	struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+	const struct drm_color_lut *shaper = NULL, *lut3d = NULL;
+	uint32_t exp_size, size, dim_size = MAX_COLOR_3DLUT_SIZE;
+	bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut;
+
+	/* Shaper LUT is only accepted when the HW has the 3D LUT color cap */
+	exp_size = has_3dlut ? MAX_COLOR_LUT_ENTRIES : 0;
+	shaper = __extract_blob_lut(dm_plane_state->shaper_lut, &size);
+
+	if (shaper && size != exp_size) {
+		drm_dbg(&adev->ddev,
+			"Invalid Shaper LUT size. Should be %u but got %u.\n",
+			exp_size, size);
+		return -EINVAL;
+	}
+
+	/* The number of 3D LUT entries is the dimension size cubed */
+	exp_size = has_3dlut ? dim_size * dim_size * dim_size : 0;
+	lut3d = __extract_blob_lut(dm_plane_state->lut3d, &size);
+
+	if (lut3d && size != exp_size) {
+		drm_dbg(&adev->ddev,
+			"Invalid 3D LUT size. Should be %u but got %u.\n",
+			exp_size, size);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 /**
  * amdgpu_dm_verify_lut_sizes - verifies if DRM luts match the hw supported sizes
  * @crtc_state: the DRM CRTC state
@@ -401,9 +915,12 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
 	const struct drm_color_lut *degamma_lut, *regamma_lut;
 	uint32_t degamma_size, regamma_size;
 	bool has_regamma, has_degamma;
+	enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_LINEAR;
 	bool is_legacy;
 	int r;
 
+	tf = amdgpu_tf_to_dc_tf(crtc->regamma_tf);
+
 	r = amdgpu_dm_verify_lut_sizes(&crtc->base);
 	if (r)
 		return r;
@@ -439,27 +956,23 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
 		crtc->cm_is_degamma_srgb = true;
 		stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS;
 		stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB;
-
+		/*
+		 * Note: although we pass has_rom as parameter here, we never
+		 * actually use ROM because the color module only takes the ROM
+		 * path if transfer_func->type == PREDEFINED.
+		 *
+		 * See more in mod_color_calculate_regamma_params()
+		 */
 		r = __set_legacy_tf(stream->out_transfer_func, regamma_lut,
 				    regamma_size, has_rom);
 		if (r)
 			return r;
-	} else if (has_regamma) {
-		/* If atomic regamma, CRTC RGM goes into RGM LUT. */
-		stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS;
-		stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
-
-		r = __set_output_tf(stream->out_transfer_func, regamma_lut,
-				    regamma_size, has_rom);
+	} else {
+		regamma_size = has_regamma ? regamma_size : 0;
+		r = amdgpu_dm_set_atomic_regamma(stream, regamma_lut,
+						 regamma_size, has_rom, tf);
 		if (r)
 			return r;
-	} else {
-		/*
-		 * No CRTC RGM means we can just put the block into bypass
-		 * since we don't have any plane level adjustments using it.
-		 */
-		stream->out_transfer_func->type = TF_TYPE_BYPASS;
-		stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
 	}
 
 	/*
@@ -495,20 +1008,10 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
 	return 0;
 }
 
-/**
- * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane.
- * @crtc: amdgpu_dm crtc state
- * @dc_plane_state: target DC surface
- *
- * Update the underlying dc_stream_state's input transfer function (ITF) in
- * preparation for hardware commit. The transfer function used depends on
- * the preparation done on the stream for color management.
- *
- * Returns:
- * 0 on success. -ENOMEM if mem allocation fails.
- */
-int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
-				      struct dc_plane_state *dc_plane_state)
+static int
+map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
+			     struct dc_plane_state *dc_plane_state,
+			     struct dc_color_caps *caps)
 {
 	const struct drm_color_lut *degamma_lut;
 	enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB;
@@ -531,8 +1034,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
 						 &degamma_size);
 		ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES);
 
-		dc_plane_state->in_transfer_func->type =
-			TF_TYPE_DISTRIBUTED_POINTS;
+		dc_plane_state->in_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS;
 
 		/*
 		 * This case isn't fully correct, but also fairly
@@ -564,11 +1066,11 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
 			dc_plane_state->in_transfer_func->tf =
 				TRANSFER_FUNCTION_LINEAR;
 
-		r = __set_input_tf(dc_plane_state->in_transfer_func,
+		r = __set_input_tf(caps, dc_plane_state->in_transfer_func,
 				   degamma_lut, degamma_size);
 		if (r)
 			return r;
-	} else if (crtc->cm_is_degamma_srgb) {
+	} else {
 		/*
 		 * For legacy gamma support we need the regamma input
 		 * in linear space. Assume that the input is sRGB.
@@ -577,14 +1079,209 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
 		dc_plane_state->in_transfer_func->tf = tf;
 
 		if (tf != TRANSFER_FUNCTION_SRGB &&
-		    !mod_color_calculate_degamma_params(NULL,
-			    dc_plane_state->in_transfer_func, NULL, false))
+		    !mod_color_calculate_degamma_params(caps,
+							dc_plane_state->in_transfer_func,
+							NULL, false))
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/* Program the plane degamma LUT and/or TF into the DC plane input TF.
+ * Returns 0 on success, -ENOMEM if the color module fails, or -EINVAL when
+ * the plane has neither a non-linear degamma LUT nor a non-default TF (the
+ * caller uses that to fall back to CRTC degamma).
+ */
+static int
+__set_dm_plane_degamma(struct drm_plane_state *plane_state,
+		       struct dc_plane_state *dc_plane_state,
+		       struct dc_color_caps *color_caps)
+{
+	struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+	const struct drm_color_lut *degamma_lut;
+	enum amdgpu_transfer_function tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
+	uint32_t degamma_size;
+	bool has_degamma_lut;
+	int ret;
+
+	degamma_lut = __extract_blob_lut(dm_plane_state->degamma_lut, &degamma_size);
+
+	has_degamma_lut = degamma_lut &&
+			  !__is_lut_linear(degamma_lut, degamma_size);
+
+	tf = dm_plane_state->degamma_tf;
+
+	/* No plane degamma LUT nor TF to set on DC: nothing to do here. */
+	if (!has_degamma_lut && tf == AMDGPU_TRANSFER_FUNCTION_DEFAULT)
+		return -EINVAL;
+
+	dc_plane_state->in_transfer_func->tf = amdgpu_tf_to_dc_tf(tf);
+
+	if (has_degamma_lut) {
+		ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES);
+
+		dc_plane_state->in_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS;
+
+		ret = __set_input_tf(color_caps, dc_plane_state->in_transfer_func,
+				     degamma_lut, degamma_size);
+		if (ret)
+			return ret;
+	} else {
+		dc_plane_state->in_transfer_func->type = TF_TYPE_PREDEFINED;
+
+		if (!mod_color_calculate_degamma_params(color_caps,
+		    dc_plane_state->in_transfer_func, NULL, false))
 			return -ENOMEM;
-	} else {
-		/* ...Otherwise we can just bypass the DGM block. */
-		dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS;
-		dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
+	}
+	return 0;
+}
+
+/* Map the plane HDR multiplier, shaper, 3D LUT and blend properties to DC. */
+static int
+amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state,
+				     struct dc_plane_state *dc_plane_state)
+{
+	struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+	const struct drm_color_lut *shaper_lut, *lut3d, *blend_lut;
+	uint32_t shaper_size, lut3d_size, blend_size;
+	int ret;
+
+	dc_plane_state->hdr_mult = amdgpu_dm_fixpt_from_s3132(dm_plane_state->hdr_mult);
+
+	shaper_lut = __extract_blob_lut(dm_plane_state->shaper_lut, &shaper_size);
+	if (!shaper_lut)
+		shaper_size = 0;
+	lut3d = __extract_blob_lut(dm_plane_state->lut3d, &lut3d_size);
+	if (!lut3d)
+		lut3d_size = 0;
+
+	amdgpu_dm_atomic_lut3d(lut3d, lut3d_size, dc_plane_state->lut3d_func);
+	ret = amdgpu_dm_atomic_shaper_lut(shaper_lut, false,
+					  amdgpu_tf_to_dc_tf(dm_plane_state->shaper_tf),
+					  shaper_size,
+					  dc_plane_state->in_shaper_func);
+	if (ret) {
+		drm_dbg_kms(plane_state->plane->dev,
+			    "setting plane %d shaper LUT failed.\n",
+			    plane_state->plane->index);
+
+		return ret;
+	}
+
+	blend_lut = __extract_blob_lut(dm_plane_state->blend_lut, &blend_size);
+	if (!blend_lut)
+		blend_size = 0;
+
+	ret = amdgpu_dm_atomic_blend_lut(blend_lut, false,
+					 amdgpu_tf_to_dc_tf(dm_plane_state->blend_tf),
+					 blend_size, dc_plane_state->blend_tf);
+	if (ret) {
+		drm_dbg_kms(plane_state->plane->dev,
+			    "setting plane %d gamma lut failed.\n",
+			    plane_state->plane->index);
+
+		return ret;
 	}
 
 	return 0;
 }
+
+/**
+ * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane.
+ * @crtc: amdgpu_dm crtc state
+ * @plane_state: DRM plane state
+ * @dc_plane_state: target DC surface
+ *
+ * Update the underlying dc_plane_state's input transfer function (ITF),
+ * gamut remap and remaining plane color properties in preparation for
+ * hardware commit. The ITF used depends on plane and CRTC degamma state.
+ *
+ * Returns:
+ * 0 on success. -ENOMEM if mem allocation fails, -EINVAL on invalid setup.
+ */
+int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
+				      struct drm_plane_state *plane_state,
+				      struct dc_plane_state *dc_plane_state)
+{
+	struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev);
+	struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+	struct drm_color_ctm_3x4 *ctm = NULL;
+	struct dc_color_caps *color_caps = NULL;
+	bool has_crtc_cm_degamma;
+	int ret;
+
+	ret = amdgpu_dm_verify_lut3d_size(adev, plane_state);
+	if (ret) {
+		drm_dbg_driver(&adev->ddev, "amdgpu_dm_verify_lut3d_size() failed\n");
+		return ret;
+	}
+
+	if (dc_plane_state->ctx && dc_plane_state->ctx->dc)
+		color_caps = &dc_plane_state->ctx->dc->caps.color;
+
+	/* Initially, we can just bypass the DGM block. */
+	dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS;
+	dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
+
+	/* After, we start to update values according to color props */
+	has_crtc_cm_degamma = (crtc->cm_has_degamma || crtc->cm_is_degamma_srgb);
+
+	ret = __set_dm_plane_degamma(plane_state, dc_plane_state, color_caps);
+	if (ret == -ENOMEM)
+		return ret;
+
+	/* We only have one degamma block available (pre-blending) for the
+	 * whole color correction pipeline, so that we can't actually perform
+	 * plane and CRTC degamma at the same time. Explicitly reject atomic
+	 * updates when userspace sets both plane and CRTC degamma properties.
+	 */
+	if (has_crtc_cm_degamma && ret != -EINVAL) {
+		drm_dbg_kms(crtc->base.crtc->dev,
+			    "doesn't support plane and CRTC degamma at the same time\n");
+		return -EINVAL;
+	}
+
+	/* If we are here, it means we don't have plane degamma settings, check
+	 * if we have CRTC degamma waiting for mapping to pre-blending degamma
+	 * block
+	 */
+	if (has_crtc_cm_degamma) {
+		/*
+		 * AMD HW doesn't have post-blending degamma caps. When DRM
+		 * CRTC atomic degamma is set, we map it to DPP degamma block
+		 * (pre-blending) or, on legacy gamma, we use DPP degamma to
+		 * linearize (implicit degamma) from sRGB/BT709 according to
+		 * the input space.
+		 */
+		ret = map_crtc_degamma_to_dc_plane(crtc, dc_plane_state, color_caps);
+		if (ret)
+			return ret;
+	}
+
+	/* Setup plane CTM. */
+	if (dm_plane_state->ctm) {
+		ctm = (struct drm_color_ctm_3x4 *)dm_plane_state->ctm->data;
+		/*
+		 * DCN2 and older don't support both pre-blending and
+		 * post-blending gamut remap. For this HW family, if we have
+		 * the plane and CRTC CTMs simultaneously, CRTC CTM takes
+		 * priority, and we discard plane CTM, as implemented in
+		 * dcn10_program_gamut_remap(). However, DCN3+ has DPP
+		 * (pre-blending) and MPC (post-blending) `gamut remap` blocks;
+		 * therefore, we can program plane and CRTC CTMs together by
+		 * mapping CRTC CTM to MPC and keeping plane CTM setup at DPP,
+		 * as it's done by dcn30_program_gamut_remap().
+		 */
+		__drm_ctm_3x4_to_dc_matrix(ctm, dc_plane_state->gamut_remap_matrix.matrix);
+
+		dc_plane_state->gamut_remap_matrix.enable_remap = true;
+		dc_plane_state->input_csc_color_matrix.enable_adjustment = false;
+	} else {
+		/* Bypass CTM. */
+		dc_plane_state->gamut_remap_matrix.enable_remap = false;
+		dc_plane_state->input_csc_color_matrix.enable_adjustment = false;
+	}
+
+	return amdgpu_dm_plane_set_color_properties(plane_state, dc_plane_state);
+}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
index 52ecfa746b54..f936a35fa9eb 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
@@ -326,6 +326,9 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name)
 			if (!connector->state || connector->state->crtc != crtc)
 				continue;
 
+			if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+				continue;
+
 			aconn = to_amdgpu_dm_connector(connector);
 			break;
 		}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
index cb0b48bb2a7d..6e715ef3a556 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
@@ -29,7 +29,6 @@
 #include "dc.h"
 #include "amdgpu.h"
 #include "amdgpu_dm_psr.h"
-#include "amdgpu_dm_replay.h"
 #include "amdgpu_dm_crtc.h"
 #include "amdgpu_dm_plane.h"
 #include "amdgpu_dm_trace.h"
@@ -124,12 +123,7 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work)
 	 * fill_dc_dirty_rects().
 	 */
 	if (vblank_work->stream && vblank_work->stream->link) {
-		/*
-		 * Prioritize replay, instead of psr
-		 */
-		if (vblank_work->stream->link->replay_settings.replay_feature_enabled)
-			amdgpu_dm_replay_enable(vblank_work->stream, false);
-		else if (vblank_work->enable) {
+		if (vblank_work->enable) {
 			if (vblank_work->stream->link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 &&
 			    vblank_work->stream->link->psr_settings.psr_allow_active)
 				amdgpu_dm_psr_disable(vblank_work->stream);
@@ -138,7 +132,6 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work)
 #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
 			   !amdgpu_dm_crc_window_is_activated(&vblank_work->acrtc->base) &&
 #endif
-			   vblank_work->stream->link->panel_config.psr.disallow_replay &&
 			   vblank_work->acrtc->dm_irq_params.allow_psr_entry) {
 			amdgpu_dm_psr_enable(vblank_work->stream);
 		}
@@ -260,6 +253,7 @@ static struct drm_crtc_state *amdgpu_dm_crtc_duplicate_state(struct drm_crtc *cr
 	state->freesync_config = cur->freesync_config;
 	state->cm_has_degamma = cur->cm_has_degamma;
 	state->cm_is_degamma_srgb = cur->cm_is_degamma_srgb;
+	state->regamma_tf = cur->regamma_tf;
 	state->crc_skip_count = cur->crc_skip_count;
 	state->mpo_requested = cur->mpo_requested;
 	/* TODO Duplicate dc_stream after objects are stream object is flattened */
@@ -296,6 +290,70 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc)
 }
 #endif
 
+#ifdef AMD_PRIVATE_COLOR
+/**
+ * dm_crtc_additional_color_mgmt - enable additional color properties
+ * @crtc: DRM CRTC
+ *
+ * Attach the post-blending CRTC regamma transfer function property, in
+ * addition to DRM CRTC gamma LUT, when the DC color caps report mpc.ogam_ram.
+ * Default value means linear transfer function, which is the default CRTC
+ * gamma LUT behaviour without this property.
+ */
+static void
+dm_crtc_additional_color_mgmt(struct drm_crtc *crtc)
+{
+	struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+
+	if (adev->dm.dc->caps.color.mpc.ogam_ram)
+		drm_object_attach_property(&crtc->base,
+					   adev->mode_info.regamma_tf_property,
+					   AMDGPU_TRANSFER_FUNCTION_DEFAULT);
+}
+
+/* Atomic setter for the AMD private CRTC regamma TF property. */
+static int
+amdgpu_dm_atomic_crtc_set_property(struct drm_crtc *crtc,
+				   struct drm_crtc_state *state,
+				   struct drm_property *property,
+				   uint64_t val)
+{
+	struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+	struct dm_crtc_state *acrtc_state = to_dm_crtc_state(state);
+
+	if (property != adev->mode_info.regamma_tf_property) {
+		drm_dbg_atomic(crtc->dev,
+			       "[CRTC:%d:%s] unknown property [PROP:%d:%s]\n",
+			       crtc->base.id, crtc->name,
+			       property->base.id, property->name);
+		return -EINVAL;
+	}
+
+	if (acrtc_state->regamma_tf != val) {
+		acrtc_state->regamma_tf = val;
+		acrtc_state->base.color_mgmt_changed = true;
+	}
+
+	return 0;
+}
+
+/* Atomic getter for the AMD private CRTC regamma TF property. */
+static int
+amdgpu_dm_atomic_crtc_get_property(struct drm_crtc *crtc,
+				   const struct drm_crtc_state *state,
+				   struct drm_property *property,
+				   uint64_t *val)
+{
+	struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+	struct dm_crtc_state *acrtc_state = to_dm_crtc_state(state);
+
+	if (property != adev->mode_info.regamma_tf_property)
+		return -EINVAL;
+
+	*val = acrtc_state->regamma_tf;
+	return 0;
+}
+#endif
+
 /* Implemented only the options currently available for the driver */
 static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = {
 	.reset = amdgpu_dm_crtc_reset_state,
@@ -314,6 +372,10 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = {
 #if defined(CONFIG_DEBUG_FS)
 	.late_register = amdgpu_dm_crtc_late_register,
 #endif
+#ifdef AMD_PRIVATE_COLOR
+	.atomic_set_property = amdgpu_dm_atomic_crtc_set_property,
+	.atomic_get_property = amdgpu_dm_atomic_crtc_get_property,
+#endif
 };
 
 static void amdgpu_dm_crtc_helper_disable(struct drm_crtc *crtc)
@@ -489,6 +551,9 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
 
 	drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES);
 
+#ifdef AMD_PRIVATE_COLOR
+	dm_crtc_additional_color_mgmt(&acrtc->base);
+#endif
 	return 0;
 
 fail:
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index 13a177d34376..68a846323912 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -2971,6 +2971,85 @@ static int allow_edp_hotplug_detection_set(void *data, u64 val)
 	return 0;
 }
 
+/*
+ * dmub_trace_mask_set - debugfs write handler for amdgpu_dm_dmub_trace_mask
+ * @data: amdgpu device
+ * @val: 64-bit trace buffer mask
+ *
+ * Sends the mask to DMUB as four 16-bit words, one
+ * SET_TRACE_BUFFER_MASK_WORDn GPINT command per word.
+ *
+ * Returns:
+ * 0 on success, -EINVAL if no DMUB firmware version is reported, -EIO if a
+ * GPINT command fails.
+ */
+static int dmub_trace_mask_set(void *data, u64 val)
+{
+	/* GPINT command for each 16-bit word, least significant word first. */
+	static const enum dmub_gpint_command cmds[] = {
+		DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD0,
+		DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD1,
+		DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD2,
+		DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD3,
+	};
+	struct amdgpu_device *adev = data;
+	struct dmub_srv *srv = adev->dm.dc->ctx->dmub_srv->dmub;
+	u32 word;
+	int i;
+
+	if (!srv->fw_version)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(cmds); i++) {
+		/* Word i carries mask bits [16 * i + 15 : 16 * i]. */
+		word = (val >> (16 * i)) & 0xffff;
+
+		if (!dc_wake_and_execute_gpint(adev->dm.dc->ctx, cmds[i], word, NULL, DM_DMUB_WAIT_TYPE_WAIT))
+			return -EIO;
+
+		/* Short sleep after every command, including the last one. */
+		usleep_range(100, 1000);
+	}
+
+	return 0;
+}
+
+/*
+ * debugfs read handler: issue four GPINT queries starting at
+ * GET_TRACE_BUFFER_MASK_WORD0 and pack the replies into *val, 16 bits apart.
+ */
+static int dmub_trace_mask_show(void *data, u64 *val)
+{
+	enum dmub_gpint_command cmd = DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD0;
+	struct amdgpu_device *adev = data;
+	struct dmub_srv *srv = adev->dm.dc->ctx->dmub_srv->dmub;
+	u64 mask = 0;
+	int word;
+
+	if (!srv->fw_version)
+		return -EINVAL;
+
+	/* cmd is stepped with ++ for every word after WORD0. */
+	for (word = 0; word < 4; word++, cmd++) {
+		uint32_t reply;
+
+		if (!dc_wake_and_execute_gpint(adev->dm.dc->ctx, cmd, 0, &reply, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
+			return -EIO;
+
+		usleep_range(100, 1000);
+
+		/* Widen to 64 bits before shifting into the word's position. */
+		mask |= (u64)reply << (16 * word);
+	}
+
+	*val = mask;
+
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(dmub_trace_mask_fops, dmub_trace_mask_show,
+			 dmub_trace_mask_set, "0x%llx\n");
+
 /*
  * Set dmcub trace event IRQ enable or disable.
  * Usage to enable dmcub trace event IRQ: echo 1 > /sys/kernel/debug/dri/0/amdgpu_dm_dmcub_trace_event_en
@@ -3647,12 +3726,16 @@ static int capabilities_show(struct seq_file *m, void *unused)
 	bool mall_supported = dc->caps.mall_size_total;
 	bool subvp_supported = dc->caps.subvp_fw_processing_delay_us;
 	unsigned int mall_in_use = false;
-	unsigned int subvp_in_use = dc->cap_funcs.get_subvp_en(dc, dc->current_state);
+	unsigned int subvp_in_use = false;
+
 	struct hubbub *hubbub = dc->res_pool->hubbub;
 
 	if (hubbub->funcs->get_mall_en)
 		hubbub->funcs->get_mall_en(hubbub, &mall_in_use);
 
+	if (dc->cap_funcs.get_subvp_en)
+		subvp_in_use = dc->cap_funcs.get_subvp_en(dc, dc->current_state);
+
 	seq_printf(m, "mall supported: %s, enabled: %s\n",
 			   mall_supported ? "yes" : "no", mall_in_use ? "yes" : "no");
 	seq_printf(m, "sub-viewport supported: %s, enabled: %s\n",
@@ -3880,6 +3963,9 @@ void dtn_debugfs_init(struct amdgpu_device *adev)
 	debugfs_create_file_unsafe("amdgpu_dm_force_timing_sync", 0644, root,
 				   adev, &force_timing_sync_ops);
 
+	debugfs_create_file_unsafe("amdgpu_dm_dmub_trace_mask", 0644, root,
+				   adev, &dmub_trace_mask_fops);
+
 	debugfs_create_file_unsafe("amdgpu_dm_dmcub_trace_event_en", 0644, root,
 				   adev, &dmcub_trace_event_state_fops);
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index eb6121ad92fd..85b7f58a7f35 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -64,6 +64,12 @@ static void apply_edid_quirks(struct edid *edid, struct dc_edid_caps *edid_caps)
 		DRM_DEBUG_DRIVER("Disabling FAMS on monitor with panel id %X\n", panel_id);
 		edid_caps->panel_patch.disable_fams = true;
 		break;
+	/* Workaround for some monitors that do not clear DPCD 0x317 if FreeSync is unsupported */
+	case drm_edid_encode_panel_id('A', 'U', 'O', 0xA7AB):
+	case drm_edid_encode_panel_id('A', 'U', 'O', 0xE69B):
+		DRM_DEBUG_DRIVER("Clearing DPCD 0x317 on monitor with panel id %X\n", panel_id);
+		edid_caps->panel_patch.remove_sink_ext_caps = true;
+		break;
 	default:
 		return;
 	}
@@ -334,15 +340,14 @@ enum act_return_status dm_helpers_dp_mst_poll_for_allocation_change_trigger(
 	return ACT_SUCCESS;
 }
 
-bool dm_helpers_dp_mst_send_payload_allocation(
+void dm_helpers_dp_mst_send_payload_allocation(
 		struct dc_context *ctx,
-		const struct dc_stream_state *stream,
-		bool enable)
+		const struct dc_stream_state *stream)
 {
 	struct amdgpu_dm_connector *aconnector;
 	struct drm_dp_mst_topology_state *mst_state;
 	struct drm_dp_mst_topology_mgr *mst_mgr;
-	struct drm_dp_mst_atomic_payload *new_payload, old_payload;
+	struct drm_dp_mst_atomic_payload *new_payload;
 	enum mst_progress_status set_flag = MST_ALLOCATE_NEW_PAYLOAD;
 	enum mst_progress_status clr_flag = MST_CLEAR_ALLOCATED_PAYLOAD;
 	int ret = 0;
@@ -350,25 +355,13 @@ bool dm_helpers_dp_mst_send_payload_allocation(
 	aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
 
 	if (!aconnector || !aconnector->mst_root)
-		return false;
+		return;
 
 	mst_mgr = &aconnector->mst_root->mst_mgr;
 	mst_state = to_drm_dp_mst_topology_state(mst_mgr->base.state);
-
 	new_payload = drm_atomic_get_mst_payload_state(mst_state, aconnector->mst_output_port);
 
-	if (!enable) {
-		set_flag = MST_CLEAR_ALLOCATED_PAYLOAD;
-		clr_flag = MST_ALLOCATE_NEW_PAYLOAD;
-	}
-
-	if (enable) {
-		ret = drm_dp_add_payload_part2(mst_mgr, mst_state->base.state, new_payload);
-	} else {
-		dm_helpers_construct_old_payload(mst_mgr, mst_state,
-						 new_payload, &old_payload);
-		drm_dp_remove_payload_part2(mst_mgr, mst_state, &old_payload, new_payload);
-	}
+	ret = drm_dp_add_payload_part2(mst_mgr, mst_state->base.state, new_payload);
 
 	if (ret) {
 		amdgpu_dm_set_mst_status(&aconnector->mst_status,
@@ -379,10 +372,36 @@ bool dm_helpers_dp_mst_send_payload_allocation(
 		amdgpu_dm_set_mst_status(&aconnector->mst_status,
 			clr_flag, false);
 	}
-
-	return true;
 }
 
+/* Remove the stream's payload from the MST manager (part 2) and update the
+ * connector's MST progress status accordingly.
+ */
+void dm_helpers_dp_mst_update_mst_mgr_for_deallocation(
+		struct dc_context *ctx,
+		const struct dc_stream_state *stream)
+{
+	struct amdgpu_dm_connector *aconnector;
+	struct drm_dp_mst_topology_state *mst_state;
+	struct drm_dp_mst_topology_mgr *mst_mgr;
+	struct drm_dp_mst_atomic_payload *new_payload, old_payload;
+
+	aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
+
+	if (!aconnector || !aconnector->mst_root)
+		return;
+
+	mst_mgr = &aconnector->mst_root->mst_mgr;
+	mst_state = to_drm_dp_mst_topology_state(mst_mgr->base.state);
+	new_payload = drm_atomic_get_mst_payload_state(mst_state, aconnector->mst_output_port);
+	dm_helpers_construct_old_payload(mst_mgr, mst_state, new_payload, &old_payload);
+
+	drm_dp_remove_payload_part2(mst_mgr, mst_state, &old_payload, new_payload);
+
+	amdgpu_dm_set_mst_status(&aconnector->mst_status, MST_CLEAR_ALLOCATED_PAYLOAD, true);
+	amdgpu_dm_set_mst_status(&aconnector->mst_status, MST_ALLOCATE_NEW_PAYLOAD, false);
+}
+
 void dm_dtn_log_begin(struct dc_context *ctx,
 	struct dc_log_buffer_ctx *log_ctx)
 {
@@ -960,6 +979,11 @@ int dm_helper_dmub_aux_transfer_sync(
 		struct aux_payload *payload,
 		enum aux_return_code_type *operation_result)
 {
+	if (!link->hpd_status) {
+		*operation_result = AUX_RET_ERROR_HPD_DISCON;
+		return -1;
+	}
+
 	return amdgpu_dm_process_dmub_aux_transfer_sync(ctx, link->link_index, payload,
 			operation_result);
 }
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
index 51467f132c26..3390f0d8420a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
@@ -711,7 +711,7 @@ static inline int dm_irq_state(struct amdgpu_device *adev,
 {
 	bool st;
 	enum dc_irq_source irq_source;
-
+	struct dc *dc = adev->dm.dc;
 	struct amdgpu_crtc *acrtc = adev->mode_info.crtcs[crtc_id];
 
 	if (!acrtc) {
@@ -729,6 +729,9 @@ static inline int dm_irq_state(struct amdgpu_device *adev,
 
 	st = (state == AMDGPU_IRQ_STATE_ENABLE);
 
+	if (dc && dc->caps.ips_support && dc->idle_optimizations_allowed)
+		dc_allow_idle_optimizations(dc, false);
+
 	dc_interrupt_set(adev->dm.dc, irq_source, st);
 	return 0;
 }
@@ -894,10 +897,15 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev)
 
 	drm_connector_list_iter_begin(dev, &iter);
 	drm_for_each_connector_iter(connector, &iter) {
-		struct amdgpu_dm_connector *amdgpu_dm_connector =
-				to_amdgpu_dm_connector(connector);
+		struct amdgpu_dm_connector *amdgpu_dm_connector;
+		const struct dc_link *dc_link;
+
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
 
-		const struct dc_link *dc_link = amdgpu_dm_connector->dc_link;
+		amdgpu_dm_connector = to_amdgpu_dm_connector(connector);
+
+		dc_link = amdgpu_dm_connector->dc_link;
 
 		if (dc_link->irq_source_hpd != DC_IRQ_SOURCE_INVALID) {
 			dc_interrupt_set(adev->dm.dc,
@@ -930,9 +938,14 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev)
 
 	drm_connector_list_iter_begin(dev, &iter);
 	drm_for_each_connector_iter(connector, &iter) {
-		struct amdgpu_dm_connector *amdgpu_dm_connector =
-				to_amdgpu_dm_connector(connector);
-		const struct dc_link *dc_link = amdgpu_dm_connector->dc_link;
+		struct amdgpu_dm_connector *amdgpu_dm_connector;
+		const struct dc_link *dc_link;
+
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
+		amdgpu_dm_connector = to_amdgpu_dm_connector(connector);
+		dc_link = amdgpu_dm_connector->dc_link;
 
 		if (dc_link->irq_source_hpd != DC_IRQ_SOURCE_INVALID) {
 			dc_interrupt_set(adev->dm.dc,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 3608d520b227..941e96f100f4 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -28,6 +28,7 @@
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_fixed.h>
+#include <drm/drm_edid.h>
 #include "dm_services.h"
 #include "amdgpu.h"
 #include "amdgpu_dm.h"
@@ -45,7 +46,7 @@
 #include "amdgpu_dm_debugfs.h"
 #endif
 
-#include "dc/dcn20/dcn20_resource.h"
+#include "dc/resource/dcn20/dcn20_resource.h"
 
 #define PEAK_FACTOR_X1000 1006
 
@@ -425,8 +426,7 @@ dm_mst_atomic_best_encoder(struct drm_connector *connector,
 {
 	struct drm_connector_state *connector_state = drm_atomic_get_new_connector_state(state,
 											 connector);
-	struct drm_device *dev = connector->dev;
-	struct amdgpu_device *adev = drm_to_adev(dev);
+	struct amdgpu_device *adev = drm_to_adev(connector->dev);
 	struct amdgpu_crtc *acrtc = to_amdgpu_crtc(connector_state->crtc);
 
 	return &adev->dm.mst_encoders[acrtc->crtc_id].base;
@@ -1501,14 +1501,16 @@ int pre_validate_dsc(struct drm_atomic_state *state,
 		int ind = find_crtc_index_in_state_by_stream(state, stream);
 
 		if (ind >= 0) {
+			struct drm_connector *connector;
 			struct amdgpu_dm_connector *aconnector;
 			struct drm_connector_state *drm_new_conn_state;
 			struct dm_connector_state *dm_new_conn_state;
 			struct dm_crtc_state *dm_old_crtc_state;
 
-			aconnector =
+			connector =
 				amdgpu_dm_find_first_crtc_matching_connector(state,
 									     state->crtcs[ind].ptr);
+			aconnector = to_amdgpu_dm_connector(connector);
 			drm_new_conn_state =
 				drm_atomic_get_new_connector_state(state,
 								   &aconnector->base);
@@ -1603,9 +1605,8 @@ enum dc_status dm_dp_mst_is_port_support_mode(
 	struct dc_link_settings cur_link_settings;
 	unsigned int end_to_end_bw_in_kbps = 0;
 	unsigned int upper_link_bw_in_kbps = 0, down_link_bw_in_kbps = 0;
-	unsigned int max_compressed_bw_in_kbps = 0;
 	struct dc_dsc_bw_range bw_range = {0};
-	uint16_t full_pbn = aconnector->mst_output_port->full_pbn;
+	struct dc_dsc_config_options dsc_options = {0};
 
 	/*
 	 * Consider the case with the depth of the mst topology tree is equal or less than 2
@@ -1621,30 +1622,39 @@ enum dc_status dm_dp_mst_is_port_support_mode(
 	   (aconnector->mst_output_port->passthrough_aux ||
 	    aconnector->dsc_aux == &aconnector->mst_output_port->aux)) {
 		cur_link_settings = stream->link->verified_link_cap;
+		upper_link_bw_in_kbps = dc_link_bandwidth_kbps(aconnector->dc_link, &cur_link_settings);
+		down_link_bw_in_kbps = kbps_from_pbn(aconnector->mst_output_port->full_pbn);
 
-		upper_link_bw_in_kbps = dc_link_bandwidth_kbps(aconnector->dc_link,
-							       &cur_link_settings);
-		down_link_bw_in_kbps = kbps_from_pbn(full_pbn);
-
-		/* pick the bottleneck */
-		end_to_end_bw_in_kbps = min(upper_link_bw_in_kbps,
-					    down_link_bw_in_kbps);
-
-		/*
-		 * use the maximum dsc compression bandwidth as the required
-		 * bandwidth for the mode
-		 */
-		max_compressed_bw_in_kbps = bw_range.min_kbps;
+		/* pick the end to end bw bottleneck */
+		end_to_end_bw_in_kbps = min(upper_link_bw_in_kbps, down_link_bw_in_kbps);
 
-		if (end_to_end_bw_in_kbps < max_compressed_bw_in_kbps) {
-			DRM_DEBUG_DRIVER("Mode does not fit into DSC pass-through bandwidth validation\n");
+		if (end_to_end_bw_in_kbps < bw_range.min_kbps) {
+			DRM_DEBUG_DRIVER("maximum dsc compression cannot fit into end-to-end bandwidth\n");
 			return DC_FAIL_BANDWIDTH_VALIDATE;
 		}
+
+		if (end_to_end_bw_in_kbps < bw_range.stream_kbps) {
+			dc_dsc_get_default_config_option(stream->link->dc, &dsc_options);
+			dsc_options.max_target_bpp_limit_override_x16 = aconnector->base.display_info.max_dsc_bpp * 16;
+			if (dc_dsc_compute_config(stream->sink->ctx->dc->res_pool->dscs[0],
+					&stream->sink->dsc_caps.dsc_dec_caps,
+					&dsc_options,
+					end_to_end_bw_in_kbps,
+					&stream->timing,
+					dc_link_get_highest_encoding_format(stream->link),
+					&stream->timing.dsc_cfg)) {
+				stream->timing.flags.DSC = 1;
+				DRM_DEBUG_DRIVER("end-to-end bandwidth require dsc and dsc config found\n");
+			} else {
+				DRM_DEBUG_DRIVER("end-to-end bandwidth require dsc but dsc config not found\n");
+				return DC_FAIL_BANDWIDTH_VALIDATE;
+			}
+		}
 	} else {
 		/* check if mode could be supported within full_pbn */
 		bpp = convert_dc_color_depth_into_bpc(stream->timing.display_color_depth) * 3;
 		pbn = drm_dp_calc_pbn_mode(stream->timing.pix_clk_100hz / 10, bpp << 4);
-		if (pbn > full_pbn)
+		if (pbn > aconnector->mst_output_port->full_pbn)
 			return DC_FAIL_BANDWIDTH_VALIDATE;
 	}
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index 116121e647ca..8a4c40b4c27e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1337,8 +1337,14 @@ static void amdgpu_dm_plane_drm_plane_reset(struct drm_plane *plane)
 	amdgpu_state = kzalloc(sizeof(*amdgpu_state), GFP_KERNEL);
 	WARN_ON(amdgpu_state == NULL);
 
-	if (amdgpu_state)
-		__drm_atomic_helper_plane_reset(plane, &amdgpu_state->base);
+	if (!amdgpu_state)
+		return;
+
+	__drm_atomic_helper_plane_reset(plane, &amdgpu_state->base);
+	amdgpu_state->degamma_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
+	amdgpu_state->hdr_mult = AMDGPU_HDR_MULT_DEFAULT;
+	amdgpu_state->shaper_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
+	amdgpu_state->blend_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
 }
 
 static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct drm_plane *plane)
@@ -1357,6 +1363,27 @@ static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct
 		dc_plane_state_retain(dm_plane_state->dc_state);
 	}
 
+	if (old_dm_plane_state->degamma_lut)
+		dm_plane_state->degamma_lut =
+			drm_property_blob_get(old_dm_plane_state->degamma_lut);
+	if (old_dm_plane_state->ctm)
+		dm_plane_state->ctm =
+			drm_property_blob_get(old_dm_plane_state->ctm);
+	if (old_dm_plane_state->shaper_lut)
+		dm_plane_state->shaper_lut =
+			drm_property_blob_get(old_dm_plane_state->shaper_lut);
+	if (old_dm_plane_state->lut3d)
+		dm_plane_state->lut3d =
+			drm_property_blob_get(old_dm_plane_state->lut3d);
+	if (old_dm_plane_state->blend_lut)
+		dm_plane_state->blend_lut =
+			drm_property_blob_get(old_dm_plane_state->blend_lut);
+
+	dm_plane_state->degamma_tf = old_dm_plane_state->degamma_tf;
+	dm_plane_state->hdr_mult = old_dm_plane_state->hdr_mult;
+	dm_plane_state->shaper_tf = old_dm_plane_state->shaper_tf;
+	dm_plane_state->blend_tf = old_dm_plane_state->blend_tf;
+
 	return &dm_plane_state->base;
 }
 
@@ -1424,12 +1451,206 @@ static void amdgpu_dm_plane_drm_plane_destroy_state(struct drm_plane *plane,
 {
 	struct dm_plane_state *dm_plane_state = to_dm_plane_state(state);
 
+	if (dm_plane_state->degamma_lut)
+		drm_property_blob_put(dm_plane_state->degamma_lut);
+	if (dm_plane_state->ctm)
+		drm_property_blob_put(dm_plane_state->ctm);
+	if (dm_plane_state->lut3d)
+		drm_property_blob_put(dm_plane_state->lut3d);
+	if (dm_plane_state->shaper_lut)
+		drm_property_blob_put(dm_plane_state->shaper_lut);
+	if (dm_plane_state->blend_lut)
+		drm_property_blob_put(dm_plane_state->blend_lut);
+
 	if (dm_plane_state->dc_state)
 		dc_plane_state_release(dm_plane_state->dc_state);
 
 	drm_atomic_helper_plane_destroy_state(plane, state);
 }
 
+#ifdef AMD_PRIVATE_COLOR
+/*
+ * Attach the AMD driver-private color management properties to @plane.
+ * Most groups are attached only when the matching capability in
+ * dm->dc->caps.color is set; the HDR multiplier is attached unconditionally.
+ */
+static void
+dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm,
+					     struct drm_plane *plane)
+{
+	/* Point at the device-owned structs rather than copying them to the stack. */
+	const struct amdgpu_mode_info *mode_info = &dm->adev->mode_info;
+	const struct dpp_color_caps *dpp_caps = &dm->dc->caps.color.dpp;
+	struct drm_mode_object *obj = &plane->base;
+
+	/* Check HW color pipeline capabilities on DPP block (pre-blending)
+	 * before exposing related properties.
+	 */
+	if (dpp_caps->dgam_ram || dpp_caps->gamma_corr) {
+		drm_object_attach_property(obj, mode_info->plane_degamma_lut_property, 0);
+		drm_object_attach_property(obj,
+					   mode_info->plane_degamma_lut_size_property,
+					   MAX_COLOR_LUT_ENTRIES);
+		drm_object_attach_property(obj,
+					   mode_info->plane_degamma_tf_property,
+					   AMDGPU_TRANSFER_FUNCTION_DEFAULT);
+	}
+
+	/* HDR MULT is always available */
+	drm_object_attach_property(obj, mode_info->plane_hdr_mult_property,
+				   AMDGPU_HDR_MULT_DEFAULT);
+
+	/* Plane CTM is exposed only when MPC gamut remap is available. */
+	if (dm->dc->caps.color.mpc.gamut_remap)
+		drm_object_attach_property(obj, mode_info->plane_ctm_property, 0);
+
+	/* Shaper LUT/TF and 3D LUT share the hw_3d_lut capability. */
+	if (dpp_caps->hw_3d_lut) {
+		drm_object_attach_property(obj, mode_info->plane_shaper_lut_property, 0);
+		drm_object_attach_property(obj,
+					   mode_info->plane_shaper_lut_size_property,
+					   MAX_COLOR_LUT_ENTRIES);
+		drm_object_attach_property(obj,
+					   mode_info->plane_shaper_tf_property,
+					   AMDGPU_TRANSFER_FUNCTION_DEFAULT);
+		drm_object_attach_property(obj, mode_info->plane_lut3d_property, 0);
+		drm_object_attach_property(obj, mode_info->plane_lut3d_size_property,
+					   MAX_COLOR_3DLUT_SIZE);
+	}
+
+	/* Blend LUT/TF depend on the DPP ogam_ram capability. */
+	if (dpp_caps->ogam_ram) {
+		drm_object_attach_property(obj, mode_info->plane_blend_lut_property, 0);
+		drm_object_attach_property(obj, mode_info->plane_blend_lut_size_property,
+					   MAX_COLOR_LUT_ENTRIES);
+		drm_object_attach_property(obj, mode_info->plane_blend_tf_property,
+					   AMDGPU_TRANSFER_FUNCTION_DEFAULT);
+	}
+}
+
+/*
+ * .atomic_set_property hook for the driver-private plane color properties.
+ *
+ * Blob-backed properties (degamma/shaper/blend LUT, 3D LUT, CTM) are swapped
+ * through drm_property_replace_blob_from_id(); scalar properties (transfer
+ * functions, HDR multiplier) are stored directly. color_mgmt_changed is set
+ * on the plane state when a stored value changes.
+ *
+ * Returns 0 on success, the blob helper's return value for blob properties,
+ * or -EINVAL when @property is not handled here.
+ */
+static int
+dm_atomic_plane_set_property(struct drm_plane *plane,
+			     struct drm_plane_state *state,
+			     struct drm_property *property,
+			     uint64_t val)
+{
+	struct dm_plane_state *dm_plane_state = to_dm_plane_state(state);
+	struct amdgpu_device *adev = drm_to_adev(plane->dev);
+	const struct amdgpu_mode_info *mode_info = &adev->mode_info;
+	/* Set when @property is blob-backed; the swap is done once at the end. */
+	struct drm_property_blob **blob = NULL;
+	/* LUT blobs: total size passed as -1, element size is one LUT entry. */
+	ssize_t blob_size = -1;
+	ssize_t elem_size = sizeof(struct drm_color_lut);
+	bool replaced = false;
+	int ret;
+
+	if (property == mode_info->plane_degamma_lut_property) {
+		blob = &dm_plane_state->degamma_lut;
+	} else if (property == mode_info->plane_degamma_tf_property) {
+		if (dm_plane_state->degamma_tf != val) {
+			dm_plane_state->degamma_tf = val;
+			dm_plane_state->base.color_mgmt_changed = 1;
+		}
+	} else if (property == mode_info->plane_hdr_mult_property) {
+		if (dm_plane_state->hdr_mult != val) {
+			dm_plane_state->hdr_mult = val;
+			dm_plane_state->base.color_mgmt_changed = 1;
+		}
+	} else if (property == mode_info->plane_ctm_property) {
+		/* CTM blob: total size is one 3x4 matrix, element size is -1. */
+		blob = &dm_plane_state->ctm;
+		blob_size = sizeof(struct drm_color_ctm_3x4);
+		elem_size = -1;
+	} else if (property == mode_info->plane_shaper_lut_property) {
+		blob = &dm_plane_state->shaper_lut;
+	} else if (property == mode_info->plane_shaper_tf_property) {
+		if (dm_plane_state->shaper_tf != val) {
+			dm_plane_state->shaper_tf = val;
+			dm_plane_state->base.color_mgmt_changed = 1;
+		}
+	} else if (property == mode_info->plane_lut3d_property) {
+		blob = &dm_plane_state->lut3d;
+	} else if (property == mode_info->plane_blend_lut_property) {
+		blob = &dm_plane_state->blend_lut;
+	} else if (property == mode_info->plane_blend_tf_property) {
+		if (dm_plane_state->blend_tf != val) {
+			dm_plane_state->blend_tf = val;
+			dm_plane_state->base.color_mgmt_changed = 1;
+		}
+	} else {
+		drm_dbg_atomic(plane->dev,
+			       "[PLANE:%d:%s] unknown property [PROP:%d:%s]\n",
+			       plane->base.id, plane->name,
+			       property->base.id, property->name);
+		return -EINVAL;
+	}
+
+	/* Scalar properties were stored above; nothing more to do. */
+	if (!blob)
+		return 0;
+
+	/* One shared call performs the swap for every blob-backed property. */
+	ret = drm_property_replace_blob_from_id(plane->dev, blob, val,
+						blob_size, elem_size,
+						&replaced);
+	dm_plane_state->base.color_mgmt_changed |= replaced;
+
+	return ret;
+}
+
+/*
+ * .atomic_get_property hook for the driver-private plane color properties.
+ *
+ * Blob-backed properties report the blob's object id (0 when no blob is
+ * set); scalar properties report the stored value. Returns 0 on success, or
+ * -EINVAL with @val left untouched for a property not handled here.
+ */
+static int
+dm_atomic_plane_get_property(struct drm_plane *plane,
+			     const struct drm_plane_state *state,
+			     struct drm_property *property,
+			     uint64_t *val)
+{
+	const struct amdgpu_mode_info *mode_info = &drm_to_adev(plane->dev)->mode_info;
+	struct dm_plane_state *dm_state = to_dm_plane_state(state);
+
+	if (property == mode_info->plane_degamma_lut_property)
+		*val = dm_state->degamma_lut ? dm_state->degamma_lut->base.id : 0;
+	else if (property == mode_info->plane_degamma_tf_property)
+		*val = dm_state->degamma_tf;
+	else if (property == mode_info->plane_hdr_mult_property)
+		*val = dm_state->hdr_mult;
+	else if (property == mode_info->plane_ctm_property)
+		*val = dm_state->ctm ? dm_state->ctm->base.id : 0;
+	else if (property == mode_info->plane_shaper_lut_property)
+		*val = dm_state->shaper_lut ? dm_state->shaper_lut->base.id : 0;
+	else if (property == mode_info->plane_shaper_tf_property)
+		*val = dm_state->shaper_tf;
+	else if (property == mode_info->plane_lut3d_property)
+		*val = dm_state->lut3d ? dm_state->lut3d->base.id : 0;
+	else if (property == mode_info->plane_blend_lut_property)
+		*val = dm_state->blend_lut ? dm_state->blend_lut->base.id : 0;
+	else if (property == mode_info->plane_blend_tf_property)
+		*val = dm_state->blend_tf;
+	else
+		return -EINVAL;
+
+	return 0;
+}
+#endif
+
 static const struct drm_plane_funcs dm_plane_funcs = {
 	.update_plane	= drm_atomic_helper_update_plane,
 	.disable_plane	= drm_atomic_helper_disable_plane,
@@ -1438,6 +1659,10 @@ static const struct drm_plane_funcs dm_plane_funcs = {
 	.atomic_duplicate_state = amdgpu_dm_plane_drm_plane_duplicate_state,
 	.atomic_destroy_state = amdgpu_dm_plane_drm_plane_destroy_state,
 	.format_mod_supported = amdgpu_dm_plane_format_mod_supported,
+#ifdef AMD_PRIVATE_COLOR
+	.atomic_set_property = dm_atomic_plane_set_property,
+	.atomic_get_property = dm_atomic_plane_get_property,
+#endif
 };
 
 int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
@@ -1517,6 +1742,9 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
 
 	drm_plane_helper_add(plane, &dm_plane_helper_funcs);
 
+#ifdef AMD_PRIVATE_COLOR
+	dm_atomic_plane_attach_color_mgmt_properties(dm, plane);
+#endif
 	/* Create (reset) the plane state */
 	if (plane->funcs->reset)
 		plane->funcs->reset(plane);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
index 08ce3bb8f640..1f08c6564c3b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
@@ -51,6 +51,9 @@ static bool link_supports_psrsu(struct dc_link *link)
 	    !link->dpcd_caps.psr_info.psr2_su_y_granularity_cap)
 		return false;
 
+	if (amdgpu_dc_debug_mask & DC_DISABLE_PSR_SU)
+		return false;
+
 	return dc_dmub_check_min_version(dc->ctx->dmub_srv->dmub);
 }
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
new file mode 100644
index 000000000000..16e72d623630
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dm_services_types.h"
+
+#include "amdgpu.h"
+#include "amdgpu_dm.h"
+#include "amdgpu_dm_wb.h"
+#include "amdgpu_display.h"
+#include "dc.h"
+
+#include <drm/drm_edid.h>
+#include <drm/drm_atomic_state_helper.h>
+#include <drm/drm_modeset_helper_vtables.h>
+
+static const u32 amdgpu_dm_wb_formats[] = { /* formats accepted for writeback FBs */
+	DRM_FORMAT_XRGB2101010,
+};
+
+/*
+ * Encoder atomic_check for writeback: when a job with a framebuffer is
+ * queued, the FB must match the CRTC mode size and use a format listed in
+ * amdgpu_dm_wb_formats. Returns 0 if acceptable (or no FB), else -EINVAL.
+ */
+static int amdgpu_dm_wb_encoder_atomic_check(struct drm_encoder *encoder,
+					struct drm_crtc_state *crtc_state,
+					struct drm_connector_state *conn_state)
+{
+	const struct drm_display_mode *mode = &crtc_state->mode;
+	struct drm_framebuffer *fb;
+	unsigned int i;
+
+	if (!conn_state->writeback_job || !conn_state->writeback_job->fb)
+		return 0;
+
+	fb = conn_state->writeback_job->fb;
+	if (fb->width != mode->hdisplay || fb->height != mode->vdisplay) {
+		DRM_DEBUG_KMS("Invalid framebuffer size %ux%u\n",
+			      fb->width, fb->height);
+		return -EINVAL;
+	}
+
+	/* ARRAY_SIZE() keeps the bound tied to the table's element type. */
+	for (i = 0; i < ARRAY_SIZE(amdgpu_dm_wb_formats); i++) {
+		if (fb->format->format == amdgpu_dm_wb_formats[i])
+			return 0;
+	}
+
+	DRM_DEBUG_KMS("Invalid pixel format %p4cc\n", &fb->format->format);
+	return -EINVAL;
+}
+
+
+/* Populate modes via drm_add_modes_noedid() using the device's max width/height. */
+static int amdgpu_dm_wb_connector_get_modes(struct drm_connector *connector)
+{
+	const struct drm_mode_config *cfg = &connector->dev->mode_config;
+
+	return drm_add_modes_noedid(connector, cfg->max_width, cfg->max_height);
+}
+
+/*
+ * prepare_writeback_job hook: reserve the BO behind job->fb->obj[0], pin it
+ * in a display-supported domain, bind it to GART and store its GPU offset in
+ * the framebuffer. Takes a BO reference; returns 0 (also if no FB) or errno.
+ */
+static int amdgpu_dm_wb_prepare_job(struct drm_writeback_connector *wb_connector,
+			       struct drm_writeback_job *job)
+{
+	struct amdgpu_framebuffer *afb;
+	struct amdgpu_device *adev;
+	struct amdgpu_bo *rbo;
+	uint32_t domain;
+	int r;
+
+	if (!job->fb) {
+		DRM_DEBUG_KMS("No FB bound\n");
+		return 0;
+	}
+
+	afb = to_amdgpu_framebuffer(job->fb);
+	rbo = gem_to_amdgpu_bo(job->fb->obj[0]);
+	adev = amdgpu_ttm_adev(rbo->tbo.bdev);
+
+	r = amdgpu_bo_reserve(rbo, true);
+	if (r) {
+		dev_err(adev->dev, "fail to reserve bo (%d)\n", r);
+		return r;
+	}
+
+	r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1);
+	if (r) {
+		dev_err(adev->dev, "reserving fence slot failed (%d)\n", r);
+		goto error_unlock;
+	}
+
+	domain = amdgpu_display_supported_domains(adev, rbo->flags);
+	r = amdgpu_bo_pin(rbo, domain);
+	if (unlikely(r)) {
+		if (r != -ERESTARTSYS)
+			DRM_ERROR("Failed to pin framebuffer with error %d\n", r);
+		goto error_unlock;
+	}
+
+	r = amdgpu_ttm_alloc_gart(&rbo->tbo);
+	if (unlikely(r)) {
+		DRM_ERROR("%p bind failed\n", rbo);
+		goto error_unpin;
+	}
+
+	amdgpu_bo_unreserve(rbo);
+
+	afb->address = amdgpu_bo_gpu_offset(rbo);
+	amdgpu_bo_ref(rbo);
+
+	return 0;
+
+error_unpin:
+	amdgpu_bo_unpin(rbo);
+error_unlock:
+	amdgpu_bo_unreserve(rbo);
+	return r;
+}
+
+/*
+ * cleanup_writeback_job hook: unpin and unref job->fb's BO (skipped if reserve fails).
+ */
+static void amdgpu_dm_wb_cleanup_job(struct drm_writeback_connector *connector,
+				struct drm_writeback_job *job)
+{
+	struct amdgpu_bo *bo;
+
+	if (!job->fb)
+		return;
+
+	bo = gem_to_amdgpu_bo(job->fb->obj[0]);
+	if (unlikely(amdgpu_bo_reserve(bo, false))) {
+		DRM_ERROR("failed to reserve rbo before unpin\n");
+		return;
+	}
+	amdgpu_bo_unpin(bo);
+	amdgpu_bo_unreserve(bo);
+	amdgpu_bo_unref(&bo);
+}
+
+static const struct drm_encoder_helper_funcs amdgpu_dm_wb_encoder_helper_funcs = {
+	.atomic_check = amdgpu_dm_wb_encoder_atomic_check, /* writeback FB size/format check */
+};
+
+static const struct drm_connector_funcs amdgpu_dm_wb_connector_funcs = {
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = drm_connector_cleanup,
+	.reset = amdgpu_dm_connector_funcs_reset, /* also called from amdgpu_dm_wb_connector_init() */
+	.atomic_duplicate_state = amdgpu_dm_connector_atomic_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+static const struct drm_connector_helper_funcs amdgpu_dm_wb_conn_helper_funcs = {
+	.get_modes = amdgpu_dm_wb_connector_get_modes,
+	.prepare_writeback_job = amdgpu_dm_wb_prepare_job, /* pins the job's BO */
+	.cleanup_writeback_job = amdgpu_dm_wb_cleanup_job, /* unpins and unrefs the job's BO */
+};
+
+/*
+ * Register @wbcon as a DRM writeback connector bound to the DC link at
+ * @link_index, then run the connector's reset hook, if any, to create its
+ * initial state. Returns 0 or the drm_writeback_connector_init() error.
+ */
+int amdgpu_dm_wb_connector_init(struct amdgpu_display_manager *dm,
+				struct amdgpu_dm_wb_connector *wbcon,
+				uint32_t link_index)
+{
+	struct drm_connector *connector = &wbcon->base.base;
+	int ret;
+
+	wbcon->link = dc_get_link_at_index(dm->dc, link_index);
+	drm_connector_helper_add(connector, &amdgpu_dm_wb_conn_helper_funcs);
+
+	ret = drm_writeback_connector_init(&dm->adev->ddev, &wbcon->base,
+					   &amdgpu_dm_wb_connector_funcs,
+					   &amdgpu_dm_wb_encoder_helper_funcs,
+					   amdgpu_dm_wb_formats,
+					   ARRAY_SIZE(amdgpu_dm_wb_formats),
+					   amdgpu_dm_get_encoder_crtc_mask(dm->adev));
+	if (ret)
+		return ret;
+
+	/* Allocate some default initial connector state with our reset helper. */
+	if (connector->funcs->reset)
+		connector->funcs->reset(connector);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h
new file mode 100644
index 000000000000..13d31c857dee
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __AMDGPU_DM_WB_H__
+#define __AMDGPU_DM_WB_H__
+
+#include <drm/drm_writeback.h>
+
+int amdgpu_dm_wb_connector_init(struct amdgpu_display_manager *dm,
+				struct amdgpu_dm_wb_connector *dm_wbcon,
+				uint32_t link_index);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile
index 3a169b78e7e4..7991ae468f75 100644
--- a/drivers/gpu/drm/amd/display/dc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/Makefile
@@ -22,7 +22,7 @@
 #
 # Makefile for Display Core (dc) component.
 
-DC_LIBS = basics bios dml clk_mgr dce gpio hwss irq link virtual dsc
+DC_LIBS = basics bios dml clk_mgr dce gpio hwss irq link virtual dsc resource optc
 
 ifdef CONFIG_DRM_AMD_DC_FP
 
@@ -34,12 +34,8 @@ DC_LIBS += dcn21
 DC_LIBS += dcn201
 DC_LIBS += dcn30
 DC_LIBS += dcn301
-DC_LIBS += dcn302
-DC_LIBS += dcn303
 DC_LIBS += dcn31
 DC_LIBS += dcn314
-DC_LIBS += dcn315
-DC_LIBS += dcn316
 DC_LIBS += dcn32
 DC_LIBS += dcn321
 DC_LIBS += dcn35
@@ -51,7 +47,6 @@ DC_LIBS += dce120
 
 DC_LIBS += dce112
 DC_LIBS += dce110
-DC_LIBS += dce100
 DC_LIBS += dce80
 
 ifdef CONFIG_DRM_AMD_DC_SI
@@ -65,7 +60,7 @@ AMD_DC = $(addsuffix /Makefile, $(addprefix $(FULL_AMD_DISPLAY_PATH)/dc/,$(DC_LI
 include $(AMD_DC)
 
 DISPLAY_CORE = dc.o dc_stat.o dc_resource.o dc_hw_sequencer.o dc_sink.o \
-dc_surface.o dc_debug.o dc_stream.o dc_link_enc_cfg.o dc_link_exports.o
+dc_surface.o dc_debug.o dc_stream.o dc_link_enc_cfg.o dc_link_exports.o dc_state.o
 
 DISPLAY_CORE += dc_vm_helper.o
 
diff --git a/drivers/gpu/drm/amd/display/dc/basics/conversion.c b/drivers/gpu/drm/amd/display/dc/basics/conversion.c
index e295a839ab47..1090d235086a 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/conversion.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/conversion.c
@@ -103,7 +103,8 @@ void convert_float_matrix(
 
 static uint32_t find_gcd(uint32_t a, uint32_t b)
 {
-	uint32_t remainder = 0;
+	uint32_t remainder;
+
 	while (b != 0) {
 		remainder = a % b;
 		a = b;
diff --git a/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c
index f2dfa96f9ef5..39530b2ea495 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c
@@ -94,7 +94,7 @@ static void calculate_bandwidth(
 	const uint32_t s_high = 7;
 	const uint32_t dmif_chunk_buff_margin = 1;
 
-	uint32_t max_chunks_fbc_mode;
+	uint32_t max_chunks_fbc_mode = 0;
 	int32_t num_cursor_lines;
 
 	int32_t i, j, k;
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index 7cdb1a8a0ba0..05f392501c0a 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -1014,13 +1014,20 @@ static enum bp_result get_ss_info_v4_5(
 		DC_LOG_BIOS("AS_SIGNAL_TYPE_HDMI ss_percentage: %d\n", ss_info->spread_spectrum_percentage);
 		break;
 	case AS_SIGNAL_TYPE_DISPLAY_PORT:
-		ss_info->spread_spectrum_percentage =
+		if (bp->base.integrated_info) {
+			DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", bp->base.integrated_info->gpuclk_ss_percentage);
+			ss_info->spread_spectrum_percentage =
+					bp->base.integrated_info->gpuclk_ss_percentage;
+			ss_info->type.CENTER_MODE =
+					bp->base.integrated_info->gpuclk_ss_type;
+		} else {
+			ss_info->spread_spectrum_percentage =
 				disp_cntl_tbl->dp_ss_percentage;
-		ss_info->spread_spectrum_range =
+			ss_info->spread_spectrum_range =
 				disp_cntl_tbl->dp_ss_rate_10hz * 10;
-		if (disp_cntl_tbl->dp_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE)
-			ss_info->type.CENTER_MODE = true;
-
+			if (disp_cntl_tbl->dp_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE)
+				ss_info->type.CENTER_MODE = true;
+		}
 		DC_LOG_BIOS("AS_SIGNAL_TYPE_DISPLAY_PORT ss_percentage: %d\n", ss_info->spread_spectrum_percentage);
 		break;
 	case AS_SIGNAL_TYPE_GPU_PLL:
@@ -1691,7 +1698,7 @@ static enum bp_result bios_parser_enable_disp_power_gating(
 static enum bp_result bios_parser_enable_lvtma_control(
 	struct dc_bios *dcb,
 	uint8_t uc_pwr_on,
-	uint8_t panel_instance,
+	uint8_t pwrseq_instance,
 	uint8_t bypass_panel_control_wait)
 {
 	struct bios_parser *bp = BP_FROM_DCB(dcb);
@@ -1699,7 +1706,7 @@ static enum bp_result bios_parser_enable_lvtma_control(
 	if (!bp->cmd_tbl.enable_lvtma_control)
 		return BP_RESULT_FAILURE;
 
-	return bp->cmd_tbl.enable_lvtma_control(bp, uc_pwr_on, panel_instance, bypass_panel_control_wait);
+	return bp->cmd_tbl.enable_lvtma_control(bp, uc_pwr_on, pwrseq_instance, bypass_panel_control_wait);
 }
 
 static bool bios_parser_is_accelerated_mode(
@@ -1843,19 +1850,21 @@ static enum bp_result get_firmware_info_v3_2(
 		/* Vega12 */
 		smu_info_v3_2 = GET_IMAGE(struct atom_smu_info_v3_2,
 							DATA_TABLES(smu_info));
-		DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_2->gpuclk_ss_percentage);
 		if (!smu_info_v3_2)
 			return BP_RESULT_BADBIOSTABLE;
 
+		DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_2->gpuclk_ss_percentage);
+
 		info->default_engine_clk = smu_info_v3_2->bootup_dcefclk_10khz * 10;
 	} else if (revision.minor == 3) {
 		/* Vega20 */
 		smu_info_v3_3 = GET_IMAGE(struct atom_smu_info_v3_3,
 							DATA_TABLES(smu_info));
-		DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_3->gpuclk_ss_percentage);
 		if (!smu_info_v3_3)
 			return BP_RESULT_BADBIOSTABLE;
 
+		DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_3->gpuclk_ss_percentage);
+
 		info->default_engine_clk = smu_info_v3_3->bootup_dcefclk_10khz * 10;
 	}
 
@@ -2214,22 +2223,22 @@ static enum bp_result bios_parser_get_disp_connector_caps_info(
 
 	switch (bp->object_info_tbl.revision.minor) {
 	case 4:
-	    default:
-		    object = get_bios_object(bp, object_id);
-
-		    if (!object)
-			    return BP_RESULT_BADINPUT;
-
-		    record = get_disp_connector_caps_record(bp, object);
-		    if (!record)
-			    return BP_RESULT_NORECORD;
-
-		    info->INTERNAL_DISPLAY =
-			    (record->connectcaps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY) ? 1 : 0;
-		    info->INTERNAL_DISPLAY_BL =
-			    (record->connectcaps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY_BL) ? 1 : 0;
-		    break;
-	    case 5:
+		default:
+			object = get_bios_object(bp, object_id);
+
+			if (!object)
+				return BP_RESULT_BADINPUT;
+
+			record = get_disp_connector_caps_record(bp, object);
+			if (!record)
+				return BP_RESULT_NORECORD;
+
+			info->INTERNAL_DISPLAY =
+				(record->connectcaps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY) ? 1 : 0;
+			info->INTERNAL_DISPLAY_BL =
+				(record->connectcaps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY_BL) ? 1 : 0;
+			break;
+	case 5:
 		object_path_v3 = get_bios_object_from_path_v3(bp, object_id);
 
 		if (!object_path_v3)
@@ -2391,7 +2400,6 @@ static enum bp_result get_vram_info_v30(
 	return result;
 }
 
-
 /*
  * get_integrated_info_v11
  *
@@ -2416,10 +2424,11 @@ static enum bp_result get_integrated_info_v11(
 	info_v11 = GET_IMAGE(struct atom_integrated_system_info_v1_11,
 					DATA_TABLES(integratedsysteminfo));
 
-	DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v11->gpuclk_ss_percentage);
 	if (info_v11 == NULL)
 		return BP_RESULT_BADBIOSTABLE;
 
+	DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v11->gpuclk_ss_percentage);
+
 	info->gpu_cap_info =
 	le32_to_cpu(info_v11->gpucapinfo);
 	/*
@@ -2631,11 +2640,12 @@ static enum bp_result get_integrated_info_v2_1(
 
 	info_v2_1 = GET_IMAGE(struct atom_integrated_system_info_v2_1,
 					DATA_TABLES(integratedsysteminfo));
-	DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_1->gpuclk_ss_percentage);
 
 	if (info_v2_1 == NULL)
 		return BP_RESULT_BADBIOSTABLE;
 
+	DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_1->gpuclk_ss_percentage);
+
 	info->gpu_cap_info =
 	le32_to_cpu(info_v2_1->gpucapinfo);
 	/*
@@ -2793,11 +2803,11 @@ static enum bp_result get_integrated_info_v2_2(
 	info_v2_2 = GET_IMAGE(struct atom_integrated_system_info_v2_2,
 					DATA_TABLES(integratedsysteminfo));
 
-	DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_2->gpuclk_ss_percentage);
-
 	if (info_v2_2 == NULL)
 		return BP_RESULT_BADBIOSTABLE;
 
+	DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_2->gpuclk_ss_percentage);
+
 	info->gpu_cap_info =
 	le32_to_cpu(info_v2_2->gpucapinfo);
 	/*
@@ -2814,6 +2824,8 @@ static enum bp_result get_integrated_info_v2_2(
 	info->ma_channel_number = info_v2_2->umachannelnumber;
 	info->dp_ss_control =
 		le16_to_cpu(info_v2_2->reserved1);
+	info->gpuclk_ss_percentage = info_v2_2->gpuclk_ss_percentage;
+	info->gpuclk_ss_type = info_v2_2->gpuclk_ss_type;
 
 	for (i = 0; i < NUMBER_OF_UCHAR_FOR_GUID; ++i) {
 		info->ext_disp_conn_info.gu_id[i] =
@@ -3323,27 +3335,28 @@ static enum bp_result get_bracket_layout_record(
 		DC_LOG_DETECTION_EDID_PARSER("Invalid slot_layout_info\n");
 		return BP_RESULT_BADINPUT;
 	}
+
 	tbl = &bp->object_info_tbl;
 	v1_4 = tbl->v1_4;
 	v1_5 = tbl->v1_5;
 
 	result = BP_RESULT_NORECORD;
 	switch (bp->object_info_tbl.revision.minor) {
-		case 4:
-		default:
-			for (i = 0; i < v1_4->number_of_path; ++i)	{
-				if (bracket_layout_id ==
-					v1_4->display_path[i].display_objid) {
-					result = update_slot_layout_info(dcb, i, slot_layout_info);
-					break;
-				}
+	case 4:
+	default:
+		for (i = 0; i < v1_4->number_of_path; ++i) {
+			if (bracket_layout_id == v1_4->display_path[i].display_objid) {
+				result = update_slot_layout_info(dcb, i, slot_layout_info);
+				break;
 			}
-		    break;
-		case 5:
-			for (i = 0; i < v1_5->number_of_path; ++i)
-				result = update_slot_layout_info_v2(dcb, i, slot_layout_info);
-			break;
+		}
+		break;
+	case 5:
+		for (i = 0; i < v1_5->number_of_path; ++i)
+			result = update_slot_layout_info_v2(dcb, i, slot_layout_info);
+		break;
 	}
+
 	return result;
 }
 
@@ -3352,9 +3365,7 @@ static enum bp_result bios_get_board_layout_info(
 	struct board_layout_info *board_layout_info)
 {
 	unsigned int i;
-
 	struct bios_parser *bp;
-
 	static enum bp_result record_result;
 	unsigned int max_slots;
 
@@ -3364,7 +3375,6 @@ static enum bp_result bios_get_board_layout_info(
 		0, 0
 	};
 
-
 	bp = BP_FROM_DCB(dcb);
 
 	if (board_layout_info == NULL) {
@@ -3545,7 +3555,6 @@ static const struct dc_vbios_funcs vbios_funcs = {
 	.bios_parser_destroy = firmware_parser_destroy,
 
 	.get_board_layout_info = bios_get_board_layout_info,
-	/* TODO: use this fn in hw init?*/
 	.pack_data_tables = bios_parser_pack_data_tables,
 
 	.get_atom_dc_golden_table = bios_get_atom_dc_golden_table,
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
index 90a02d7bd3da..293a919d605d 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
@@ -123,7 +123,7 @@ static void encoder_control_dmcub(
 		sizeof(cmd.digx_encoder_control.header);
 	cmd.digx_encoder_control.encoder_control.dig.stream_param = *dig;
 
-	dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static enum bp_result encoder_control_digx_v1_5(
@@ -259,7 +259,7 @@ static void transmitter_control_dmcub(
 		sizeof(cmd.dig1_transmitter_control.header);
 	cmd.dig1_transmitter_control.transmitter_control.dig = *dig;
 
-	dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static enum bp_result transmitter_control_v1_6(
@@ -321,7 +321,7 @@ static void transmitter_control_dmcub_v1_7(
 		sizeof(cmd.dig1_transmitter_control.header);
 	cmd.dig1_transmitter_control.transmitter_control.dig_v1_7 = *dig;
 
-	dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static enum bp_result transmitter_control_v1_7(
@@ -429,7 +429,7 @@ static void set_pixel_clock_dmcub(
 		sizeof(cmd.set_pixel_clock.header);
 	cmd.set_pixel_clock.pixel_clock.clk = *clk;
 
-	dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static enum bp_result set_pixel_clock_v7(
@@ -796,7 +796,7 @@ static void enable_disp_power_gating_dmcub(
 		sizeof(cmd.enable_disp_power_gating.header);
 	cmd.enable_disp_power_gating.power_gating.pwr = *pwr;
 
-	dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static enum bp_result enable_disp_power_gating_v2_1(
@@ -976,7 +976,7 @@ static unsigned int get_smu_clock_info_v3_1(struct bios_parser *bp, uint8_t id)
 static enum bp_result enable_lvtma_control(
 	struct bios_parser *bp,
 	uint8_t uc_pwr_on,
-	uint8_t panel_instance,
+	uint8_t pwrseq_instance,
 	uint8_t bypass_panel_control_wait);
 
 static void init_enable_lvtma_control(struct bios_parser *bp)
@@ -989,7 +989,7 @@ static void init_enable_lvtma_control(struct bios_parser *bp)
 static void enable_lvtma_control_dmcub(
 	struct dc_dmub_srv *dmcub,
 	uint8_t uc_pwr_on,
-	uint8_t panel_instance,
+	uint8_t pwrseq_instance,
 	uint8_t bypass_panel_control_wait)
 {
 
@@ -1002,17 +1002,17 @@ static void enable_lvtma_control_dmcub(
 			DMUB_CMD__VBIOS_LVTMA_CONTROL;
 	cmd.lvtma_control.data.uc_pwr_action =
 			uc_pwr_on;
-	cmd.lvtma_control.data.panel_inst =
-			panel_instance;
+	cmd.lvtma_control.data.pwrseq_inst =
+			pwrseq_instance;
 	cmd.lvtma_control.data.bypass_panel_control_wait =
 			bypass_panel_control_wait;
-	dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static enum bp_result enable_lvtma_control(
 	struct bios_parser *bp,
 	uint8_t uc_pwr_on,
-	uint8_t panel_instance,
+	uint8_t pwrseq_instance,
 	uint8_t bypass_panel_control_wait)
 {
 	enum bp_result result = BP_RESULT_FAILURE;
@@ -1021,7 +1021,7 @@ static enum bp_result enable_lvtma_control(
 	    bp->base.ctx->dc->debug.dmub_command_table) {
 		enable_lvtma_control_dmcub(bp->base.ctx->dmub_srv,
 				uc_pwr_on,
-				panel_instance,
+				pwrseq_instance,
 				bypass_panel_control_wait);
 		return BP_RESULT_OK;
 	}
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.h b/drivers/gpu/drm/amd/display/dc/bios/command_table2.h
index b6d09bf6cf72..41c8c014397f 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.h
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.h
@@ -96,7 +96,7 @@ struct cmd_tbl {
 			struct bios_parser *bp, uint8_t id);
 	enum bp_result (*enable_lvtma_control)(struct bios_parser *bp,
 			uint8_t uc_pwr_on,
-			uint8_t panel_instance,
+			uint8_t pwrseq_instance,
 			uint8_t bypass_panel_control_wait);
 };
 
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
index 3e73c4e59d40..28a2a837d2f0 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
@@ -29,6 +29,7 @@
 #include "dc_types.h"
 #include "dccg.h"
 #include "clk_mgr_internal.h"
+#include "dc_state_priv.h"
 #include "link.h"
 
 #include "dce100/dce_clk_mgr.h"
@@ -63,7 +64,7 @@ int clk_mgr_helper_get_active_display_cnt(
 		/* Don't count SubVP phantom pipes as part of active
 		 * display count
 		 */
-		if (stream->mall_stream_config.type == SUBVP_PHANTOM)
+		if (dc_state_get_stream_subvp_type(context, stream) == SUBVP_PHANTOM)
 			continue;
 
 		/*
@@ -368,7 +369,7 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p
 	}
 	break;
 
-#endif /* CONFIG_DRM_AMD_DC_FP - Family RV */
+#endif	/* CONFIG_DRM_AMD_DC_FP */
 	default:
 		ASSERT(0); /* Unknown Asic */
 		break;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c
index a5489fe6875f..aa9fd1dc550a 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c
@@ -546,6 +546,8 @@ static unsigned int find_dcfclk_for_voltage(const struct vg_dpm_clocks *clock_ta
 	int i;
 
 	for (i = 0; i < VG_NUM_SOC_VOLTAGE_LEVELS; i++) {
+		if (i >= VG_NUM_DCFCLK_DPM_LEVELS)
+			break;
 		if (clock_table->SocVoltage[i] == voltage)
 			return clock_table->DcfClocks[i];
 	}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
index 3db4ef564b99..ce1386e22576 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
@@ -253,7 +253,7 @@ void dcn31_update_clocks(struct clk_mgr *clk_mgr_base,
 	cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz;
 	cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz;
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr)
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
index 7326b7565846..a84f1e376dee 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
@@ -87,6 +87,20 @@ static const struct IP_BASE CLK_BASE = { { { { 0x00016C00, 0x02401800, 0, 0, 0,
 #define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK	0x0000F000L
 #define CLK1_CLK_PLL_REQ__FbMult_frac_MASK	0xFFFF0000L
 
+#define regCLK1_CLK2_BYPASS_CNTL			0x029c
+#define regCLK1_CLK2_BYPASS_CNTL_BASE_IDX	0
+
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL__SHIFT	0x0
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV__SHIFT	0x10
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK		0x00000007L
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK		0x000F0000L
+
+#define regCLK6_0_CLK6_spll_field_8				0x464b
+#define regCLK6_0_CLK6_spll_field_8_BASE_IDX	0
+
+#define CLK6_0_CLK6_spll_field_8__spll_ssc_en__SHIFT	0xd
+#define CLK6_0_CLK6_spll_field_8__spll_ssc_en_MASK		0x00002000L
+
 #define REG(reg_name) \
 	(CLK_BASE.instance[0].segment[reg ## reg_name ## _BASE_IDX] + reg ## reg_name)
 
@@ -131,35 +145,63 @@ static int dcn314_get_active_display_cnt_wa(
 	return display_count;
 }
 
-static void dcn314_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool disable)
+static void dcn314_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context,
+				  bool safe_to_lower, bool disable)
 {
 	struct dc *dc = clk_mgr_base->ctx->dc;
 	int i;
 
 	for (i = 0; i < dc->res_pool->pipe_count; ++i) {
-		struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+		struct pipe_ctx *pipe = safe_to_lower
+			? &context->res_ctx.pipe_ctx[i]
+			: &dc->current_state->res_ctx.pipe_ctx[i];
 
 		if (pipe->top_pipe || pipe->prev_odm_pipe)
 			continue;
 		if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) {
-			struct stream_encoder *stream_enc = pipe->stream_res.stream_enc;
-
 			if (disable) {
-				if (stream_enc && stream_enc->funcs->disable_fifo)
-					pipe->stream_res.stream_enc->funcs->disable_fifo(stream_enc);
+				if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc)
+					pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
 
-				pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
 				reset_sync_context_for_pipe(dc, context, i);
 			} else {
 				pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg);
-
-				if (stream_enc && stream_enc->funcs->enable_fifo)
-					pipe->stream_res.stream_enc->funcs->enable_fifo(stream_enc);
 			}
 		}
 	}
 }
 
+bool dcn314_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base)
+{
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	uint32_t ssc_enable;
+
+	REG_GET(CLK6_0_CLK6_spll_field_8, spll_ssc_en, &ssc_enable);
+
+	return ssc_enable == 1;
+}
+
+void dcn314_init_clocks(struct clk_mgr *clk_mgr)
+{
+	struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr);
+	uint32_t ref_dtbclk = clk_mgr->clks.ref_dtbclk_khz;
+
+	memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks));
+	// Assumption is that boot state always supports pstate
+	clk_mgr->clks.ref_dtbclk_khz = ref_dtbclk;	// restore ref_dtbclk
+	clk_mgr->clks.p_state_change_support = true;
+	clk_mgr->clks.prev_p_state_change_support = true;
+	clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN;
+	clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN;
+
+	// Use the SS-adjusted DP DTO reference clock when SPLL SSC is enabled; otherwise use dprefclk as-is
+	if (dcn314_is_spll_ssc_enabled(clk_mgr))
+		clk_mgr->dp_dto_source_clock_in_khz =
+			dce_adjust_dp_ref_freq_for_ss(clk_mgr_int, clk_mgr->dprefclk_khz);
+	else
+		clk_mgr->dp_dto_source_clock_in_khz = clk_mgr->dprefclk_khz;
+}
+
 void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
 			struct dc_state *context,
 			bool safe_to_lower)
@@ -252,11 +294,11 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
 	}
 
 	if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) {
-		dcn314_disable_otg_wa(clk_mgr_base, context, true);
+		dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true);
 
 		clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
 		dcn314_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz);
-		dcn314_disable_otg_wa(clk_mgr_base, context, false);
+		dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false);
 
 		update_dispclk = true;
 	}
@@ -284,7 +326,7 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
 	cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz;
 	cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz;
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr)
@@ -436,6 +478,11 @@ static DpmClocks314_t dummy_clocks;
 
 static struct dcn314_watermarks dummy_wms = { 0 };
 
+static struct dcn314_ss_info_table ss_info_table = {
+	.ss_divider = 1000,
+	.ss_percentage = {0, 0, 375, 375, 375}
+};
+
 static void dcn314_build_watermark_ranges(struct clk_bw_params *bw_params, struct dcn314_watermarks *table)
 {
 	int i, num_valid_sets;
@@ -708,13 +755,31 @@ static struct clk_mgr_funcs dcn314_funcs = {
 	.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
 	.get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz,
 	.update_clocks = dcn314_update_clocks,
-	.init_clocks = dcn31_init_clocks,
+	.init_clocks = dcn314_init_clocks,
 	.enable_pme_wa = dcn314_enable_pme_wa,
 	.are_clock_states_equal = dcn314_are_clock_states_equal,
 	.notify_wm_ranges = dcn314_notify_wm_ranges
 };
 extern struct clk_mgr_funcs dcn3_fpga_funcs;
 
+static void dcn314_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr)
+{
+	uint32_t clock_source;
+	//uint32_t ssc_enable;
+
+	REG_GET(CLK1_CLK2_BYPASS_CNTL, CLK2_BYPASS_SEL, &clock_source);
+	//REG_GET(CLK6_0_CLK6_spll_field_8, spll_ssc_en, &ssc_enable);
+
+	if (dcn314_is_spll_ssc_enabled(&clk_mgr->base) && (clock_source < ARRAY_SIZE(ss_info_table.ss_percentage))) {
+		clk_mgr->dprefclk_ss_percentage = ss_info_table.ss_percentage[clock_source];
+
+		if (clk_mgr->dprefclk_ss_percentage != 0) {
+			clk_mgr->ss_on_dprefclk = true;
+			clk_mgr->dprefclk_ss_divider = ss_info_table.ss_divider;
+		}
+	}
+}
+
 void dcn314_clk_mgr_construct(
 		struct dc_context *ctx,
 		struct clk_mgr_dcn314 *clk_mgr,
@@ -782,6 +847,7 @@ void dcn314_clk_mgr_construct(
 	clk_mgr->base.base.dprefclk_khz = 600000;
 	clk_mgr->base.base.clks.ref_dtbclk_khz = 600000;
 	dce_clock_read_ss_info(&clk_mgr->base);
+	dcn314_read_ss_info_from_lut(&clk_mgr->base);
 	/*if bios enabled SS, driver needs to adjust dtb clock, only enable with correct bios*/
 
 	clk_mgr->base.base.bw_params = &dcn314_bw_params;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h
index 171f84340eb2..002c28e80720 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h
@@ -28,6 +28,8 @@
 #define __DCN314_CLK_MGR_H__
 #include "clk_mgr_internal.h"
 
+#define DCN314_NUM_CLOCK_SOURCES   5
+
 struct dcn314_watermarks;
 
 struct dcn314_smu_watermark_set {
@@ -40,9 +42,18 @@ struct clk_mgr_dcn314 {
 	struct dcn314_smu_watermark_set smu_wm_set;
 };
 
+struct dcn314_ss_info_table {
+	uint32_t ss_divider;
+	uint32_t ss_percentage[DCN314_NUM_CLOCK_SOURCES];
+};
+
 bool dcn314_are_clock_states_equal(struct dc_clocks *a,
 		struct dc_clocks *b);
 
+bool dcn314_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base);
+
+void dcn314_init_clocks(struct clk_mgr *clk_mgr);
+
 void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
 			struct dc_state *context,
 			bool safe_to_lower);
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
index b2c4f97afc8b..644da4637320 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
@@ -232,7 +232,7 @@ static void dcn315_update_clocks(struct clk_mgr *clk_mgr_base,
 	cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz;
 	cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz;
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static void dcn315_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
@@ -334,7 +334,7 @@ static struct wm_table lpddr5_wm_table = {
 		{
 			.wm_inst = WM_A,
 			.wm_type = WM_TYPE_PSTATE_CHG,
-			.pstate_latency_us = 11.65333,
+			.pstate_latency_us = 129.0,
 			.sr_exit_time_us = 11.5,
 			.sr_enter_plus_exit_time_us = 14.5,
 			.valid = true,
@@ -342,7 +342,7 @@ static struct wm_table lpddr5_wm_table = {
 		{
 			.wm_inst = WM_B,
 			.wm_type = WM_TYPE_PSTATE_CHG,
-			.pstate_latency_us = 11.65333,
+			.pstate_latency_us = 129.0,
 			.sr_exit_time_us = 11.5,
 			.sr_enter_plus_exit_time_us = 14.5,
 			.valid = true,
@@ -350,7 +350,7 @@ static struct wm_table lpddr5_wm_table = {
 		{
 			.wm_inst = WM_C,
 			.wm_type = WM_TYPE_PSTATE_CHG,
-			.pstate_latency_us = 11.65333,
+			.pstate_latency_us = 129.0,
 			.sr_exit_time_us = 11.5,
 			.sr_enter_plus_exit_time_us = 14.5,
 			.valid = true,
@@ -358,7 +358,7 @@ static struct wm_table lpddr5_wm_table = {
 		{
 			.wm_inst = WM_D,
 			.wm_type = WM_TYPE_PSTATE_CHG,
-			.pstate_latency_us = 11.65333,
+			.pstate_latency_us = 129.0,
 			.sr_exit_time_us = 11.5,
 			.sr_enter_plus_exit_time_us = 14.5,
 			.valid = true,
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
index 09151cc56ce4..12f3e8aa46d8 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
@@ -239,7 +239,7 @@ static void dcn316_update_clocks(struct clk_mgr *clk_mgr_base,
 	cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz;
 	cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz;
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static void dcn316_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
index a496930b1f9c..aadd07bc68c5 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
@@ -25,7 +25,6 @@
 
 #include "dccg.h"
 #include "clk_mgr_internal.h"
-
 #include "dcn32/dcn32_clk_mgr_smu_msg.h"
 #include "dcn20/dcn20_clk_mgr.h"
 #include "dce100/dce_clk_mgr.h"
@@ -34,7 +33,7 @@
 #include "core_types.h"
 #include "dm_helpers.h"
 #include "link.h"
-
+#include "dc_state_priv.h"
 #include "atomfirmware.h"
 #include "smu13_driver_if.h"
 
@@ -458,20 +457,56 @@ static int dcn32_get_dispclk_from_dentist(struct clk_mgr *clk_mgr_base)
 	return 0;
 }
 
-static void dcn32_auto_dpm_test_log(struct dc_clocks *new_clocks, struct clk_mgr_internal *clk_mgr)
+static bool dcn32_check_native_scaling(struct pipe_ctx *pipe)
 {
-    unsigned int dispclk_khz_reg    = REG_READ(CLK1_CLK0_CURRENT_CNT); // DISPCLK
-    unsigned int dppclk_khz_reg     = REG_READ(CLK1_CLK1_CURRENT_CNT); // DPPCLK
-    unsigned int dprefclk_khz_reg   = REG_READ(CLK1_CLK2_CURRENT_CNT); // DPREFCLK
-    unsigned int dcfclk_khz_reg     = REG_READ(CLK1_CLK3_CURRENT_CNT); // DCFCLK
-    unsigned int dtbclk_khz_reg     = REG_READ(CLK1_CLK4_CURRENT_CNT); // DTBCLK
-    unsigned int fclk_khz_reg       = REG_READ(CLK4_CLK0_CURRENT_CNT); // FCLK
+	bool is_native_scaling = false;
+	int width = pipe->plane_state->src_rect.width;
+	int height = pipe->plane_state->src_rect.height;
+
+	if (pipe->stream->timing.h_addressable == width &&
+			pipe->stream->timing.v_addressable == height &&
+			pipe->plane_state->dst_rect.width == width &&
+			pipe->plane_state->dst_rect.height == height)
+		is_native_scaling = true;
+
+	return is_native_scaling;
+}
+
+static void dcn32_auto_dpm_test_log(
+		struct dc_clocks *new_clocks,
+		struct clk_mgr_internal *clk_mgr,
+		struct dc_state *context)
+{
+	unsigned int dispclk_khz_reg, dppclk_khz_reg, dprefclk_khz_reg, dcfclk_khz_reg, dtbclk_khz_reg,
+				 fclk_khz_reg, mall_ss_size_bytes;
+	int dramclk_khz_override, fclk_khz_override, num_fclk_levels;
+
+	struct pipe_ctx *pipe_ctx_list[MAX_PIPES];
+	int active_pipe_count = 0;
+
+	for (int i = 0; i < MAX_PIPES; i++) {
+		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+		if (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) {
+			pipe_ctx_list[active_pipe_count] = pipe_ctx;
+			active_pipe_count++;
+		}
+	}
+
+	mall_ss_size_bytes = context->bw_ctx.bw.dcn.mall_ss_size_bytes;
+
+    dispclk_khz_reg    = REG_READ(CLK1_CLK0_CURRENT_CNT); // DISPCLK
+    dppclk_khz_reg     = REG_READ(CLK1_CLK1_CURRENT_CNT); // DPPCLK
+    dprefclk_khz_reg   = REG_READ(CLK1_CLK2_CURRENT_CNT); // DPREFCLK
+    dcfclk_khz_reg     = REG_READ(CLK1_CLK3_CURRENT_CNT); // DCFCLK
+    dtbclk_khz_reg     = REG_READ(CLK1_CLK4_CURRENT_CNT); // DTBCLK
+    fclk_khz_reg       = REG_READ(CLK4_CLK0_CURRENT_CNT); // FCLK
 
     // Overrides for these clocks in case there is no p_state change support
-    int dramclk_khz_override = new_clocks->dramclk_khz;
-    int fclk_khz_override = new_clocks->fclk_khz;
+    dramclk_khz_override = new_clocks->dramclk_khz;
+    fclk_khz_override = new_clocks->fclk_khz;
 
-    int num_fclk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_fclk_levels - 1;
+    num_fclk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_fclk_levels - 1;
 
     if (!new_clocks->p_state_change_support) {
 	    dramclk_khz_override = clk_mgr->base.bw_params->max_memclk_mhz * 1000;
@@ -488,16 +523,49 @@ static void dcn32_auto_dpm_test_log(struct dc_clocks *new_clocks, struct clk_mgr
 	//
 	//				AutoDPMTest: clk1:%d - clk2:%d - clk3:%d - clk4:%d\n"
 	////////////////////////////////////////////////////////////////////////////
-	if (new_clocks &&
+	if (new_clocks && active_pipe_count > 0 &&
 		new_clocks->dramclk_khz > 0 &&
 		new_clocks->fclk_khz > 0 &&
 		new_clocks->dcfclk_khz > 0 &&
 		new_clocks->dppclk_khz > 0) {
 
+		uint32_t pix_clk_list[MAX_PIPES] = {0};
+		int p_state_list[MAX_PIPES] = {0};
+		int disp_src_width_list[MAX_PIPES] = {0};
+		int disp_src_height_list[MAX_PIPES] = {0};
+		uint64_t disp_src_refresh_list[MAX_PIPES] = {0};
+		bool is_scaled_list[MAX_PIPES] = {0};
+
+		for (int i = 0; i < active_pipe_count; i++) {
+			struct pipe_ctx *curr_pipe_ctx = pipe_ctx_list[i];
+			uint64_t refresh_rate;
+
+			pix_clk_list[i] = curr_pipe_ctx->stream->timing.pix_clk_100hz;
+			p_state_list[i] = curr_pipe_ctx->p_state_type;
+
+			refresh_rate = (curr_pipe_ctx->stream->timing.pix_clk_100hz * (uint64_t)100 +
+				curr_pipe_ctx->stream->timing.v_total * curr_pipe_ctx->stream->timing.h_total - (uint64_t)1);
+			refresh_rate = div_u64(refresh_rate, curr_pipe_ctx->stream->timing.v_total);
+			refresh_rate = div_u64(refresh_rate, curr_pipe_ctx->stream->timing.h_total);
+			disp_src_refresh_list[i] = refresh_rate;
+
+			if (curr_pipe_ctx->plane_state) {
+				is_scaled_list[i] = !(dcn32_check_native_scaling(curr_pipe_ctx));
+				disp_src_width_list[i] = curr_pipe_ctx->plane_state->src_rect.width;
+				disp_src_height_list[i] = curr_pipe_ctx->plane_state->src_rect.height;
+			}
+		}
+
 		DC_LOG_AUTO_DPM_TEST("AutoDPMTest: dramclk:%d - fclk:%d - "
 			"dcfclk:%d - dppclk:%d - dispclk_hw:%d - "
 			"dppclk_hw:%d - dprefclk_hw:%d - dcfclk_hw:%d - "
-			"dtbclk_hw:%d - fclk_hw:%d\n",
+			"dtbclk_hw:%d - fclk_hw:%d - pix_clk_0:%d - pix_clk_1:%d - "
+			"pix_clk_2:%d - pix_clk_3:%d - mall_ss_size:%d - p_state_type_0:%d - "
+			"p_state_type_1:%d - p_state_type_2:%d - p_state_type_3:%d - "
+			"pix_width_0:%d - pix_height_0:%d - refresh_rate_0:%lld - is_scaled_0:%d - "
+			"pix_width_1:%d - pix_height_1:%d - refresh_rate_1:%lld - is_scaled_1:%d - "
+			"pix_width_2:%d - pix_height_2:%d - refresh_rate_2:%lld - is_scaled_2:%d - "
+			"pix_width_3:%d - pix_height_3:%d - refresh_rate_3:%lld - is_scaled_3:%d - LOG_END\n",
 			dramclk_khz_override,
 			fclk_khz_override,
 			new_clocks->dcfclk_khz,
@@ -507,7 +575,14 @@ static void dcn32_auto_dpm_test_log(struct dc_clocks *new_clocks, struct clk_mgr
 			dprefclk_khz_reg,
 			dcfclk_khz_reg,
 			dtbclk_khz_reg,
-			fclk_khz_reg);
+			fclk_khz_reg,
+			pix_clk_list[0], pix_clk_list[1], pix_clk_list[2], pix_clk_list[3],
+			mall_ss_size_bytes,
+			p_state_list[0], p_state_list[1], p_state_list[2], p_state_list[3],
+			disp_src_width_list[0], disp_src_height_list[0], disp_src_refresh_list[0], is_scaled_list[0],
+			disp_src_width_list[1], disp_src_height_list[1], disp_src_refresh_list[1], is_scaled_list[1],
+			disp_src_width_list[2], disp_src_height_list[2], disp_src_refresh_list[2], is_scaled_list[2],
+			disp_src_width_list[3], disp_src_height_list[3], disp_src_refresh_list[3], is_scaled_list[3]);
 	}
 }
 
@@ -680,6 +755,7 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base,
 		/* DCCG requires KHz precision for DTBCLK */
 		clk_mgr_base->clks.ref_dtbclk_khz =
 				dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DTBCLK, khz_to_mhz_ceil(new_clocks->ref_dtbclk_khz));
+
 		dcn32_update_clocks_update_dtb_dto(clk_mgr, context, clk_mgr_base->clks.ref_dtbclk_khz);
 	}
 
@@ -708,7 +784,7 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base,
 				clk_mgr_base->clks.dispclk_khz / 1000 / 7);
 
 	if (dc->config.enable_auto_dpm_test_logs) {
-	    dcn32_auto_dpm_test_log(new_clocks, clk_mgr);
+	    dcn32_auto_dpm_test_log(new_clocks, clk_mgr, context);
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
index 507a7cf56711..e64890259235 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -50,6 +50,7 @@
 #include "dc_dmub_srv.h"
 #include "link.h"
 #include "logger_types.h"
+
 #undef DC_LOGGER
 #define DC_LOGGER \
 	clk_mgr->base.base.ctx->logger
@@ -80,12 +81,12 @@
 
 static int dcn35_get_active_display_cnt_wa(
 		struct dc *dc,
-		struct dc_state *context)
+		struct dc_state *context,
+		int *all_active_disps)
 {
-	int i, display_count;
+	int i, display_count = 0;
 	bool tmds_present = false;
 
-	display_count = 0;
 	for (i = 0; i < context->stream_count; i++) {
 		const struct dc_stream_state *stream = context->streams[i];
 
@@ -103,7 +104,8 @@ static int dcn35_get_active_display_cnt_wa(
 				link->link_enc->funcs->is_dig_enabled(link->link_enc))
 			display_count++;
 	}
-
+	if (all_active_disps != NULL)
+		*all_active_disps = display_count;
 	/* WA for hang on HDMI after display off back on*/
 	if (display_count == 0 && tmds_present)
 		display_count = 1;
@@ -126,21 +128,13 @@ static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *
 			continue;
 		if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) ||
 				     !pipe->stream->link_enc)) {
-			struct stream_encoder *stream_enc = pipe->stream_res.stream_enc;
-
 			if (disable) {
-				if (stream_enc && stream_enc->funcs->disable_fifo)
-					pipe->stream_res.stream_enc->funcs->disable_fifo(stream_enc);
-
 				if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc)
 					pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
 
 				reset_sync_context_for_pipe(dc, context, i);
 			} else {
 				pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg);
-
-				if (stream_enc && stream_enc->funcs->enable_fifo)
-					pipe->stream_res.stream_enc->funcs->enable_fifo(stream_enc);
 			}
 		}
 	}
@@ -224,14 +218,19 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
 	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
 	struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
 	struct dc *dc = clk_mgr_base->ctx->dc;
-	int display_count;
+	int display_count = 0;
 	bool update_dppclk = false;
 	bool update_dispclk = false;
 	bool dpp_clock_lowered = false;
+	int all_active_disps = 0;
 
 	if (dc->work_arounds.skip_clock_update)
 		return;
 
+	display_count = dcn35_get_active_display_cnt_wa(dc, context, &all_active_disps);
+	if (new_clocks->dtbclk_en && !new_clocks->ref_dtbclk_khz)
+		new_clocks->ref_dtbclk_khz = 600000;
+
 	/*
 	 * if it is safe to lower, but we are already in the lower state, we don't have to do anything
 	 * also if safe to lower is false, we just go in the higher state
@@ -250,7 +249,6 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
 		}
 		/* check that we're not already in lower */
 		if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) {
-			display_count = dcn35_get_active_display_cnt_wa(dc, context);
 			/* if we can go lower, go lower */
 			if (display_count == 0)
 				clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER;
@@ -265,8 +263,10 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
 
 		if (!clk_mgr_base->clks.dtbclk_en && new_clocks->dtbclk_en) {
 			dcn35_smu_set_dtbclk(clk_mgr, true);
-			dcn35_update_clocks_update_dtb_dto(clk_mgr, context, clk_mgr_base->clks.ref_dtbclk_khz);
 			clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en;
+
+			dcn35_update_clocks_update_dtb_dto(clk_mgr, context, new_clocks->ref_dtbclk_khz);
+			clk_mgr_base->clks.ref_dtbclk_khz = new_clocks->ref_dtbclk_khz;
 		}
 
 		/* check that we're not already in D0 */
@@ -314,17 +314,12 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
 		update_dispclk = true;
 	}
 
-	if (!new_clocks->dtbclk_en) {
-		new_clocks->ref_dtbclk_khz = 600000;
-	}
-
 	/* clock limits are received with MHz precision, divide by 1000 to prevent setting clocks at every call */
 	if (!dc->debug.disable_dtb_ref_clk_switch &&
-			should_set_clock(safe_to_lower, new_clocks->ref_dtbclk_khz / 1000, clk_mgr_base->clks.ref_dtbclk_khz / 1000)) {
-		/* DCCG requires KHz precision for DTBCLK */
-		dcn35_smu_set_dtbclk(clk_mgr, true);
-
-		dcn35_update_clocks_update_dtb_dto(clk_mgr, context, clk_mgr_base->clks.ref_dtbclk_khz);
+	    should_set_clock(safe_to_lower, new_clocks->ref_dtbclk_khz / 1000,
+			     clk_mgr_base->clks.ref_dtbclk_khz / 1000)) {
+		dcn35_update_clocks_update_dtb_dto(clk_mgr, context, new_clocks->ref_dtbclk_khz);
+		clk_mgr_base->clks.ref_dtbclk_khz = new_clocks->ref_dtbclk_khz;
 	}
 
 	if (dpp_clock_lowered) {
@@ -348,7 +343,7 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
 	cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz;
 	cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz;
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr)
@@ -423,9 +418,8 @@ bool dcn35_are_clock_states_equal(struct dc_clocks *a,
 }
 
 static void dcn35_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
-		struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info)
+		struct clk_mgr_dcn35 *clk_mgr)
 {
-
 }
 
 static struct clk_bw_params dcn35_bw_params = {
@@ -443,32 +437,32 @@ static struct wm_table ddr5_wm_table = {
 			.wm_inst = WM_A,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.72,
-			.sr_exit_time_us = 9,
-			.sr_enter_plus_exit_time_us = 11,
+			.sr_exit_time_us = 28.0,
+			.sr_enter_plus_exit_time_us = 30.0,
 			.valid = true,
 		},
 		{
 			.wm_inst = WM_B,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.72,
-			.sr_exit_time_us = 9,
-			.sr_enter_plus_exit_time_us = 11,
+			.sr_exit_time_us = 28.0,
+			.sr_enter_plus_exit_time_us = 30.0,
 			.valid = true,
 		},
 		{
 			.wm_inst = WM_C,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.72,
-			.sr_exit_time_us = 9,
-			.sr_enter_plus_exit_time_us = 11,
+			.sr_exit_time_us = 28.0,
+			.sr_enter_plus_exit_time_us = 30.0,
 			.valid = true,
 		},
 		{
 			.wm_inst = WM_D,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.72,
-			.sr_exit_time_us = 9,
-			.sr_enter_plus_exit_time_us = 11,
+			.sr_exit_time_us = 28.0,
+			.sr_enter_plus_exit_time_us = 30.0,
 			.valid = true,
 		},
 	}
@@ -480,32 +474,32 @@ static struct wm_table lpddr5_wm_table = {
 			.wm_inst = WM_A,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.65333,
-			.sr_exit_time_us = 11.5,
-			.sr_enter_plus_exit_time_us = 14.5,
+			.sr_exit_time_us = 28.0,
+			.sr_enter_plus_exit_time_us = 30.0,
 			.valid = true,
 		},
 		{
 			.wm_inst = WM_B,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.65333,
-			.sr_exit_time_us = 11.5,
-			.sr_enter_plus_exit_time_us = 14.5,
+			.sr_exit_time_us = 28.0,
+			.sr_enter_plus_exit_time_us = 30.0,
 			.valid = true,
 		},
 		{
 			.wm_inst = WM_C,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.65333,
-			.sr_exit_time_us = 11.5,
-			.sr_enter_plus_exit_time_us = 14.5,
+			.sr_exit_time_us = 28.0,
+			.sr_enter_plus_exit_time_us = 30.0,
 			.valid = true,
 		},
 		{
 			.wm_inst = WM_D,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.65333,
-			.sr_exit_time_us = 11.5,
-			.sr_enter_plus_exit_time_us = 14.5,
+			.sr_exit_time_us = 28.0,
+			.sr_enter_plus_exit_time_us = 30.0,
 			.valid = true,
 		},
 	}
@@ -515,11 +509,6 @@ static DpmClocks_t_dcn35 dummy_clocks;
 
 static struct dcn35_watermarks dummy_wms = { 0 };
 
-static struct dcn35_ss_info_table ss_info_table = {
-	.ss_divider = 1000,
-	.ss_percentage = {0, 0, 375, 375, 375}
-};
-
 static void dcn35_build_watermark_ranges(struct clk_bw_params *bw_params, struct dcn35_watermarks *table)
 {
 	int i, num_valid_sets;
@@ -653,27 +642,50 @@ static unsigned int convert_wck_ratio(uint8_t wck_ratio)
 	return 1;
 }
 
+static inline uint32_t calc_dram_speed_mts(const MemPstateTable_t *entry)
+{
+	return entry->UClk * convert_wck_ratio(entry->WckRatio) * 2;
+}
+
 static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk_mgr,
 						    struct integrated_info *bios_info,
 						    DpmClocks_t_dcn35 *clock_table)
 {
 	struct clk_bw_params *bw_params = clk_mgr->base.bw_params;
 	struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1];
-	uint32_t max_pstate = 0,  max_uclk = 0, max_fclk = 0;
-	uint32_t min_pstate = 0, max_dispclk = 0, max_dppclk = 0;
+	uint32_t max_fclk = 0, min_pstate = 0, max_dispclk = 0, max_dppclk = 0;
+	uint32_t max_pstate = 0, max_dram_speed_mts = 0, min_dram_speed_mts = 0;
+	uint32_t num_memps, num_fclk, num_dcfclk;
 	int i;
 
-	for (i = 0; i < clock_table->NumMemPstatesEnabled; i++) {
-		if (is_valid_clock_value(clock_table->MemPstateTable[i].UClk) &&
-		    clock_table->MemPstateTable[i].UClk > max_uclk) {
-			max_uclk = clock_table->MemPstateTable[i].UClk;
+	/* Determine min/max p-state values. */
+	num_memps = (clock_table->NumMemPstatesEnabled > NUM_MEM_PSTATE_LEVELS) ? NUM_MEM_PSTATE_LEVELS :
+		clock_table->NumMemPstatesEnabled;
+	for (i = 0; i < num_memps; i++) {
+		uint32_t dram_speed_mts = calc_dram_speed_mts(&clock_table->MemPstateTable[i]);
+
+		if (is_valid_clock_value(dram_speed_mts) && dram_speed_mts > max_dram_speed_mts) {
+			max_dram_speed_mts = dram_speed_mts;
 			max_pstate = i;
 		}
 	}
 
-	/* We expect the table to contain at least one valid Uclk entry. */
-	ASSERT(is_valid_clock_value(max_uclk));
+	min_dram_speed_mts = max_dram_speed_mts;
+	min_pstate = max_pstate;
+
+	for (i = 0; i < num_memps; i++) {
+		uint32_t dram_speed_mts = calc_dram_speed_mts(&clock_table->MemPstateTable[i]);
+
+		if (is_valid_clock_value(dram_speed_mts) && dram_speed_mts < min_dram_speed_mts) {
+			min_dram_speed_mts = dram_speed_mts;
+			min_pstate = i;
+		}
+	}
 
+	/* We expect the table to contain at least one valid P-state entry. */
+	ASSERT(clock_table->NumMemPstatesEnabled &&
+	       is_valid_clock_value(max_dram_speed_mts) &&
+	       is_valid_clock_value(min_dram_speed_mts));
 
 	/* dispclk and dppclk can be max at any voltage, same number of levels for both */
 	if (clock_table->NumDispClkLevelsEnabled <= NUM_DISPCLK_DPM_LEVELS &&
@@ -683,47 +695,50 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk
 		max_dppclk = find_max_clk_value(clock_table->DppClocks,
 			clock_table->NumDispClkLevelsEnabled);
 	} else {
+		/* Invalid number of entries in the table from PMFW. */
 		ASSERT(0);
 	}
-	if (clock_table->NumFclkLevelsEnabled <= NUM_FCLK_DPM_LEVELS)
-		max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq,
-			clock_table->NumFclkLevelsEnabled);
 
-	for (i = 0; i < clock_table->NumMemPstatesEnabled; i++) {
-		uint32_t min_uclk = clock_table->MemPstateTable[0].UClk;
-		int j;
+	/* Base the clock table on dcfclk, need at least one entry regardless of pmfw table */
+	ASSERT(clock_table->NumDcfClkLevelsEnabled > 0);
 
-		for (j = 1; j < clock_table->NumMemPstatesEnabled; j++) {
-			if (is_valid_clock_value(clock_table->MemPstateTable[j].UClk) &&
-			    clock_table->MemPstateTable[j].UClk < min_uclk &&
-			    clock_table->MemPstateTable[j].Voltage <= clock_table->SocVoltage[i]) {
-				min_uclk = clock_table->MemPstateTable[j].UClk;
-				min_pstate = j;
-			}
-		}
+	num_fclk = (clock_table->NumFclkLevelsEnabled > NUM_FCLK_DPM_LEVELS) ? NUM_FCLK_DPM_LEVELS :
+		clock_table->NumFclkLevelsEnabled;
+	max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq, num_fclk);
+
+	num_dcfclk = (clock_table->NumDcfClkLevelsEnabled > NUM_DCFCLK_DPM_LEVELS) ? NUM_DCFCLK_DPM_LEVELS :
+		clock_table->NumDcfClkLevelsEnabled; /* clamp on the dcfclk count, not the fclk count */
+	for (i = 0; i < num_dcfclk; i++) {
+		int j;
 
+		/* First search defaults for the clocks we don't read using closest lower or equal default dcfclk */
 		for (j = bw_params->clk_table.num_entries - 1; j > 0; j--)
 			if (bw_params->clk_table.entries[j].dcfclk_mhz <= clock_table->DcfClocks[i])
-			break;
+				break;
 
 		bw_params->clk_table.entries[i].phyclk_mhz = bw_params->clk_table.entries[j].phyclk_mhz;
 		bw_params->clk_table.entries[i].phyclk_d18_mhz = bw_params->clk_table.entries[j].phyclk_d18_mhz;
 		bw_params->clk_table.entries[i].dtbclk_mhz = bw_params->clk_table.entries[j].dtbclk_mhz;
-		bw_params->clk_table.entries[i].fclk_mhz = max_fclk;
+
+		/* Now update clocks we do read */
 		bw_params->clk_table.entries[i].memclk_mhz = clock_table->MemPstateTable[min_pstate].MemClk;
 		bw_params->clk_table.entries[i].voltage = clock_table->MemPstateTable[min_pstate].Voltage;
 		bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[i];
 		bw_params->clk_table.entries[i].socclk_mhz = clock_table->SocClocks[i];
 		bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk;
 		bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk;
-		bw_params->clk_table.entries[i].wck_ratio = convert_wck_ratio(
-			clock_table->MemPstateTable[min_pstate].WckRatio);
-		}
+		bw_params->clk_table.entries[i].wck_ratio =
+			convert_wck_ratio(clock_table->MemPstateTable[min_pstate].WckRatio);
+
+		/* Dcfclk and Fclk are tied, but at a different ratio */
+		bw_params->clk_table.entries[i].fclk_mhz = min(max_fclk, 2 * clock_table->DcfClocks[i]);
+	}
 
 	/* Make sure to include at least one entry at highest pstate */
 	if (max_pstate != min_pstate || i == 0) {
 		if (i > MAX_NUM_DPM_LVL - 1)
 			i = MAX_NUM_DPM_LVL - 1;
+
 		bw_params->clk_table.entries[i].fclk_mhz = max_fclk;
 		bw_params->clk_table.entries[i].memclk_mhz = clock_table->MemPstateTable[max_pstate].MemClk;
 		bw_params->clk_table.entries[i].voltage = clock_table->MemPstateTable[max_pstate].Voltage;
@@ -739,6 +754,7 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk
 	}
 	bw_params->clk_table.num_entries = i--;
 
+	/* Make sure all highest clocks are included */
 	bw_params->clk_table.entries[i].socclk_mhz =
 		find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS);
 	bw_params->clk_table.entries[i].dispclk_mhz =
@@ -757,6 +773,11 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk
 	bw_params->clk_table.num_entries_per_clk.num_fclk_levels = clock_table->NumFclkLevelsEnabled;
 	bw_params->clk_table.num_entries_per_clk.num_memclk_levels = clock_table->NumMemPstatesEnabled;
 	bw_params->clk_table.num_entries_per_clk.num_socclk_levels = clock_table->NumSocClkLevelsEnabled;
+
+	/*
+	 * Set any 0 clocks to max default setting. Not an issue for
+	 * power since we aren't doing switching in such case anyway
+	 */
 	for (i = 0; i < bw_params->clk_table.num_entries; i++) {
 		if (!bw_params->clk_table.entries[i].fclk_mhz) {
 			bw_params->clk_table.entries[i].fclk_mhz = def_max.fclk_mhz;
@@ -805,7 +826,7 @@ static void dcn35_set_low_power_state(struct clk_mgr *clk_mgr_base)
 	struct dc_state *context = dc->current_state;
 
 	if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) {
-		display_count = dcn35_get_active_display_cnt_wa(dc, context);
+		display_count = dcn35_get_active_display_cnt_wa(dc, context, NULL);
 		/* if we can go lower, go lower */
 		if (display_count == 0)
 			clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER;
@@ -965,21 +986,6 @@ struct clk_mgr_funcs dcn35_fpga_funcs = {
 	.get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz,
 };
 
-static void dcn35_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr)
-{
-	uint32_t clock_source;
-	struct dc_context *ctx = clk_mgr->base.ctx;
-
-	REG_GET(CLK1_CLK2_BYPASS_CNTL, CLK2_BYPASS_SEL, &clock_source);
-
-	clk_mgr->dprefclk_ss_percentage = ss_info_table.ss_percentage[clock_source];
-
-	if (clk_mgr->dprefclk_ss_percentage != 0) {
-		clk_mgr->ss_on_dprefclk = true;
-		clk_mgr->dprefclk_ss_divider = ss_info_table.ss_divider;
-	}
-}
-
 void dcn35_clk_mgr_construct(
 		struct dc_context *ctx,
 		struct clk_mgr_dcn35 *clk_mgr,
@@ -987,7 +993,6 @@ void dcn35_clk_mgr_construct(
 		struct dccg *dccg)
 {
 	struct dcn35_smu_dpm_clks smu_dpm_clks = { 0 };
-	struct clk_log_info log_info = {0};
 	clk_mgr->base.base.ctx = ctx;
 	clk_mgr->base.base.funcs = &dcn35_funcs;
 
@@ -1040,20 +1045,14 @@ void dcn35_clk_mgr_construct(
 		dcn35_bw_params.wm_table = ddr5_wm_table;
 	}
 	/* Saved clocks configured at boot for debug purposes */
-	dcn35_dump_clk_registers(&clk_mgr->base.base.boot_snapshot, &clk_mgr->base.base, &log_info);
+	dcn35_dump_clk_registers(&clk_mgr->base.base.boot_snapshot, clk_mgr);
 
 	clk_mgr->base.base.dprefclk_khz = dcn35_smu_get_dprefclk(&clk_mgr->base);
-	clk_mgr->base.base.clks.ref_dtbclk_khz = dcn35_smu_get_dtbclk(&clk_mgr->base);
-
-	if (!clk_mgr->base.base.clks.ref_dtbclk_khz)
-		dcn35_smu_set_dtbclk(&clk_mgr->base, true);
+	clk_mgr->base.base.clks.ref_dtbclk_khz = 600000;
 
-	clk_mgr->base.base.clks.dtbclk_en = true;
 	dce_clock_read_ss_info(&clk_mgr->base);
 	/*when clk src is from FCH, it could have ss, same clock src as DPREF clk*/
 
-	dcn35_read_ss_info_from_lut(&clk_mgr->base);
-
 	clk_mgr->base.base.bw_params = &dcn35_bw_params;
 
 	if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) {
@@ -1129,7 +1128,6 @@ void dcn35_clk_mgr_construct(
 			ctx->dc->debug.disable_dpp_power_gate = false;
 			ctx->dc->debug.disable_hubp_power_gate = false;
 			ctx->dc->debug.disable_dsc_power_gate = false;
-			ctx->dc->debug.disable_hpo_power_gate = false;
 		} else {
 			/*let's reset the config control flag*/
 			ctx->dc->config.disable_ips = DMUB_IPS_DISABLE_ALL; /*pmfw not support it, disable it all*/
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c
index b6b8c3ca1572..6d4a1ffab5ed 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c
@@ -116,6 +116,9 @@ static uint32_t dcn35_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, un
 			msleep(delay_us/1000);
 		else if (delay_us > 0)
 			udelay(delay_us);
+
+		if (clk_mgr->base.ctx->dc->debug.disable_timeout)
+			max_retries++;
 	} while (max_retries--);
 
 	return res_val;
@@ -276,7 +279,7 @@ void dcn35_smu_set_display_idle_optimization(struct clk_mgr_internal *clk_mgr, u
 		clk_mgr,
 		VBIOSSMC_MSG_SetDisplayIdleOptimizations,
 		idle_info);
-	smu_print("VBIOSSMC_MSG_SetDisplayIdleOptimizations idle_info  = %d\n", idle_info);
+	smu_print("%s: VBIOSSMC_MSG_SetDisplayIdleOptimizations idle_info  = %x\n", __func__, idle_info);
 }
 
 void dcn35_smu_enable_phy_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable)
@@ -295,7 +298,7 @@ void dcn35_smu_enable_phy_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool e
 			clk_mgr,
 			VBIOSSMC_MSG_SetDisplayIdleOptimizations,
 			idle_info.data);
-	smu_print("dcn35_smu_enable_phy_refclk_pwrdwn  = %d\n", enable ? 1 : 0);
+	smu_print("%s smu_enable_phy_refclk_pwrdwn  = %d\n", __func__, enable ? 1 : 0);
 }
 
 void dcn35_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr)
@@ -307,6 +310,7 @@ void dcn35_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr)
 			clk_mgr,
 			VBIOSSMC_MSG_UpdatePmeRestore,
 			0);
+	smu_print("%s: SMC_MSG_UpdatePmeRestore\n", __func__);
 }
 
 void dcn35_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high)
@@ -347,7 +351,7 @@ void dcn35_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr)
 
 void dcn35_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zstate_support_state support)
 {
-	unsigned int msg_id, param;
+	unsigned int msg_id, param, retv;
 
 	if (!clk_mgr->smu_present)
 		return;
@@ -357,27 +361,32 @@ void dcn35_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zst
 	case DCN_ZSTATE_SUPPORT_ALLOW:
 		msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
 		param = (1 << 10) | (1 << 9) | (1 << 8);
+		smu_print("%s: SMC_MSG_AllowZstatesEntry msg = ALLOW, param = %d\n", __func__, param);
 		break;
 
 	case DCN_ZSTATE_SUPPORT_DISALLOW:
 		msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
 		param = 0;
+		smu_print("%s: SMC_MSG_AllowZstatesEntry msg_id = DISALLOW, param = %d\n",  __func__, param);
 		break;
 
 
 	case DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY:
 		msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
 		param = (1 << 10);
+		smu_print("%s: SMC_MSG_AllowZstatesEntry msg = ALLOW_Z10_ONLY, param = %d\n", __func__, param);
 		break;
 
 	case DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY:
 		msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
 		param = (1 << 10) | (1 << 8);
+		smu_print("%s: SMC_MSG_AllowZstatesEntry msg = ALLOW_Z8_Z10_ONLY, param = %d\n", __func__, param);
 		break;
 
 	case DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY:
 		msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
 		param = (1 << 8);
+		smu_print("%s: SMC_MSG_AllowZstatesEntry msg = ALLOW_Z8_ONLY, param = %d\n", __func__, param);
 		break;
 
 	default: //DCN_ZSTATE_SUPPORT_UNKNOWN
@@ -387,11 +396,11 @@ void dcn35_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zst
 	}
 
 
-	dcn35_smu_send_msg_with_param(
+	retv = dcn35_smu_send_msg_with_param(
 		clk_mgr,
 		msg_id,
 		param);
-	smu_print("dcn35_smu_set_zstate_support msg_id = %d, param = %d\n", msg_id, param);
+	smu_print("%s:  msg_id = %d, param = 0x%x, return = %d\n", __func__, msg_id, param, retv);
 }
 
 int dcn35_smu_get_dprefclk(struct clk_mgr_internal *clk_mgr)
@@ -405,7 +414,7 @@ int dcn35_smu_get_dprefclk(struct clk_mgr_internal *clk_mgr)
 						 VBIOSSMC_MSG_GetDprefclkFreq,
 						 0);
 
-	smu_print("dcn35_smu_get_DPREF clk  = %d mhz\n", dprefclk);
+	smu_print("%s:  SMU DPREF clk  = %d mhz\n",  __func__, dprefclk);
 	return dprefclk * 1000;
 }
 
@@ -420,7 +429,7 @@ int dcn35_smu_get_dtbclk(struct clk_mgr_internal *clk_mgr)
 					       VBIOSSMC_MSG_GetDtbclkFreq,
 					       0);
 
-	smu_print("dcn35_smu_get_dtbclk  = %d mhz\n", dtbclk);
+	smu_print("%s: get_dtbclk  = %dmhz\n", __func__, dtbclk);
 	return dtbclk * 1000;
 }
 /* Arg = 1: Turn DTB on; 0: Turn DTB CLK OFF. when it is on, it is 600MHZ */
@@ -433,7 +442,7 @@ void dcn35_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable)
 			clk_mgr,
 			VBIOSSMC_MSG_SetDtbClk,
 			enable);
-	smu_print("dcn35_smu_set_dtbclk  = %d \n", enable ? 1 : 0);
+	smu_print("%s: smu_set_dtbclk = %d\n", __func__, enable ? 1 : 0);
 }
 
 void dcn35_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable)
@@ -442,30 +451,45 @@ void dcn35_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(struct clk_mgr_internal *cl
 			clk_mgr,
 			VBIOSSMC_MSG_EnableTmdp48MHzRefclkPwrDown,
 			enable);
+	smu_print("%s: smu_enable_48mhz_tmdp_refclk_pwrdwn = %d\n", __func__, enable ? 1 : 0);
 }
 
 int dcn35_smu_exit_low_power_state(struct clk_mgr_internal *clk_mgr)
 {
-	return dcn35_smu_send_msg_with_param(
+	int retv;
+
+	retv = dcn35_smu_send_msg_with_param(
 		clk_mgr,
 		VBIOSSMC_MSG_DispPsrExit,
 		0);
+	smu_print("%s: smu_exit_low_power_state return = %d\n", __func__, retv);
+	return retv;
 }
 
 int dcn35_smu_get_ips_supported(struct clk_mgr_internal *clk_mgr)
 {
-	return dcn35_smu_send_msg_with_param(
+	int retv;
+
+	retv = dcn35_smu_send_msg_with_param(
 			clk_mgr,
 			VBIOSSMC_MSG_QueryIPS2Support,
 			0);
+
+	//smu_print("%s: VBIOSSMC_MSG_QueryIPS2Support return = %x\n", __func__, retv);
+	return retv;
 }
 
 void dcn35_smu_write_ips_scratch(struct clk_mgr_internal *clk_mgr, uint32_t param)
 {
 	REG_WRITE(MP1_SMN_C2PMSG_71, param);
+	//smu_print("%s: write_ips_scratch = %x\n", __func__, param);
 }
 
 uint32_t dcn35_smu_read_ips_scratch(struct clk_mgr_internal *clk_mgr)
 {
-	return REG_READ(MP1_SMN_C2PMSG_71);
+	uint32_t retv;
+
+	retv = REG_READ(MP1_SMN_C2PMSG_71);
+	//smu_print("%s: dcn35_smu_read_ips_scratch = %x\n",  __func__, retv);
+	return retv;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 76b47f178127..2c424e435962 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -34,6 +34,8 @@
 #include "dce/dce_hwseq.h"
 
 #include "resource.h"
+#include "dc_state.h"
+#include "dc_state_priv.h"
 
 #include "gpio_service_interface.h"
 #include "clk_mgr.h"
@@ -519,7 +521,7 @@ dc_stream_forward_dmub_crc_window(struct dc_dmub_srv *dmub_srv,
 		cmd.secure_display.roi_info.y_end = rect->y + rect->height;
 	}
 
-	dm_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+	dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
 }
 
 static inline void
@@ -808,7 +810,7 @@ static void dc_destruct(struct dc *dc)
 		link_enc_cfg_init(dc, dc->current_state);
 
 	if (dc->current_state) {
-		dc_release_state(dc->current_state);
+		dc_state_release(dc->current_state);
 		dc->current_state = NULL;
 	}
 
@@ -1020,29 +1022,27 @@ static bool dc_construct(struct dc *dc,
 	}
 #endif
 
+	if (!create_links(dc, init_params->num_virtual_links))
+		goto fail;
+
+	/* Create additional DIG link encoder objects if fewer than the platform
+	 * supports were created during link construction.
+	 */
+	if (!create_link_encoders(dc))
+		goto fail;
+
 	/* Creation of current_state must occur after dc->dml
 	 * is initialized in dc_create_resource_pool because
 	 * on creation it copies the contents of dc->dml
 	 */
 
-	dc->current_state = dc_create_state(dc);
+	dc->current_state = dc_state_create(dc);
 
 	if (!dc->current_state) {
 		dm_error("%s: failed to create validate ctx\n", __func__);
 		goto fail;
 	}
 
-	if (!create_links(dc, init_params->num_virtual_links))
-		goto fail;
-
-	/* Create additional DIG link encoder objects if fewer than the platform
-	 * supports were created during link construction.
-	 */
-	if (!create_link_encoders(dc))
-		goto fail;
-
-	dc_resource_state_construct(dc, dc->current_state);
-
 	return true;
 
 fail:
@@ -1085,7 +1085,7 @@ static void apply_ctx_interdependent_lock(struct dc *dc,
 	}
 }
 
-static void dc_update_viusal_confirm_color(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx)
+static void dc_update_visual_confirm_color(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx)
 {
 	if (dc->ctx->dce_version >= DCN_VERSION_1_0) {
 		memset(&pipe_ctx->visual_confirm_color, 0, sizeof(struct tg_color));
@@ -1105,9 +1105,9 @@ static void dc_update_viusal_confirm_color(struct dc *dc, struct dc_state *conte
 			if (dc->debug.visual_confirm == VISUAL_CONFIRM_MPCTREE)
 				get_mpctree_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color));
 			else if (dc->debug.visual_confirm == VISUAL_CONFIRM_SUBVP)
-				get_subvp_visual_confirm_color(dc, context, pipe_ctx, &(pipe_ctx->visual_confirm_color));
+				get_subvp_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color));
 			else if (dc->debug.visual_confirm == VISUAL_CONFIRM_MCLK_SWITCH)
-				get_mclk_switch_visual_confirm_color(dc, context, pipe_ctx, &(pipe_ctx->visual_confirm_color));
+				get_mclk_switch_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color));
 		}
 	}
 }
@@ -1115,7 +1115,7 @@ static void dc_update_viusal_confirm_color(struct dc *dc, struct dc_state *conte
 static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 {
 	int i, j;
-	struct dc_state *dangling_context = dc_create_state(dc);
+	struct dc_state *dangling_context = dc_state_create_current_copy(dc);
 	struct dc_state *current_ctx;
 	struct pipe_ctx *pipe;
 	struct timing_generator *tg;
@@ -1123,8 +1123,6 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 	if (dangling_context == NULL)
 		return;
 
-	dc_resource_state_copy_construct(dc->current_state, dangling_context);
-
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct dc_stream_state *old_stream =
 				dc->current_state->res_ctx.pipe_ctx[i].stream;
@@ -1161,6 +1159,7 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 		}
 
 		if (should_disable && old_stream) {
+			bool is_phantom = dc_state_get_stream_subvp_type(dc->current_state, old_stream) == SUBVP_PHANTOM;
 			pipe = &dc->current_state->res_ctx.pipe_ctx[i];
 			tg = pipe->stream_res.tg;
 			/* When disabling plane for a phantom pipe, we must turn on the
@@ -1169,22 +1168,29 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 			 * state that can result in underflow or hang when enabling it
 			 * again for different use.
 			 */
-			if (old_stream->mall_stream_config.type == SUBVP_PHANTOM) {
+			if (is_phantom) {
 				if (tg->funcs->enable_crtc) {
 					int main_pipe_width, main_pipe_height;
+					struct dc_stream_state *old_paired_stream = dc_state_get_paired_subvp_stream(dc->current_state, old_stream);
 
-					main_pipe_width = old_stream->mall_stream_config.paired_stream->dst.width;
-					main_pipe_height = old_stream->mall_stream_config.paired_stream->dst.height;
+					main_pipe_width = old_paired_stream->dst.width;
+					main_pipe_height = old_paired_stream->dst.height;
 					if (dc->hwss.blank_phantom)
 						dc->hwss.blank_phantom(dc, tg, main_pipe_width, main_pipe_height);
 					tg->funcs->enable_crtc(tg);
 				}
 			}
-			dc_rem_all_planes_for_stream(dc, old_stream, dangling_context);
+
+			if (is_phantom)
+				dc_state_rem_all_phantom_planes_for_stream(dc, old_stream, dangling_context, true);
+			else
+				dc_state_rem_all_planes_for_stream(dc, old_stream, dangling_context);
 			disable_all_writeback_pipes_for_stream(dc, old_stream, dangling_context);
 
-			if (pipe->stream && pipe->plane_state)
-				dc_update_viusal_confirm_color(dc, context, pipe);
+			if (pipe->stream && pipe->plane_state) {
+				set_p_state_switch_method(dc, context, pipe);
+				dc_update_visual_confirm_color(dc, context, pipe);
+			}
 
 			if (dc->hwss.apply_ctx_for_surface) {
 				apply_ctx_interdependent_lock(dc, dc->current_state, old_stream, true);
@@ -1203,7 +1209,7 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 			 * The OTG is set to disable on falling edge of VUPDATE so the plane disable
 			 * will still get it's double buffer update.
 			 */
-			if (old_stream->mall_stream_config.type == SUBVP_PHANTOM) {
+			if (is_phantom) {
 				if (tg->funcs->disable_phantom_crtc)
 					tg->funcs->disable_phantom_crtc(tg);
 			}
@@ -1212,7 +1218,7 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 
 	current_ctx = dc->current_state;
 	dc->current_state = dangling_context;
-	dc_release_state(current_ctx);
+	dc_state_release(current_ctx);
 }
 
 static void disable_vbios_mode_if_required(
@@ -1284,7 +1290,7 @@ static void wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context)
 		int count = 0;
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
-		if (!pipe->plane_state || pipe->stream->mall_stream_config.type == SUBVP_PHANTOM)
+		if (!pipe->plane_state || dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM)
 			continue;
 
 		/* Timeout 100 ms */
@@ -1510,7 +1516,7 @@ static void program_timing_sync(
 		}
 
 		for (k = 0; k < group_size; k++) {
-			struct dc_stream_status *status = dc_stream_get_status_from_state(ctx, pipe_set[k]->stream);
+			struct dc_stream_status *status = dc_state_get_stream_status(ctx, pipe_set[k]->stream);
 
 			status->timing_sync_info.group_id = num_group;
 			status->timing_sync_info.group_size = group_size;
@@ -1521,7 +1527,7 @@ static void program_timing_sync(
 
 		}
 
-		/* remove any other pipes that are already been synced */
+		/* remove any other unblanked pipes as they have already been synced */
 		if (dc->config.use_pipe_ctx_sync_logic) {
 			/* check pipe's syncd to decide which pipe to be removed */
 			for (j = 1; j < group_size; j++) {
@@ -1534,6 +1540,7 @@ static void program_timing_sync(
 					pipe_set[j]->pipe_idx_syncd = pipe_set[0]->pipe_idx_syncd;
 			}
 		} else {
+			/* remove any other pipes by checking valid plane */
 			for (j = j + 1; j < group_size; j++) {
 				bool is_blanked;
 
@@ -1554,7 +1561,7 @@ static void program_timing_sync(
 		if (group_size > 1) {
 			if (sync_type == TIMING_SYNCHRONIZABLE) {
 				dc->hwss.enable_timing_synchronization(
-					dc, group_index, group_size, pipe_set);
+					dc, ctx, group_index, group_size, pipe_set);
 			} else
 				if (sync_type == VBLANK_SYNCHRONIZABLE) {
 				dc->hwss.enable_vblanks_synchronization(
@@ -1836,7 +1843,7 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
 		struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
 
 		/* Check old context for SubVP */
-		subvp_prev_use |= (old_pipe->stream && old_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM);
+		subvp_prev_use |= (dc_state_get_pipe_subvp_type(dc->current_state, old_pipe) == SUBVP_PHANTOM);
 		if (subvp_prev_use)
 			break;
 	}
@@ -1964,6 +1971,10 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
 		wait_for_no_pipes_pending(dc, context);
 		/* pplib is notified if disp_num changed */
 		dc->hwss.optimize_bandwidth(dc, context);
+		/* Need to do otg sync again as otg could be out of sync due to otg
+		 * workaround applied during clock update
+		 */
+		dc_trigger_sync(dc, context);
 	}
 
 	if (dc->hwss.update_dsc_pg)
@@ -1990,9 +2001,9 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
 	old_state = dc->current_state;
 	dc->current_state = context;
 
-	dc_release_state(old_state);
+	dc_state_release(old_state);
 
-	dc_retain_state(dc->current_state);
+	dc_state_retain(dc->current_state);
 
 	return result;
 }
@@ -2063,12 +2074,10 @@ enum dc_status dc_commit_streams(struct dc *dc,
 	if (handle_exit_odm2to1)
 		res = commit_minimal_transition_state(dc, dc->current_state);
 
-	context = dc_create_state(dc);
+	context = dc_state_create_current_copy(dc);
 	if (!context)
 		goto context_alloc_fail;
 
-	dc_resource_state_copy_construct_current(dc, context);
-
 	res = dc_validate_with_context(dc, set, stream_count, context, false);
 	if (res != DC_OK) {
 		BREAK_TO_DEBUGGER();
@@ -2083,7 +2092,7 @@ enum dc_status dc_commit_streams(struct dc *dc,
 				streams[i]->out.otg_offset = context->stream_status[j].primary_otg_inst;
 
 			if (dc_is_embedded_signal(streams[i]->signal)) {
-				struct dc_stream_status *status = dc_stream_get_status_from_state(context, streams[i]);
+				struct dc_stream_status *status = dc_state_get_stream_status(context, streams[i]);
 
 				if (dc->hwss.is_abm_supported)
 					status->is_abm_supported = dc->hwss.is_abm_supported(dc, context, streams[i]);
@@ -2094,7 +2103,7 @@ enum dc_status dc_commit_streams(struct dc *dc,
 	}
 
 fail:
-	dc_release_state(context);
+	dc_state_release(context);
 
 context_alloc_fail:
 
@@ -2148,7 +2157,7 @@ static bool is_flip_pending_in_pipes(struct dc *dc, struct dc_state *context)
 		pipe = &context->res_ctx.pipe_ctx[i];
 
 		// Don't check flip pending on phantom pipes
-		if (!pipe->plane_state || (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM))
+		if (!pipe->plane_state || (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM))
 			continue;
 
 		/* Must set to false to start with, due to OR in update function */
@@ -2206,7 +2215,7 @@ void dc_post_update_surfaces_to_stream(struct dc *dc)
 			if (context->res_ctx.pipe_ctx[i].stream == NULL ||
 					context->res_ctx.pipe_ctx[i].plane_state == NULL) {
 				context->res_ctx.pipe_ctx[i].pipe_idx = i;
-				dc->hwss.disable_plane(dc, &context->res_ctx.pipe_ctx[i]);
+				dc->hwss.disable_plane(dc, context, &context->res_ctx.pipe_ctx[i]);
 			}
 
 		process_deferred_updates(dc);
@@ -2221,110 +2230,6 @@ void dc_post_update_surfaces_to_stream(struct dc *dc)
 	dc->wm_optimized_required = false;
 }
 
-static void init_state(struct dc *dc, struct dc_state *context)
-{
-	/* Each context must have their own instance of VBA and in order to
-	 * initialize and obtain IP and SOC the base DML instance from DC is
-	 * initially copied into every context
-	 */
-	memcpy(&context->bw_ctx.dml, &dc->dml, sizeof(struct display_mode_lib));
-}
-
-struct dc_state *dc_create_state(struct dc *dc)
-{
-	struct dc_state *context = kvzalloc(sizeof(struct dc_state),
-					    GFP_KERNEL);
-
-	if (!context)
-		return NULL;
-
-	init_state(dc, context);
-
-#ifdef CONFIG_DRM_AMD_DC_FP
-	if (dc->debug.using_dml2) {
-		dml2_create(dc, &dc->dml2_options, &context->bw_ctx.dml2);
-	}
-#endif
-	kref_init(&context->refcount);
-
-	return context;
-}
-
-struct dc_state *dc_copy_state(struct dc_state *src_ctx)
-{
-	int i, j;
-	struct dc_state *new_ctx = kvmalloc(sizeof(struct dc_state), GFP_KERNEL);
-#ifdef CONFIG_DRM_AMD_DC_FP
-	struct dml2_context *dml2 =  NULL;
-#endif
-
-	if (!new_ctx)
-		return NULL;
-	memcpy(new_ctx, src_ctx, sizeof(struct dc_state));
-
-#ifdef CONFIG_DRM_AMD_DC_FP
-	if (new_ctx->bw_ctx.dml2) {
-		dml2 = kzalloc(sizeof(struct dml2_context), GFP_KERNEL);
-		if (!dml2)
-			return NULL;
-
-		memcpy(dml2, src_ctx->bw_ctx.dml2, sizeof(struct dml2_context));
-		new_ctx->bw_ctx.dml2 = dml2;
-	}
-#endif
-
-	for (i = 0; i < MAX_PIPES; i++) {
-			struct pipe_ctx *cur_pipe = &new_ctx->res_ctx.pipe_ctx[i];
-
-			if (cur_pipe->top_pipe)
-				cur_pipe->top_pipe =  &new_ctx->res_ctx.pipe_ctx[cur_pipe->top_pipe->pipe_idx];
-
-			if (cur_pipe->bottom_pipe)
-				cur_pipe->bottom_pipe = &new_ctx->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx];
-
-			if (cur_pipe->prev_odm_pipe)
-				cur_pipe->prev_odm_pipe =  &new_ctx->res_ctx.pipe_ctx[cur_pipe->prev_odm_pipe->pipe_idx];
-
-			if (cur_pipe->next_odm_pipe)
-				cur_pipe->next_odm_pipe = &new_ctx->res_ctx.pipe_ctx[cur_pipe->next_odm_pipe->pipe_idx];
-
-	}
-
-	for (i = 0; i < new_ctx->stream_count; i++) {
-			dc_stream_retain(new_ctx->streams[i]);
-			for (j = 0; j < new_ctx->stream_status[i].plane_count; j++)
-				dc_plane_state_retain(
-					new_ctx->stream_status[i].plane_states[j]);
-	}
-
-	kref_init(&new_ctx->refcount);
-
-	return new_ctx;
-}
-
-void dc_retain_state(struct dc_state *context)
-{
-	kref_get(&context->refcount);
-}
-
-static void dc_state_free(struct kref *kref)
-{
-	struct dc_state *context = container_of(kref, struct dc_state, refcount);
-	dc_resource_state_destruct(context);
-
-#ifdef CONFIG_DRM_AMD_DC_FP
-	dml2_destroy(context->bw_ctx.dml2);
-	context->bw_ctx.dml2 = 0;
-#endif
-
-	kvfree(context);
-}
-
-void dc_release_state(struct dc_state *context)
-{
-	kref_put(&context->refcount, dc_state_free);
-}
-
 bool dc_set_generic_gpio_for_stereo(bool enable,
 		struct gpio_service *gpio_service)
 {
@@ -2997,11 +2902,9 @@ static void copy_stream_update_to_stream(struct dc *dc,
 				       update->dsc_config->num_slices_v != 0);
 
 		/* Use temporarry context for validating new DSC config */
-		struct dc_state *dsc_validate_context = dc_create_state(dc);
+		struct dc_state *dsc_validate_context = dc_state_create_copy(dc->current_state);
 
 		if (dsc_validate_context) {
-			dc_resource_state_copy_construct(dc->current_state, dsc_validate_context);
-
 			stream->timing.dsc_cfg = *update->dsc_config;
 			stream->timing.flags.DSC = enable_dsc;
 			if (!dc->res_pool->funcs->validate_bandwidth(dc, dsc_validate_context, true)) {
@@ -3010,7 +2913,7 @@ static void copy_stream_update_to_stream(struct dc *dc,
 				update->dsc_config = NULL;
 			}
 
-			dc_release_state(dsc_validate_context);
+			dc_state_release(dsc_validate_context);
 		} else {
 			DC_ERROR("Failed to allocate new validate context for DSC change\n");
 			update->dsc_config = NULL;
@@ -3109,30 +3012,27 @@ static bool update_planes_and_stream_state(struct dc *dc,
 			new_planes[i] = srf_updates[i].surface;
 
 		/* initialize scratch memory for building context */
-		context = dc_create_state(dc);
+		context = dc_state_create_copy(dc->current_state);
 		if (context == NULL) {
 			DC_ERROR("Failed to allocate new validate context!\n");
 			return false;
 		}
 
-		dc_resource_state_copy_construct(
-				dc->current_state, context);
-
 		/* For each full update, remove all existing phantom pipes first.
 		 * Ensures that we have enough pipes for newly added MPO planes
 		 */
-		if (dc->res_pool->funcs->remove_phantom_pipes)
-			dc->res_pool->funcs->remove_phantom_pipes(dc, context, false);
+		dc_state_remove_phantom_streams_and_planes(dc, context);
+		dc_state_release_phantom_streams_and_planes(dc, context);
 
 		/*remove old surfaces from context */
-		if (!dc_rem_all_planes_for_stream(dc, stream, context)) {
+		if (!dc_state_rem_all_planes_for_stream(dc, stream, context)) {
 
 			BREAK_TO_DEBUGGER();
 			goto fail;
 		}
 
 		/* add surface to context */
-		if (!dc_add_all_planes_for_stream(dc, stream, new_planes, surface_count, context)) {
+		if (!dc_state_add_all_planes_for_stream(dc, stream, new_planes, surface_count, context)) {
 
 			BREAK_TO_DEBUGGER();
 			goto fail;
@@ -3157,19 +3057,6 @@ static bool update_planes_and_stream_state(struct dc *dc,
 
 	if (update_type == UPDATE_TYPE_FULL) {
 		if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) {
-			/* For phantom pipes we remove and create a new set of phantom pipes
-			 * for each full update (because we don't know if we'll need phantom
-			 * pipes until after the first round of validation). However, if validation
-			 * fails we need to keep the existing phantom pipes (because we don't update
-			 * the dc->current_state).
-			 *
-			 * The phantom stream/plane refcount is decremented for validation because
-			 * we assume it'll be removed (the free comes when the dc_state is freed),
-			 * but if validation fails we have to increment back the refcount so it's
-			 * consistent.
-			 */
-			if (dc->res_pool->funcs->retain_phantom_pipes)
-				dc->res_pool->funcs->retain_phantom_pipes(dc, dc->current_state);
 			BREAK_TO_DEBUGGER();
 			goto fail;
 		}
@@ -3190,7 +3077,7 @@ static bool update_planes_and_stream_state(struct dc *dc,
 	return true;
 
 fail:
-	dc_release_state(context);
+	dc_state_release(context);
 
 	return false;
 
@@ -3386,7 +3273,7 @@ void dc_dmub_update_dirty_rect(struct dc *dc,
 
 			update_dirty_rect->panel_inst = panel_inst;
 			update_dirty_rect->pipe_idx = j;
-			dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+			dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
 		}
 	}
 }
@@ -3488,18 +3375,24 @@ static void commit_planes_for_stream_fast(struct dc *dc,
 {
 	int i, j;
 	struct pipe_ctx *top_pipe_to_program = NULL;
+	struct dc_stream_status *stream_status = NULL;
 	dc_z10_restore(dc);
 
 	top_pipe_to_program = resource_get_otg_master_for_stream(
 			&context->res_ctx,
 			stream);
 
-	if (dc->debug.visual_confirm) {
-		for (i = 0; i < dc->res_pool->pipe_count; i++) {
-			struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+	if (!top_pipe_to_program)
+		return;
+
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+		if (pipe->stream && pipe->plane_state) {
+			set_p_state_switch_method(dc, context, pipe);
 
-			if (pipe->stream && pipe->plane_state)
-				dc_update_viusal_confirm_color(dc, context, pipe);
+			if (dc->debug.visual_confirm)
+				dc_update_visual_confirm_color(dc, context, pipe);
 		}
 	}
 
@@ -3523,6 +3416,8 @@ static void commit_planes_for_stream_fast(struct dc *dc,
 		}
 	}
 
+	stream_status = dc_state_get_stream_status(context, stream);
+
 	build_dmub_cmd_list(dc,
 			srf_updates,
 			surface_count,
@@ -3535,7 +3430,8 @@ static void commit_planes_for_stream_fast(struct dc *dc,
 			context->dmub_cmd_count,
 			context->block_sequence,
 			&(context->block_sequence_steps),
-			top_pipe_to_program);
+			top_pipe_to_program,
+			stream_status);
 	hwss_execute_sequence(dc,
 			context->block_sequence,
 			context->block_sequence_steps);
@@ -3626,12 +3522,12 @@ static void commit_planes_for_stream(struct dc *dc,
 	top_pipe_to_program = resource_get_otg_master_for_stream(
 				&context->res_ctx,
 				stream);
-
+	ASSERT(top_pipe_to_program != NULL);
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
 
 		// Check old context for SubVP
-		subvp_prev_use |= (old_pipe->stream && old_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM);
+		subvp_prev_use |= (dc_state_get_pipe_subvp_type(dc->current_state, old_pipe) == SUBVP_PHANTOM);
 		if (subvp_prev_use)
 			break;
 	}
@@ -3639,19 +3535,22 @@ static void commit_planes_for_stream(struct dc *dc,
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
-		if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+		if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
 			subvp_curr_use = true;
 			break;
 		}
 	}
 
-	if (dc->debug.visual_confirm)
-		for (i = 0; i < dc->res_pool->pipe_count; i++) {
-			struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+		if (pipe->stream && pipe->plane_state) {
+			set_p_state_switch_method(dc, context, pipe);
 
-			if (pipe->stream && pipe->plane_state)
-				dc_update_viusal_confirm_color(dc, context, pipe);
+			if (dc->debug.visual_confirm)
+				dc_update_visual_confirm_color(dc, context, pipe);
 		}
+	}
 
 	if (stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE) {
 		struct pipe_ctx *mpcc_pipe;
@@ -3918,7 +3817,9 @@ static void commit_planes_for_stream(struct dc *dc,
 		 * programming has completed (we turn on phantom OTG in order
 		 * to complete the plane disable for phantom pipes).
 		 */
-		dc->hwss.apply_ctx_to_hw(dc, context);
+
+		if (dc->hwss.disable_phantom_streams)
+			dc->hwss.disable_phantom_streams(dc, context);
 	}
 
 	if (update_type != UPDATE_TYPE_FAST)
@@ -4024,7 +3925,7 @@ static bool could_mpcc_tree_change_for_active_pipes(struct dc *dc,
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
 
-		if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE) {
+		if (dc_state_get_pipe_subvp_type(dc->current_state, pipe) != SUBVP_NONE) {
 			subvp_active = true;
 			break;
 		}
@@ -4061,7 +3962,7 @@ struct pipe_split_policy_backup {
 static void release_minimal_transition_state(struct dc *dc,
 		struct dc_state *context, struct pipe_split_policy_backup *policy)
 {
-	dc_release_state(context);
+	dc_state_release(context);
 	/* restore previous pipe split and odm policy */
 	if (!dc->config.is_vmin_only_asic)
 		dc->debug.pipe_split_policy = policy->mpc_policy;
@@ -4072,7 +3973,7 @@ static void release_minimal_transition_state(struct dc *dc,
 static struct dc_state *create_minimal_transition_state(struct dc *dc,
 		struct dc_state *base_context, struct pipe_split_policy_backup *policy)
 {
-	struct dc_state *minimal_transition_context = dc_create_state(dc);
+	struct dc_state *minimal_transition_context = NULL;
 	unsigned int i, j;
 
 	if (!dc->config.is_vmin_only_asic) {
@@ -4084,7 +3985,9 @@ static struct dc_state *create_minimal_transition_state(struct dc *dc,
 	policy->subvp_policy = dc->debug.force_disable_subvp;
 	dc->debug.force_disable_subvp = true;
 
-	dc_resource_state_copy_construct(base_context, minimal_transition_context);
+	minimal_transition_context = dc_state_create_copy(base_context);
+	if (!minimal_transition_context)
+		return NULL;
 
 	/* commit minimal state */
 	if (dc->res_pool->funcs->validate_bandwidth(dc, minimal_transition_context, false)) {
@@ -4116,7 +4019,6 @@ static bool commit_minimal_transition_state_for_windowed_mpo_odm(struct dc *dc,
 	bool success = false;
 	struct dc_state *minimal_transition_context;
 	struct pipe_split_policy_backup policy;
-	struct mall_temp_config mall_temp_config;
 
 	/* commit based on new context */
 	/* Since all phantom pipes are removed in full validation,
@@ -4125,8 +4027,6 @@ static bool commit_minimal_transition_state_for_windowed_mpo_odm(struct dc *dc,
 	 * pipe as subvp/phantom will be cleared (dc copy constructor
 	 * creates a shallow copy).
 	 */
-	if (dc->res_pool->funcs->save_mall_state)
-		dc->res_pool->funcs->save_mall_state(dc, context, &mall_temp_config);
 	minimal_transition_context = create_minimal_transition_state(dc,
 			context, &policy);
 	if (minimal_transition_context) {
@@ -4139,16 +4039,6 @@ static bool commit_minimal_transition_state_for_windowed_mpo_odm(struct dc *dc,
 			success = dc_commit_state_no_check(dc, minimal_transition_context) == DC_OK;
 		}
 		release_minimal_transition_state(dc, minimal_transition_context, &policy);
-		if (dc->res_pool->funcs->restore_mall_state)
-			dc->res_pool->funcs->restore_mall_state(dc, context, &mall_temp_config);
-		/* If we do a minimal transition with plane removal and the context
-		 * has subvp we also have to retain back the phantom stream / planes
-		 * since the refcount is decremented as part of the min transition
-		 * (we commit a state with no subvp, so the phantom streams / planes
-		 * had to be removed).
-		 */
-		if (dc->res_pool->funcs->retain_phantom_pipes)
-			dc->res_pool->funcs->retain_phantom_pipes(dc, context);
 	}
 
 	if (!success) {
@@ -4216,7 +4106,7 @@ static bool commit_minimal_transition_state(struct dc *dc,
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
 
-		if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+		if (pipe->stream && dc_state_get_pipe_subvp_type(dc->current_state, pipe) == SUBVP_PHANTOM) {
 			subvp_in_use = true;
 			break;
 		}
@@ -4457,6 +4347,8 @@ static bool should_commit_minimal_transition_for_windowed_mpo_odm(struct dc *dc,
 
 	cur_pipe = resource_get_otg_master_for_stream(&dc->current_state->res_ctx, stream);
 	new_pipe = resource_get_otg_master_for_stream(&context->res_ctx, stream);
+	if (!cur_pipe || !new_pipe)
+		return false;
 	cur_is_odm_in_use = resource_get_odm_slice_count(cur_pipe) > 1;
 	new_is_odm_in_use = resource_get_odm_slice_count(new_pipe) > 1;
 	if (cur_is_odm_in_use == new_is_odm_in_use)
@@ -4482,7 +4374,6 @@ bool dc_update_planes_and_stream(struct dc *dc,
 	struct dc_state *context;
 	enum surface_update_type update_type;
 	int i;
-	struct mall_temp_config mall_temp_config;
 	struct dc_fast_update fast_update[MAX_SURFACES] = {0};
 
 	/* In cases where MPO and split or ODM are used transitions can
@@ -4526,23 +4417,10 @@ bool dc_update_planes_and_stream(struct dc *dc,
 		 * pipe as subvp/phantom will be cleared (dc copy constructor
 		 * creates a shallow copy).
 		 */
-		if (dc->res_pool->funcs->save_mall_state)
-			dc->res_pool->funcs->save_mall_state(dc, context, &mall_temp_config);
 		if (!commit_minimal_transition_state(dc, context)) {
-			dc_release_state(context);
+			dc_state_release(context);
 			return false;
 		}
-		if (dc->res_pool->funcs->restore_mall_state)
-			dc->res_pool->funcs->restore_mall_state(dc, context, &mall_temp_config);
-
-		/* If we do a minimal transition with plane removal and the context
-		 * has subvp we also have to retain back the phantom stream / planes
-		 * since the refcount is decremented as part of the min transition
-		 * (we commit a state with no subvp, so the phantom streams / planes
-		 * had to be removed).
-		 */
-		if (dc->res_pool->funcs->retain_phantom_pipes)
-			dc->res_pool->funcs->retain_phantom_pipes(dc, context);
 		update_type = UPDATE_TYPE_FULL;
 	}
 
@@ -4599,7 +4477,7 @@ bool dc_update_planes_and_stream(struct dc *dc,
 		struct dc_state *old = dc->current_state;
 
 		dc->current_state = context;
-		dc_release_state(old);
+		dc_state_release(old);
 
 		// clear any forced full updates
 		for (i = 0; i < dc->res_pool->pipe_count; i++) {
@@ -4658,14 +4536,12 @@ void dc_commit_updates_for_stream(struct dc *dc,
 	if (update_type >= UPDATE_TYPE_FULL) {
 
 		/* initialize scratch memory for building context */
-		context = dc_create_state(dc);
+		context = dc_state_create_copy(state);
 		if (context == NULL) {
 			DC_ERROR("Failed to allocate new validate context!\n");
 			return;
 		}
 
-		dc_resource_state_copy_construct(state, context);
-
 		for (i = 0; i < dc->res_pool->pipe_count; i++) {
 			struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i];
 			struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
@@ -4704,7 +4580,7 @@ void dc_commit_updates_for_stream(struct dc *dc,
 	if (update_type >= UPDATE_TYPE_FULL) {
 		if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) {
 			DC_ERROR("Mode validation failed for stream update!\n");
-			dc_release_state(context);
+			dc_state_release(context);
 			return;
 		}
 	}
@@ -4737,7 +4613,7 @@ void dc_commit_updates_for_stream(struct dc *dc,
 		struct dc_state *old = dc->current_state;
 
 		dc->current_state = context;
-		dc_release_state(old);
+		dc_state_release(old);
 
 		for (i = 0; i < dc->res_pool->pipe_count; i++) {
 			struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
@@ -4810,7 +4686,7 @@ void dc_set_power_state(
 
 	switch (power_state) {
 	case DC_ACPI_CM_POWER_STATE_D0:
-		dc_resource_state_construct(dc, dc->current_state);
+		dc_state_construct(dc, dc->current_state);
 
 		dc_z10_restore(dc);
 
@@ -4825,7 +4701,7 @@ void dc_set_power_state(
 	default:
 		ASSERT(dc->current_state->stream_count == 0);
 
-		dc_resource_state_destruct(dc->current_state);
+		dc_state_destruct(dc->current_state);
 
 		break;
 	}
@@ -4902,6 +4778,38 @@ bool dc_set_psr_allow_active(struct dc *dc, bool enable)
 	return true;
 }
 
+/* Enable/disable eDP Replay without specifying a stream for eDP */
+bool dc_set_replay_allow_active(struct dc *dc, bool active)
+{
+	int i;
+	bool allow_active;
+
+	for (i = 0; i < dc->current_state->stream_count; i++) {
+		struct dc_link *link;
+		struct dc_stream_state *stream = dc->current_state->streams[i];
+
+		link = stream->link;
+		if (!link)
+			continue;
+
+		if (link->replay_settings.replay_feature_enabled) {
+			if (active && !link->replay_settings.replay_allow_active) {
+				allow_active = true;
+				if (!dc_link_set_replay_allow_active(link, &allow_active,
+					false, false, NULL))
+					return false;
+			} else if (!active && link->replay_settings.replay_allow_active) {
+				allow_active = false;
+				if (!dc_link_set_replay_allow_active(link, &allow_active,
+					true, false, NULL))
+					return false;
+			}
+		}
+	}
+
+	return true;
+}
+
 void dc_allow_idle_optimizations(struct dc *dc, bool allow)
 {
 	if (dc->debug.disable_idle_power_optimizations)
@@ -5095,18 +5003,28 @@ void dc_mclk_switch_using_fw_based_vblank_stretch_shut_down(struct dc *dc)
  */
 bool dc_is_dmub_outbox_supported(struct dc *dc)
 {
-	/* DCN31 B0 USB4 DPIA needs dmub notifications for interrupts */
-	if (dc->ctx->asic_id.chip_family == FAMILY_YELLOW_CARP &&
-	    dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0 &&
-	    !dc->debug.dpia_debug.bits.disable_dpia)
-		return true;
+	switch (dc->ctx->asic_id.chip_family) {
 
-	if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_1 &&
-	    !dc->debug.dpia_debug.bits.disable_dpia)
-		return true;
+	case FAMILY_YELLOW_CARP:
+		/* DCN31 B0 USB4 DPIA needs dmub notifications for interrupts */
+		if (dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0 &&
+		    !dc->debug.dpia_debug.bits.disable_dpia)
+			return true;
+	break;
+
+	case AMDGPU_FAMILY_GC_11_0_1:
+	case AMDGPU_FAMILY_GC_11_5_0:
+		if (!dc->debug.dpia_debug.bits.disable_dpia)
+			return true;
+	break;
+
+	default:
+		break;
+	}
 
 	/* dmub aux needs dmub notifications to be enabled */
 	return dc->debug.enable_dmub_aux_for_legacy_ddc;
+
 }
 
 /**
@@ -5203,7 +5121,7 @@ bool dc_process_dmub_aux_transfer_async(struct dc *dc,
 			);
 	}
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
@@ -5257,7 +5175,7 @@ bool dc_process_dmub_set_config_async(struct dc *dc,
 	cmd.set_config_access.set_config_control.cmd_pkt.msg_type = payload->msg_type;
 	cmd.set_config_access.set_config_control.cmd_pkt.msg_data = payload->msg_data;
 
-	if (!dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) {
+	if (!dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) {
 		/* command is not processed by dmub */
 		notify->sc_status = SET_CONFIG_UNKNOWN_ERROR;
 		return is_cmd_complete;
@@ -5300,7 +5218,7 @@ enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc,
 	cmd.set_mst_alloc_slots.mst_slots_control.instance = dc->links[link_index]->ddc_hw_inst;
 	cmd.set_mst_alloc_slots.mst_slots_control.mst_alloc_slots = mst_alloc_slots;
 
-	if (!dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
+	if (!dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
 		/* command is not processed by dmub */
 		return DC_ERROR_UNEXPECTED;
 
@@ -5338,7 +5256,7 @@ void dc_process_dmub_dpia_hpd_int_enable(const struct dc *dc,
 	cmd.dpia_hpd_int_enable.header.type = DMUB_CMD__DPIA_HPD_INT_ENABLE;
 	cmd.dpia_hpd_int_enable.enable = hpd_int_enable;
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	DC_LOG_DEBUG("%s: hpd_int_enable(%d)\n", __func__, hpd_int_enable);
 }
@@ -5437,6 +5355,8 @@ bool dc_abm_save_restore(
 	struct dc_link *link = stream->sink->link;
 	struct dc_link *edp_links[MAX_NUM_EDP];
 
+	if (link->replay_settings.replay_feature_enabled)
+		return false;
 
 	/*find primary pipe associated with stream*/
 	for (i = 0; i < MAX_PIPES; i++) {
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
index fe07160932d6..9c05b1a07142 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
@@ -31,6 +31,7 @@
 #include "basics/dc_common.h"
 #include "resource.h"
 #include "dc_dmub_srv.h"
+#include "dc_state_priv.h"
 
 #define NUM_ELEMENTS(a) (sizeof(a) / sizeof((a)[0]))
 
@@ -425,45 +426,130 @@ void get_hdr_visual_confirm_color(
 }
 
 void get_subvp_visual_confirm_color(
-		struct dc *dc,
-		struct dc_state *context,
 		struct pipe_ctx *pipe_ctx,
 		struct tg_color *color)
 {
 	uint32_t color_value = MAX_TG_COLOR_VALUE;
-	bool enable_subvp = false;
-	int i;
-
-	if (!dc->ctx || !dc->ctx->dmub_srv || !pipe_ctx || !context)
-		return;
+	if (pipe_ctx) {
+		switch (pipe_ctx->p_state_type) {
+		case P_STATE_SUB_VP:
+			color->color_r_cr = color_value;
+			color->color_g_y  = 0;
+			color->color_b_cb = 0;
+			break;
+		case P_STATE_DRR_SUB_VP:
+			color->color_r_cr = 0;
+			color->color_g_y  = color_value;
+			color->color_b_cb = 0;
+			break;
+		case P_STATE_V_BLANK_SUB_VP:
+			color->color_r_cr = 0;
+			color->color_g_y  = 0;
+			color->color_b_cb = color_value;
+			break;
+		default:
+			break;
+		}
+	}
+}
 
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+void get_mclk_switch_visual_confirm_color(
+		struct pipe_ctx *pipe_ctx,
+		struct tg_color *color)
+{
+	uint32_t color_value = MAX_TG_COLOR_VALUE;
 
-		if (pipe->stream && pipe->stream->mall_stream_config.paired_stream &&
-		    pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
-			/* SubVP enable - red */
-			color->color_g_y = 0;
+	if (pipe_ctx) {
+		switch (pipe_ctx->p_state_type) {
+		case P_STATE_V_BLANK:
+			color->color_r_cr = color_value;
+			color->color_g_y = color_value;
 			color->color_b_cb = 0;
+			break;
+		case P_STATE_FPO:
+			color->color_r_cr = 0;
+			color->color_g_y  = color_value;
+			color->color_b_cb = color_value;
+			break;
+		case P_STATE_V_ACTIVE:
 			color->color_r_cr = color_value;
-			enable_subvp = true;
-
-			if (pipe_ctx->stream == pipe->stream)
-				return;
+			color->color_g_y  = 0;
+			color->color_b_cb = color_value;
+			break;
+		case P_STATE_SUB_VP:
+			color->color_r_cr = color_value;
+			color->color_g_y  = 0;
+			color->color_b_cb = 0;
+			break;
+		case P_STATE_DRR_SUB_VP:
+			color->color_r_cr = 0;
+			color->color_g_y  = color_value;
+			color->color_b_cb = 0;
+			break;
+		case P_STATE_V_BLANK_SUB_VP:
+			color->color_r_cr = 0;
+			color->color_g_y  = 0;
+			color->color_b_cb = color_value;
+			break;
+		default:
 			break;
 		}
 	}
+}
 
-	if (enable_subvp && pipe_ctx->stream->mall_stream_config.type == SUBVP_NONE) {
-		color->color_r_cr = 0;
-		if (pipe_ctx->stream->allow_freesync == 1) {
-			/* SubVP enable and DRR on - green */
-			color->color_b_cb = 0;
-			color->color_g_y = color_value;
+void set_p_state_switch_method(
+		struct dc *dc,
+		struct dc_state *context,
+		struct pipe_ctx *pipe_ctx)
+{
+	struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+	bool enable_subvp;
+
+	if (!dc->ctx || !dc->ctx->dmub_srv || !pipe_ctx || !vba || !context)
+		return;
+
+	if (vba->DRAMClockChangeSupport[vba->VoltageLevel][vba->maxMpcComb] !=
+			dm_dram_clock_change_unsupported) {
+		/* MCLK switching is supported */
+		if (!pipe_ctx->has_vactive_margin) {
+			/* In Vblank - yellow */
+			pipe_ctx->p_state_type = P_STATE_V_BLANK;
+
+			if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {
+				/* FPO + Vblank - cyan */
+				pipe_ctx->p_state_type = P_STATE_FPO;
+			}
 		} else {
-			/* SubVP enable and No DRR - blue */
-			color->color_g_y = 0;
-			color->color_b_cb = color_value;
+			/* In Vactive - pink */
+			pipe_ctx->p_state_type = P_STATE_V_ACTIVE;
+		}
+
+		/* SubVP */
+		enable_subvp = false;
+
+		for (int i = 0; i < dc->res_pool->pipe_count; i++) {
+			struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+			if (pipe->stream && dc_state_get_paired_subvp_stream(context, pipe->stream) &&
+					dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) {
+				/* SubVP enable - red */
+				pipe_ctx->p_state_type = P_STATE_SUB_VP;
+				enable_subvp = true;
+
+				if (pipe_ctx->stream == pipe->stream)
+					return;
+				break;
+			}
+		}
+
+		if (enable_subvp && dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_NONE) {
+			if (pipe_ctx->stream->allow_freesync == 1) {
+				/* SubVP enable and DRR on - green */
+				pipe_ctx->p_state_type = P_STATE_DRR_SUB_VP;
+			} else {
+				/* SubVP enable and No DRR - blue */
+				pipe_ctx->p_state_type = P_STATE_V_BLANK_SUB_VP;
+			}
 		}
 	}
 }
@@ -473,7 +559,8 @@ void hwss_build_fast_sequence(struct dc *dc,
 		unsigned int dmub_cmd_count,
 		struct block_sequence block_sequence[],
 		int *num_steps,
-		struct pipe_ctx *pipe_ctx)
+		struct pipe_ctx *pipe_ctx,
+		struct dc_stream_status *stream_status)
 {
 	struct dc_plane_state *plane = pipe_ctx->plane_state;
 	struct dc_stream_state *stream = pipe_ctx->stream;
@@ -490,7 +577,8 @@ void hwss_build_fast_sequence(struct dc *dc,
 	if (dc->hwss.subvp_pipe_control_lock_fast) {
 		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.dc = dc;
 		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.lock = true;
-		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.pipe_ctx = pipe_ctx;
+		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.subvp_immediate_flip =
+				plane->flip_immediate && stream_status->mall_stream_config.type == SUBVP_MAIN;
 		block_sequence[*num_steps].func = DMUB_SUBVP_PIPE_CONTROL_LOCK_FAST;
 		(*num_steps)++;
 	}
@@ -529,7 +617,7 @@ void hwss_build_fast_sequence(struct dc *dc,
 			}
 			if (dc->hwss.update_plane_addr && current_mpc_pipe->plane_state->update_flags.bits.addr_update) {
 				if (resource_is_pipe_type(current_mpc_pipe, OTG_MASTER) &&
-						current_mpc_pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+						stream_status->mall_stream_config.type == SUBVP_MAIN) {
 					block_sequence[*num_steps].params.subvp_save_surf_addr.dc_dmub_srv = dc->ctx->dmub_srv;
 					block_sequence[*num_steps].params.subvp_save_surf_addr.addr = &current_mpc_pipe->plane_state->address;
 					block_sequence[*num_steps].params.subvp_save_surf_addr.subvp_index = current_mpc_pipe->subvp_index;
@@ -612,7 +700,8 @@ void hwss_build_fast_sequence(struct dc *dc,
 	if (dc->hwss.subvp_pipe_control_lock_fast) {
 		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.dc = dc;
 		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.lock = false;
-		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.pipe_ctx = pipe_ctx;
+		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.subvp_immediate_flip =
+				plane->flip_immediate && stream_status->mall_stream_config.type == SUBVP_MAIN;
 		block_sequence[*num_steps].func = DMUB_SUBVP_PIPE_CONTROL_LOCK_FAST;
 		(*num_steps)++;
 	}
@@ -724,7 +813,7 @@ void hwss_send_dmcub_cmd(union block_sequence_params *params)
 	union dmub_rb_cmd *cmd = params->send_dmcub_cmd_params.cmd;
 	enum dm_dmub_wait_type wait_type = params->send_dmcub_cmd_params.wait_type;
 
-	dm_execute_dmub_cmd(ctx, cmd, wait_type);
+	dc_wake_and_execute_dmub_cmd(ctx, cmd, wait_type);
 }
 
 void hwss_program_manual_trigger(union block_sequence_params *params)
@@ -812,42 +901,6 @@ void hwss_subvp_save_surf_addr(union block_sequence_params *params)
 	dc_dmub_srv_subvp_save_surf_addr(dc_dmub_srv, addr, subvp_index);
 }
 
-void get_mclk_switch_visual_confirm_color(
-		struct dc *dc,
-		struct dc_state *context,
-		struct pipe_ctx *pipe_ctx,
-		struct tg_color *color)
-{
-	uint32_t color_value = MAX_TG_COLOR_VALUE;
-	struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
-
-	if (!dc->ctx || !dc->ctx->dmub_srv || !pipe_ctx || !vba || !context)
-		return;
-
-	if (vba->DRAMClockChangeSupport[vba->VoltageLevel][vba->maxMpcComb] !=
-			dm_dram_clock_change_unsupported) {
-		/* MCLK switching is supported */
-		if (!pipe_ctx->has_vactive_margin) {
-			/* In Vblank - yellow */
-			color->color_r_cr = color_value;
-			color->color_g_y = color_value;
-
-			if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {
-				/* FPO + Vblank - cyan */
-				color->color_r_cr = 0;
-				color->color_g_y  = color_value;
-				color->color_b_cb = color_value;
-			}
-		} else {
-			/* In Vactive - pink */
-			color->color_r_cr = color_value;
-			color->color_b_cb = color_value;
-		}
-		/* SubVP */
-		get_subvp_visual_confirm_color(dc, context, pipe_ctx, color);
-	}
-}
-
 void get_surface_tile_visual_confirm_color(
 		struct pipe_ctx *pipe_ctx,
 		struct tg_color *color)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
index ed94187c2afa..c6c35037bdb8 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
@@ -467,6 +467,13 @@ bool dc_link_setup_psr(struct dc_link *link,
 	return link->dc->link_srv->edp_setup_psr(link, stream, psr_config, psr_context);
 }
 
+bool dc_link_set_replay_allow_active(struct dc_link *link, const bool *allow_active,
+		bool wait, bool force_static, const unsigned int *power_opts)
+{
+	return link->dc->link_srv->edp_set_replay_allow_active(link, allow_active, wait,
+			force_static, power_opts);
+}
+
 bool dc_link_get_replay_state(const struct dc_link *link, uint64_t *state)
 {
 	return link->dc->link_srv->edp_get_replay_state(link, state);
@@ -497,7 +504,7 @@ void dc_link_enable_hpd_filter(struct dc_link *link, bool enable)
 	link->dc->link_srv->enable_hpd_filter(link, enable);
 }
 
-bool dc_link_validate(struct dc *dc, const struct dc_stream_state *streams, const unsigned int count)
+bool dc_link_dp_dpia_validate(struct dc *dc, const struct dc_stream_state *streams, const unsigned int count)
 {
 	return dc->link_srv->validate_dpia_bandwidth(streams, count);
 }
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index a1f1d1003992..9fbdb09697fd 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -42,6 +42,7 @@
 #include "link_enc_cfg.h"
 #include "link.h"
 #include "clk_mgr.h"
+#include "dc_state_priv.h"
 #include "virtual/virtual_link_hwss.h"
 #include "link/hwss/link_hwss_dio.h"
 #include "link/hwss/link_hwss_dpia.h"
@@ -69,8 +70,8 @@
 #include "dcn314/dcn314_resource.h"
 #include "dcn315/dcn315_resource.h"
 #include "dcn316/dcn316_resource.h"
-#include "../dcn32/dcn32_resource.h"
-#include "../dcn321/dcn321_resource.h"
+#include "dcn32/dcn32_resource.h"
+#include "dcn321/dcn321_resource.h"
 #include "dcn35/dcn35_resource.h"
 
 #define VISUAL_CONFIRM_BASE_DEFAULT 3
@@ -1764,6 +1765,29 @@ int recource_find_free_pipe_not_used_in_cur_res_ctx(
 	return free_pipe_idx;
 }
 
+int recource_find_free_pipe_used_as_otg_master_in_cur_res_ctx(
+		const struct resource_context *cur_res_ctx,
+		struct resource_context *new_res_ctx,
+		const struct resource_pool *pool)
+{
+	int free_pipe_idx = FREE_PIPE_INDEX_NOT_FOUND;
+	const struct pipe_ctx *new_pipe, *cur_pipe;
+	int i;
+
+	for (i = 0; i < pool->pipe_count; i++) {
+		cur_pipe = &cur_res_ctx->pipe_ctx[i];
+		new_pipe = &new_res_ctx->pipe_ctx[i];
+
+		if (resource_is_pipe_type(cur_pipe, OTG_MASTER) &&
+				resource_is_pipe_type(new_pipe, FREE_PIPE)) {
+			free_pipe_idx = i;
+			break;
+		}
+	}
+
+	return free_pipe_idx;
+}
+
 int resource_find_free_pipe_used_as_cur_sec_dpp_in_mpcc_combine(
 		const struct resource_context *cur_res_ctx,
 		struct resource_context *new_res_ctx,
@@ -2170,6 +2194,10 @@ void resource_log_pipe_topology_update(struct dc *dc, struct dc_state *state)
 	for (stream_idx = 0; stream_idx < state->stream_count; stream_idx++) {
 		otg_master = resource_get_otg_master_for_stream(
 				&state->res_ctx, state->streams[stream_idx]);
+		if (!otg_master	|| otg_master->stream_res.tg == NULL) {
+			DC_LOG_DC("topology update: otg_master NULL stream_idx %d!\n", stream_idx);
+			return;
+		}
 		slice_count = resource_get_opp_heads_for_otg_master(otg_master,
 				&state->res_ctx, opp_heads);
 		for (slice_idx = 0; slice_idx < slice_count; slice_idx++) {
@@ -2233,7 +2261,7 @@ static struct pipe_ctx *get_last_dpp_pipe_in_mpcc_combine(
 }
 
 static bool update_pipe_params_after_odm_slice_count_change(
-		const struct dc_stream_state *stream,
+		struct pipe_ctx *otg_master,
 		struct dc_state *context,
 		const struct resource_pool *pool)
 {
@@ -2243,9 +2271,12 @@ static bool update_pipe_params_after_odm_slice_count_change(
 
 	for (i = 0; i < pool->pipe_count && result; i++) {
 		pipe = &context->res_ctx.pipe_ctx[i];
-		if (pipe->stream == stream && pipe->plane_state)
+		if (pipe->stream == otg_master->stream && pipe->plane_state)
 			result = resource_build_scaling_params(pipe);
 	}
+
+	if (pool->funcs->build_pipe_pix_clk_params)
+		pool->funcs->build_pipe_pix_clk_params(otg_master);
 	return result;
 }
 
@@ -2433,6 +2464,9 @@ void resource_remove_otg_master_for_stream_output(struct dc_state *context,
 	struct pipe_ctx *otg_master = resource_get_otg_master_for_stream(
 			&context->res_ctx, stream);
 
+	if (!otg_master)
+		return;
+
 	ASSERT(resource_get_odm_slice_count(otg_master) == 1);
 	ASSERT(otg_master->plane_state == NULL);
 	ASSERT(otg_master->stream_res.stream_enc);
@@ -2928,7 +2962,7 @@ bool resource_update_pipes_for_stream_with_slice_count(
 					otg_master, new_ctx, pool);
 	if (result)
 		result = update_pipe_params_after_odm_slice_count_change(
-				otg_master->stream, new_ctx, pool);
+				otg_master, new_ctx, pool);
 	return result;
 }
 
@@ -2967,189 +3001,6 @@ bool resource_update_pipes_for_plane_with_slice_count(
 	return result;
 }
 
-bool dc_add_plane_to_context(
-		const struct dc *dc,
-		struct dc_stream_state *stream,
-		struct dc_plane_state *plane_state,
-		struct dc_state *context)
-{
-	struct resource_pool *pool = dc->res_pool;
-	struct pipe_ctx *otg_master_pipe;
-	struct dc_stream_status *stream_status = NULL;
-	bool added = false;
-
-	stream_status = dc_stream_get_status_from_state(context, stream);
-	if (stream_status == NULL) {
-		dm_error("Existing stream not found; failed to attach surface!\n");
-		goto out;
-	} else if (stream_status->plane_count == MAX_SURFACE_NUM) {
-		dm_error("Surface: can not attach plane_state %p! Maximum is: %d\n",
-				plane_state, MAX_SURFACE_NUM);
-		goto out;
-	}
-
-	otg_master_pipe = resource_get_otg_master_for_stream(
-			&context->res_ctx, stream);
-	added = resource_append_dpp_pipes_for_plane_composition(context,
-			dc->current_state, pool, otg_master_pipe, plane_state);
-
-	if (added) {
-		stream_status->plane_states[stream_status->plane_count] =
-				plane_state;
-		stream_status->plane_count++;
-		dc_plane_state_retain(plane_state);
-	}
-
-out:
-	return added;
-}
-
-bool dc_remove_plane_from_context(
-		const struct dc *dc,
-		struct dc_stream_state *stream,
-		struct dc_plane_state *plane_state,
-		struct dc_state *context)
-{
-	int i;
-	struct dc_stream_status *stream_status = NULL;
-	struct resource_pool *pool = dc->res_pool;
-
-	if (!plane_state)
-		return true;
-
-	for (i = 0; i < context->stream_count; i++)
-		if (context->streams[i] == stream) {
-			stream_status = &context->stream_status[i];
-			break;
-		}
-
-	if (stream_status == NULL) {
-		dm_error("Existing stream not found; failed to remove plane.\n");
-		return false;
-	}
-
-	resource_remove_dpp_pipes_for_plane_composition(
-			context, pool, plane_state);
-
-	for (i = 0; i < stream_status->plane_count; i++) {
-		if (stream_status->plane_states[i] == plane_state) {
-			dc_plane_state_release(stream_status->plane_states[i]);
-			break;
-		}
-	}
-
-	if (i == stream_status->plane_count) {
-		dm_error("Existing plane_state not found; failed to detach it!\n");
-		return false;
-	}
-
-	stream_status->plane_count--;
-
-	/* Start at the plane we've just released, and move all the planes one index forward to "trim" the array */
-	for (; i < stream_status->plane_count; i++)
-		stream_status->plane_states[i] = stream_status->plane_states[i + 1];
-
-	stream_status->plane_states[stream_status->plane_count] = NULL;
-
-	if (stream_status->plane_count == 0 && dc->config.enable_windowed_mpo_odm)
-		/* ODM combine could prevent us from supporting more planes
-		 * we will reset ODM slice count back to 1 when all planes have
-		 * been removed to maximize the amount of planes supported when
-		 * new planes are added.
-		 */
-		resource_update_pipes_for_stream_with_slice_count(
-				context, dc->current_state, dc->res_pool, stream, 1);
-
-	return true;
-}
-
-/**
- * dc_rem_all_planes_for_stream - Remove planes attached to the target stream.
- *
- * @dc: Current dc state.
- * @stream: Target stream, which we want to remove the attached plans.
- * @context: New context.
- *
- * Return:
- * Return true if DC was able to remove all planes from the target
- * stream, otherwise, return false.
- */
-bool dc_rem_all_planes_for_stream(
-		const struct dc *dc,
-		struct dc_stream_state *stream,
-		struct dc_state *context)
-{
-	int i, old_plane_count;
-	struct dc_stream_status *stream_status = NULL;
-	struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 };
-
-	for (i = 0; i < context->stream_count; i++)
-			if (context->streams[i] == stream) {
-				stream_status = &context->stream_status[i];
-				break;
-			}
-
-	if (stream_status == NULL) {
-		dm_error("Existing stream %p not found!\n", stream);
-		return false;
-	}
-
-	old_plane_count = stream_status->plane_count;
-
-	for (i = 0; i < old_plane_count; i++)
-		del_planes[i] = stream_status->plane_states[i];
-
-	for (i = 0; i < old_plane_count; i++)
-		if (!dc_remove_plane_from_context(dc, stream, del_planes[i], context))
-			return false;
-
-	return true;
-}
-
-static bool add_all_planes_for_stream(
-		const struct dc *dc,
-		struct dc_stream_state *stream,
-		const struct dc_validation_set set[],
-		int set_count,
-		struct dc_state *context)
-{
-	int i, j;
-
-	for (i = 0; i < set_count; i++)
-		if (set[i].stream == stream)
-			break;
-
-	if (i == set_count) {
-		dm_error("Stream %p not found in set!\n", stream);
-		return false;
-	}
-
-	for (j = 0; j < set[i].plane_count; j++)
-		if (!dc_add_plane_to_context(dc, stream, set[i].plane_states[j], context))
-			return false;
-
-	return true;
-}
-
-bool dc_add_all_planes_for_stream(
-		const struct dc *dc,
-		struct dc_stream_state *stream,
-		struct dc_plane_state * const *plane_states,
-		int plane_count,
-		struct dc_state *context)
-{
-	struct dc_validation_set set;
-	int i;
-
-	set.stream = stream;
-	set.plane_count = plane_count;
-
-	for (i = 0; i < plane_count; i++)
-		set.plane_states[i] = plane_states[i];
-
-	return add_all_planes_for_stream(dc, stream, &set, 1, context);
-}
-
 bool dc_is_timing_changed(struct dc_stream_state *cur_stream,
 		       struct dc_stream_state *new_stream)
 {
@@ -3301,84 +3152,6 @@ static struct audio *find_first_free_audio(
 	return NULL;
 }
 
-/*
- * dc_add_stream_to_ctx() - Add a new dc_stream_state to a dc_state.
- */
-enum dc_status dc_add_stream_to_ctx(
-		struct dc *dc,
-		struct dc_state *new_ctx,
-		struct dc_stream_state *stream)
-{
-	enum dc_status res;
-	DC_LOGGER_INIT(dc->ctx->logger);
-
-	if (new_ctx->stream_count >= dc->res_pool->timing_generator_count) {
-		DC_LOG_WARNING("Max streams reached, can't add stream %p !\n", stream);
-		return DC_ERROR_UNEXPECTED;
-	}
-
-	new_ctx->streams[new_ctx->stream_count] = stream;
-	dc_stream_retain(stream);
-	new_ctx->stream_count++;
-
-	res = resource_add_otg_master_for_stream_output(
-			new_ctx, dc->res_pool, stream);
-	if (res != DC_OK)
-		DC_LOG_WARNING("Adding stream %p to context failed with err %d!\n", stream, res);
-
-	return res;
-}
-
-/*
- * dc_remove_stream_from_ctx() - Remove a stream from a dc_state.
- */
-enum dc_status dc_remove_stream_from_ctx(
-			struct dc *dc,
-			struct dc_state *new_ctx,
-			struct dc_stream_state *stream)
-{
-	int i;
-	struct dc_context *dc_ctx = dc->ctx;
-	struct pipe_ctx *del_pipe = resource_get_otg_master_for_stream(
-			&new_ctx->res_ctx, stream);
-
-	if (!del_pipe) {
-		DC_ERROR("Pipe not found for stream %p !\n", stream);
-		return DC_ERROR_UNEXPECTED;
-	}
-
-	resource_update_pipes_for_stream_with_slice_count(new_ctx,
-			dc->current_state, dc->res_pool, stream, 1);
-	resource_remove_otg_master_for_stream_output(
-			new_ctx, dc->res_pool, stream);
-
-	for (i = 0; i < new_ctx->stream_count; i++)
-		if (new_ctx->streams[i] == stream)
-			break;
-
-	if (new_ctx->streams[i] != stream) {
-		DC_ERROR("Context doesn't have stream %p !\n", stream);
-		return DC_ERROR_UNEXPECTED;
-	}
-
-	dc_stream_release(new_ctx->streams[i]);
-	new_ctx->stream_count--;
-
-	/* Trim back arrays */
-	for (; i < new_ctx->stream_count; i++) {
-		new_ctx->streams[i] = new_ctx->streams[i + 1];
-		new_ctx->stream_status[i] = new_ctx->stream_status[i + 1];
-	}
-
-	new_ctx->streams[new_ctx->stream_count] = NULL;
-	memset(
-			&new_ctx->stream_status[new_ctx->stream_count],
-			0,
-			sizeof(new_ctx->stream_status[0]));
-
-	return DC_OK;
-}
-
 static struct dc_stream_state *find_pll_sharable_stream(
 		struct dc_stream_state *stream_needs_pll,
 		struct dc_state *context)
@@ -3586,6 +3359,7 @@ static void mark_seamless_boot_stream(
  *       |________|_______________|___________|_____________|
  */
 static bool acquire_otg_master_pipe_for_stream(
+		const struct dc_state *cur_ctx,
 		struct dc_state *new_ctx,
 		const struct resource_pool *pool,
 		struct dc_stream_state *stream)
@@ -3599,7 +3373,22 @@ static bool acquire_otg_master_pipe_for_stream(
 	int pipe_idx;
 	struct pipe_ctx *pipe_ctx = NULL;
 
-	pipe_idx = resource_find_any_free_pipe(&new_ctx->res_ctx, pool);
+	/*
+	 * Upper level code is responsible to optimize unnecessary addition and
+	 * removal for unchanged streams. So unchanged stream will keep the same
+	 * OTG master instance allocated. When current stream is removed and a
+	 * new stream is added, we want to reuse the OTG instance made available
+	 * by the removed stream first. If not found, we try to avoid using
+	 * any free pipes already used in current context as this could tear
+	 * down existing ODM/MPC/MPO configuration unnecessarily.
+	 */
+	pipe_idx = recource_find_free_pipe_used_as_otg_master_in_cur_res_ctx(
+			&cur_ctx->res_ctx, &new_ctx->res_ctx, pool);
+	if (pipe_idx == FREE_PIPE_INDEX_NOT_FOUND)
+		pipe_idx = recource_find_free_pipe_not_used_in_cur_res_ctx(
+				&cur_ctx->res_ctx, &new_ctx->res_ctx, pool);
+	if (pipe_idx == FREE_PIPE_INDEX_NOT_FOUND)
+		pipe_idx = resource_find_any_free_pipe(&new_ctx->res_ctx, pool);
 	if (pipe_idx != FREE_PIPE_INDEX_NOT_FOUND) {
 		pipe_ctx = &new_ctx->res_ctx.pipe_ctx[pipe_idx];
 		memset(pipe_ctx, 0, sizeof(*pipe_ctx));
@@ -3659,7 +3448,7 @@ enum dc_status resource_map_pool_resources(
 
 	if (!acquired)
 		/* acquire new resources */
-		acquired = acquire_otg_master_pipe_for_stream(
+		acquired = acquire_otg_master_pipe_for_stream(dc->current_state,
 				context, pool, stream);
 
 	pipe_ctx = resource_get_otg_master_for_stream(&context->res_ctx, stream);
@@ -3742,34 +3531,6 @@ enum dc_status resource_map_pool_resources(
 	return DC_ERROR_UNEXPECTED;
 }
 
-/**
- * dc_resource_state_copy_construct_current() - Creates a new dc_state from existing state
- *
- * @dc: copy out of dc->current_state
- * @dst_ctx: copy into this
- *
- * This function makes a shallow copy of the current DC state and increments
- * refcounts on existing streams and planes.
- */
-void dc_resource_state_copy_construct_current(
-		const struct dc *dc,
-		struct dc_state *dst_ctx)
-{
-	dc_resource_state_copy_construct(dc->current_state, dst_ctx);
-}
-
-
-void dc_resource_state_construct(
-		const struct dc *dc,
-		struct dc_state *dst_ctx)
-{
-	dst_ctx->clk_mgr = dc->clk_mgr;
-
-	/* Initialise DIG link encoder resource tracking variables. */
-	link_enc_cfg_init(dc, dst_ctx);
-}
-
-
 bool dc_resource_is_dsc_encoding_supported(const struct dc *dc)
 {
 	if (dc->res_pool == NULL)
@@ -3813,6 +3574,31 @@ static bool planes_changed_for_existing_stream(struct dc_state *context,
 	return false;
 }
 
+static bool add_all_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		const struct dc_validation_set set[],
+		int set_count,
+		struct dc_state *state)
+{
+	int i, j;
+
+	for (i = 0; i < set_count; i++)
+		if (set[i].stream == stream)
+			break;
+
+	if (i == set_count) {
+		dm_error("Stream %p not found in set!\n", stream);
+		return false;
+	}
+
+	for (j = 0; j < set[i].plane_count; j++)
+		if (!dc_state_add_plane(dc, stream, set[i].plane_states[j], state))
+			return false;
+
+	return true;
+}
+
 /**
  * dc_validate_with_context - Validate and update the potential new stream in the context object
  *
@@ -3918,7 +3704,8 @@ enum dc_status dc_validate_with_context(struct dc *dc,
 						       unchanged_streams[i],
 						       set,
 						       set_count)) {
-			if (!dc_rem_all_planes_for_stream(dc,
+
+			if (!dc_state_rem_all_planes_for_stream(dc,
 							  unchanged_streams[i],
 							  context)) {
 				res = DC_FAIL_DETACH_SURFACES;
@@ -3940,12 +3727,24 @@ enum dc_status dc_validate_with_context(struct dc *dc,
 			}
 		}
 
-		if (!dc_rem_all_planes_for_stream(dc, del_streams[i], context)) {
-			res = DC_FAIL_DETACH_SURFACES;
-			goto fail;
+		if (dc_state_get_stream_subvp_type(context, del_streams[i]) == SUBVP_PHANTOM) {
+			/* remove phantoms specifically */
+			if (!dc_state_rem_all_phantom_planes_for_stream(dc, del_streams[i], context, true)) {
+				res = DC_FAIL_DETACH_SURFACES;
+				goto fail;
+			}
+
+			res = dc_state_remove_phantom_stream(dc, context, del_streams[i]);
+			dc_state_release_phantom_stream(dc, context, del_streams[i]);
+		} else {
+			if (!dc_state_rem_all_planes_for_stream(dc, del_streams[i], context)) {
+				res = DC_FAIL_DETACH_SURFACES;
+				goto fail;
+			}
+
+			res = dc_state_remove_stream(dc, context, del_streams[i]);
 		}
 
-		res = dc_remove_stream_from_ctx(dc, context, del_streams[i]);
 		if (res != DC_OK)
 			goto fail;
 	}
@@ -3968,7 +3767,7 @@ enum dc_status dc_validate_with_context(struct dc *dc,
 	/* Add new streams and then add all planes for the new stream */
 	for (i = 0; i < add_streams_count; i++) {
 		calculate_phy_pix_clks(add_streams[i]);
-		res = dc_add_stream_to_ctx(dc, context, add_streams[i]);
+		res = dc_state_add_stream(dc, context, add_streams[i]);
 		if (res != DC_OK)
 			goto fail;
 
@@ -4474,84 +4273,6 @@ static void set_vtem_info_packet(
 	*info_packet = stream->vtem_infopacket;
 }
 
-void dc_resource_state_destruct(struct dc_state *context)
-{
-	int i, j;
-
-	for (i = 0; i < context->stream_count; i++) {
-		for (j = 0; j < context->stream_status[i].plane_count; j++)
-			dc_plane_state_release(
-				context->stream_status[i].plane_states[j]);
-
-		context->stream_status[i].plane_count = 0;
-		dc_stream_release(context->streams[i]);
-		context->streams[i] = NULL;
-	}
-	context->stream_count = 0;
-	context->stream_mask = 0;
-	memset(&context->res_ctx, 0, sizeof(context->res_ctx));
-	memset(&context->pp_display_cfg, 0, sizeof(context->pp_display_cfg));
-	memset(&context->dcn_bw_vars, 0, sizeof(context->dcn_bw_vars));
-	context->clk_mgr = NULL;
-	memset(&context->bw_ctx.bw, 0, sizeof(context->bw_ctx.bw));
-	memset(context->block_sequence, 0, sizeof(context->block_sequence));
-	context->block_sequence_steps = 0;
-	memset(context->dc_dmub_cmd, 0, sizeof(context->dc_dmub_cmd));
-	context->dmub_cmd_count = 0;
-	memset(&context->perf_params, 0, sizeof(context->perf_params));
-	memset(&context->scratch, 0, sizeof(context->scratch));
-}
-
-void dc_resource_state_copy_construct(
-		const struct dc_state *src_ctx,
-		struct dc_state *dst_ctx)
-{
-	int i, j;
-	struct kref refcount = dst_ctx->refcount;
-#ifdef CONFIG_DRM_AMD_DC_FP
-	struct dml2_context *dml2 = NULL;
-
-	// Need to preserve allocated dml2 context
-	if (src_ctx->clk_mgr->ctx->dc->debug.using_dml2)
-		dml2 = dst_ctx->bw_ctx.dml2;
-#endif
-
-	*dst_ctx = *src_ctx;
-
-#ifdef CONFIG_DRM_AMD_DC_FP
-	// Preserve allocated dml2 context
-	if (src_ctx->clk_mgr->ctx->dc->debug.using_dml2)
-		dst_ctx->bw_ctx.dml2 = dml2;
-#endif
-
-	for (i = 0; i < MAX_PIPES; i++) {
-		struct pipe_ctx *cur_pipe = &dst_ctx->res_ctx.pipe_ctx[i];
-
-		if (cur_pipe->top_pipe)
-			cur_pipe->top_pipe =  &dst_ctx->res_ctx.pipe_ctx[cur_pipe->top_pipe->pipe_idx];
-
-		if (cur_pipe->bottom_pipe)
-			cur_pipe->bottom_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx];
-
-		if (cur_pipe->next_odm_pipe)
-			cur_pipe->next_odm_pipe =  &dst_ctx->res_ctx.pipe_ctx[cur_pipe->next_odm_pipe->pipe_idx];
-
-		if (cur_pipe->prev_odm_pipe)
-			cur_pipe->prev_odm_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->prev_odm_pipe->pipe_idx];
-	}
-
-	for (i = 0; i < dst_ctx->stream_count; i++) {
-		dc_stream_retain(dst_ctx->streams[i]);
-		for (j = 0; j < dst_ctx->stream_status[i].plane_count; j++)
-			dc_plane_state_retain(
-				dst_ctx->stream_status[i].plane_states[j]);
-	}
-
-	/* context refcount should not be overridden */
-	dst_ctx->refcount = refcount;
-
-}
-
 struct clock_source *dc_resource_find_first_free_pll(
 		struct resource_context *res_ctx,
 		const struct resource_pool *pool)
@@ -4731,7 +4452,7 @@ void resource_build_bit_depth_reduction_params(struct dc_stream_state *stream,
 			option = DITHER_OPTION_SPATIAL8;
 			break;
 		case COLOR_DEPTH_101010:
-			option = DITHER_OPTION_SPATIAL10;
+			option = DITHER_OPTION_TRUN10;
 			break;
 		default:
 			option = DITHER_OPTION_DISABLE;
@@ -4757,6 +4478,8 @@ void resource_build_bit_depth_reduction_params(struct dc_stream_state *stream,
 			option == DITHER_OPTION_TRUN10_SPATIAL8_FM6) {
 		fmt_bit_depth->flags.TRUNCATE_ENABLED = 1;
 		fmt_bit_depth->flags.TRUNCATE_DEPTH = 2;
+		if (option == DITHER_OPTION_TRUN10)
+			fmt_bit_depth->flags.TRUNCATE_MODE = 1;
 	}
 
 	/* special case - Formatter can only reduce by 4 bits at most.
@@ -5274,7 +4997,7 @@ bool check_subvp_sw_cursor_fallback_req(const struct dc *dc, struct dc_stream_st
 	if (dc->current_state->stream_count == 1 && stream->timing.v_addressable >= 2880 &&
 			((stream->timing.pix_clk_100hz * 100) / stream->timing.v_total / stream->timing.h_total) < 120)
 		return true;
-	else if (dc->current_state->stream_count > 1 && stream->timing.v_addressable >= 2160 &&
+	else if (dc->current_state->stream_count > 1 && stream->timing.v_addressable >= 1080 &&
 			((stream->timing.pix_clk_100hz * 100) / stream->timing.v_total / stream->timing.h_total) < 120)
 		return true;
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
new file mode 100644
index 000000000000..180ac47868c2
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
@@ -0,0 +1,870 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#include "core_types.h"
+#include "core_status.h"
+#include "dc_state.h"
+#include "dc_state_priv.h"
+#include "dc_stream_priv.h"
+#include "dc_plane_priv.h"
+
+#include "dm_services.h"
+#include "resource.h"
+#include "link_enc_cfg.h"
+
+#include "dml2/dml2_wrapper.h"
+#include "dml2/dml2_internal_types.h"
+
+#define DC_LOGGER \
+	dc->ctx->logger
+#define DC_LOGGER_INIT(logger)
+
+/* Private dc_state helper functions */
+static bool dc_state_track_phantom_stream(struct dc_state *state,
+		struct dc_stream_state *phantom_stream)
+{
+	if (state->phantom_stream_count >= MAX_PHANTOM_PIPES)
+		return false;
+
+	state->phantom_streams[state->phantom_stream_count++] = phantom_stream;
+
+	return true;
+}
+
+static bool dc_state_untrack_phantom_stream(struct dc_state *state, struct dc_stream_state *phantom_stream)
+{
+	bool res = false;
+	int i;
+
+	/* first find phantom stream in the dc_state; i is left at its index */
+	for (i = 0; i < state->phantom_stream_count; i++) {
+		if (state->phantom_streams[i] == phantom_stream) {
+			res = true;
+			break;
+		}
+	}
+
+	/* failed to find stream in state */
+	if (!res)
+		return res;
+
+	/* trim back phantom streams, then clear the vacated tail slot */
+	state->phantom_stream_count--;
+	for (; i < state->phantom_stream_count; i++)
+		state->phantom_streams[i] = state->phantom_streams[i + 1];
+	state->phantom_streams[state->phantom_stream_count] = NULL;
+
+	return res;
+}
+
+static bool dc_state_is_phantom_stream_tracked(struct dc_state *state, struct dc_stream_state *phantom_stream)
+{
+	int i;
+
+	for (i = 0; i < state->phantom_stream_count; i++) {
+		if (state->phantom_streams[i] == phantom_stream)
+			return true;
+	}
+
+	return false;
+}
+
+static bool dc_state_track_phantom_plane(struct dc_state *state,
+		struct dc_plane_state *phantom_plane)
+{
+	if (state->phantom_plane_count >= MAX_PHANTOM_PIPES)
+		return false;
+
+	state->phantom_planes[state->phantom_plane_count++] = phantom_plane;
+
+	return true;
+}
+
+static bool dc_state_untrack_phantom_plane(struct dc_state *state, struct dc_plane_state *phantom_plane)
+{
+	bool res = false;
+	int i;
+
+	/* first find phantom plane in the dc_state; i is left at its index */
+	for (i = 0; i < state->phantom_plane_count; i++) {
+		if (state->phantom_planes[i] == phantom_plane) {
+			res = true;
+			break;
+		}
+	}
+
+	/* failed to find plane in state */
+	if (!res)
+		return res;
+
+	/* trim back phantom planes, then clear the vacated tail slot */
+	state->phantom_plane_count--;
+	for (; i < state->phantom_plane_count; i++)
+		state->phantom_planes[i] = state->phantom_planes[i + 1];
+	state->phantom_planes[state->phantom_plane_count] = NULL;
+
+	return res;
+}
+
+static bool dc_state_is_phantom_plane_tracked(struct dc_state *state, struct dc_plane_state *phantom_plane)
+{
+	int i;
+
+	for (i = 0; i < state->phantom_plane_count; i++) {
+		if (state->phantom_planes[i] == phantom_plane)
+			return true;
+	}
+
+	return false;
+}
+
+static void dc_state_copy_internal(struct dc_state *dst_state, struct dc_state *src_state)
+{
+	int i, j;
+
+	memcpy(dst_state, src_state, sizeof(struct dc_state));
+
+	for (i = 0; i < MAX_PIPES; i++) {
+		struct pipe_ctx *cur_pipe = &dst_state->res_ctx.pipe_ctx[i];
+
+		if (cur_pipe->top_pipe)
+			cur_pipe->top_pipe =  &dst_state->res_ctx.pipe_ctx[cur_pipe->top_pipe->pipe_idx];
+
+		if (cur_pipe->bottom_pipe)
+			cur_pipe->bottom_pipe = &dst_state->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx];
+
+		if (cur_pipe->prev_odm_pipe)
+			cur_pipe->prev_odm_pipe =  &dst_state->res_ctx.pipe_ctx[cur_pipe->prev_odm_pipe->pipe_idx];
+
+		if (cur_pipe->next_odm_pipe)
+			cur_pipe->next_odm_pipe = &dst_state->res_ctx.pipe_ctx[cur_pipe->next_odm_pipe->pipe_idx];
+	}
+
+	/* retain phantoms */
+	for (i = 0; i < dst_state->phantom_stream_count; i++)
+		dc_stream_retain(dst_state->phantom_streams[i]);
+
+	for (i = 0; i < dst_state->phantom_plane_count; i++)
+		dc_plane_state_retain(dst_state->phantom_planes[i]);
+
+	/* retain streams and planes */
+	for (i = 0; i < dst_state->stream_count; i++) {
+		dc_stream_retain(dst_state->streams[i]);
+		for (j = 0; j < dst_state->stream_status[i].plane_count; j++)
+			dc_plane_state_retain(
+					dst_state->stream_status[i].plane_states[j]);
+	}
+
+}
+
+static void init_state(struct dc *dc, struct dc_state *state)
+{
+	/* Each context must have their own instance of VBA and in order to
+	 * initialize and obtain IP and SOC the base DML instance from DC is
+	 * initially copied into every context
+	 */
+	memcpy(&state->bw_ctx.dml, &dc->dml, sizeof(struct display_mode_lib));
+}
+
+/* Public dc_state functions */
+struct dc_state *dc_state_create(struct dc *dc)
+{
+	struct dc_state *state = kvzalloc(sizeof(struct dc_state),
+			GFP_KERNEL);
+
+	if (!state)
+		return NULL;
+
+	init_state(dc, state);
+	dc_state_construct(dc, state);
+
+#ifdef CONFIG_DRM_AMD_DC_FP
+	if (dc->debug.using_dml2)
+		dml2_create(dc, &dc->dml2_options, &state->bw_ctx.dml2);
+#endif
+
+	kref_init(&state->refcount);
+
+	return state;
+}
+
+void dc_state_copy(struct dc_state *dst_state, struct dc_state *src_state)
+{
+	struct kref refcount = dst_state->refcount;
+#ifdef CONFIG_DRM_AMD_DC_FP
+	struct dml2_context *dst_dml2 = dst_state->bw_ctx.dml2;
+#endif
+
+	dc_state_copy_internal(dst_state, src_state);
+
+#ifdef CONFIG_DRM_AMD_DC_FP
+	dst_state->bw_ctx.dml2 = dst_dml2;
+	if (src_state->bw_ctx.dml2)
+		dml2_copy(dst_state->bw_ctx.dml2, src_state->bw_ctx.dml2);
+#endif
+
+	/* context refcount should not be overridden */
+	dst_state->refcount = refcount;
+}
+
+struct dc_state *dc_state_create_copy(struct dc_state *src_state)
+{
+	struct dc_state *new_state;
+
+	new_state = kvmalloc(sizeof(struct dc_state),
+			GFP_KERNEL);
+	if (!new_state)
+		return NULL;
+
+	dc_state_copy_internal(new_state, src_state);
+	/* the memcpy cloned src_state's refcount; own it before any release */
+	kref_init(&new_state->refcount);
+
+#ifdef CONFIG_DRM_AMD_DC_FP
+	new_state->bw_ctx.dml2 = NULL; /* drop alias of src_state's dml2 */
+	if (src_state->bw_ctx.dml2 &&
+			!dml2_create_copy(&new_state->bw_ctx.dml2, src_state->bw_ctx.dml2)) {
+		dc_state_release(new_state);
+		return NULL;
+	}
+#endif
+	return new_state;
+}
+
+void dc_state_copy_current(struct dc *dc, struct dc_state *dst_state)
+{
+	dc_state_copy(dst_state, dc->current_state);
+}
+
+struct dc_state *dc_state_create_current_copy(struct dc *dc)
+{
+	return dc_state_create_copy(dc->current_state);
+}
+
+void dc_state_construct(struct dc *dc, struct dc_state *state)
+{
+	state->clk_mgr = dc->clk_mgr;
+
+	/* Initialise DIG link encoder resource tracking variables. */
+	if (dc->res_pool)
+		link_enc_cfg_init(dc, state);
+}
+
+void dc_state_destruct(struct dc_state *state)
+{
+	int i, j;
+
+	for (i = 0; i < state->stream_count; i++) {
+		for (j = 0; j < state->stream_status[i].plane_count; j++)
+			dc_plane_state_release(
+					state->stream_status[i].plane_states[j]);
+
+		state->stream_status[i].plane_count = 0;
+		dc_stream_release(state->streams[i]);
+		state->streams[i] = NULL;
+	}
+	state->stream_count = 0;
+
+	/* release tracked phantoms */
+	for (i = 0; i < state->phantom_stream_count; i++) {
+		dc_stream_release(state->phantom_streams[i]);
+		state->phantom_streams[i] = NULL;
+	}
+	state->phantom_stream_count = 0;
+
+	for (i = 0; i < state->phantom_plane_count; i++) {
+		dc_plane_state_release(state->phantom_planes[i]);
+		state->phantom_planes[i] = NULL;
+	}
+	state->phantom_plane_count = 0;
+
+	state->stream_mask = 0;
+	memset(&state->res_ctx, 0, sizeof(state->res_ctx));
+	memset(&state->pp_display_cfg, 0, sizeof(state->pp_display_cfg));
+	memset(&state->dcn_bw_vars, 0, sizeof(state->dcn_bw_vars));
+	state->clk_mgr = NULL;
+	memset(&state->bw_ctx.bw, 0, sizeof(state->bw_ctx.bw));
+	memset(state->block_sequence, 0, sizeof(state->block_sequence));
+	state->block_sequence_steps = 0;
+	memset(state->dc_dmub_cmd, 0, sizeof(state->dc_dmub_cmd));
+	state->dmub_cmd_count = 0;
+	memset(&state->perf_params, 0, sizeof(state->perf_params));
+	memset(&state->scratch, 0, sizeof(state->scratch));
+}
+
+void dc_state_retain(struct dc_state *state)
+{
+	kref_get(&state->refcount);
+}
+
+static void dc_state_free(struct kref *kref)
+{
+	struct dc_state *state = container_of(kref, struct dc_state, refcount);
+
+	dc_state_destruct(state);
+
+#ifdef CONFIG_DRM_AMD_DC_FP
+	dml2_destroy(state->bw_ctx.dml2);
+	state->bw_ctx.dml2 = 0;
+#endif
+
+	kvfree(state);
+}
+
+void dc_state_release(struct dc_state *state)
+{
+	kref_put(&state->refcount, dc_state_free);
+}
+/*
+ * dc_state_add_stream() - Add a new dc_stream_state to a dc_state.
+ */
+enum dc_status dc_state_add_stream(
+		struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *stream)
+{
+	enum dc_status res;
+
+	DC_LOGGER_INIT(dc->ctx->logger);
+
+	if (state->stream_count >= dc->res_pool->timing_generator_count) {
+		DC_LOG_WARNING("Max streams reached, can't add stream %p !\n", stream);
+		return DC_ERROR_UNEXPECTED;
+	}
+
+	state->streams[state->stream_count] = stream;
+	dc_stream_retain(stream);
+	state->stream_count++;
+
+	res = resource_add_otg_master_for_stream_output(
+			state, dc->res_pool, stream);
+	if (res != DC_OK)
+		DC_LOG_WARNING("Adding stream %p to context failed with err %d!\n", stream, res);
+
+	return res;
+}
+
+/*
+ * dc_state_remove_stream() - Remove a stream from a dc_state.
+ *
+ * Detaches the stream's OTG master, drops the state's stream reference and
+ * compacts streams[]/stream_status[]. DC_ERROR_UNEXPECTED if stream not found.
+ */
+enum dc_status dc_state_remove_stream(
+		struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *stream)
+{
+	int i;
+	struct pipe_ctx *del_pipe = resource_get_otg_master_for_stream(
+			&state->res_ctx, stream);
+
+	if (!del_pipe) {
+		dm_error("Pipe not found for stream %p !\n", stream);
+		return DC_ERROR_UNEXPECTED;
+	}
+
+	resource_update_pipes_for_stream_with_slice_count(state,
+			dc->current_state, dc->res_pool, stream, 1);
+	resource_remove_otg_master_for_stream_output(state, dc->res_pool, stream);
+
+	for (i = 0; i < state->stream_count; i++)
+		if (state->streams[i] == stream)
+			break;
+
+	/* Test the index, not streams[i]: on a miss i is one past the valid range */
+	if (i == state->stream_count) {
+		dm_error("Context doesn't have stream %p !\n", stream);
+		return DC_ERROR_UNEXPECTED;
+	}
+
+	dc_stream_release(state->streams[i]);
+	state->stream_count--;
+
+	/* Trim back arrays */
+	for (; i < state->stream_count; i++) {
+		state->streams[i] = state->streams[i + 1];
+		state->stream_status[i] = state->stream_status[i + 1];
+	}
+
+	state->streams[state->stream_count] = NULL;
+	memset(&state->stream_status[state->stream_count], 0, sizeof(state->stream_status[0]));
+
+	return DC_OK;
+}
+
+bool dc_state_add_plane(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		struct dc_plane_state *plane_state,
+		struct dc_state *state)
+{
+	struct resource_pool *pool = dc->res_pool;
+	struct pipe_ctx *otg_master_pipe;
+	struct dc_stream_status *stream_status = NULL;
+	bool added = false;
+
+	stream_status = dc_state_get_stream_status(state, stream);
+	if (stream_status == NULL) {
+		dm_error("Existing stream not found; failed to attach surface!\n");
+		goto out;
+	} else if (stream_status->plane_count == MAX_SURFACE_NUM) {
+		dm_error("Surface: can not attach plane_state %p! Maximum is: %d\n",
+				plane_state, MAX_SURFACE_NUM);
+		goto out;
+	}
+
+	otg_master_pipe = resource_get_otg_master_for_stream(
+			&state->res_ctx, stream);
+	if (otg_master_pipe)
+		added = resource_append_dpp_pipes_for_plane_composition(state,
+				dc->current_state, pool, otg_master_pipe, plane_state);
+
+	if (added) {
+		stream_status->plane_states[stream_status->plane_count] =
+				plane_state;
+		stream_status->plane_count++;
+		dc_plane_state_retain(plane_state);
+	}
+
+out:
+	return added;
+}
+
+bool dc_state_remove_plane(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		struct dc_plane_state *plane_state,
+		struct dc_state *state)
+{
+	int i;
+	struct dc_stream_status *stream_status = NULL;
+	struct resource_pool *pool = dc->res_pool;
+
+	if (!plane_state)
+		return true;
+
+	for (i = 0; i < state->stream_count; i++)
+		if (state->streams[i] == stream) {
+			stream_status = &state->stream_status[i];
+			break;
+		}
+
+	if (stream_status == NULL) {
+		dm_error("Existing stream not found; failed to remove plane.\n");
+		return false;
+	}
+
+	resource_remove_dpp_pipes_for_plane_composition(
+			state, pool, plane_state);
+
+	for (i = 0; i < stream_status->plane_count; i++) {
+		if (stream_status->plane_states[i] == plane_state) {
+			dc_plane_state_release(stream_status->plane_states[i]);
+			break;
+		}
+	}
+
+	if (i == stream_status->plane_count) {
+		dm_error("Existing plane_state not found; failed to detach it!\n");
+		return false;
+	}
+
+	stream_status->plane_count--;
+
+	/* Start at the plane we've just released, and move all the planes one index forward to "trim" the array */
+	for (; i < stream_status->plane_count; i++)
+		stream_status->plane_states[i] = stream_status->plane_states[i + 1];
+
+	stream_status->plane_states[stream_status->plane_count] = NULL;
+
+	if (stream_status->plane_count == 0 && dc->config.enable_windowed_mpo_odm)
+		/* ODM combine could prevent us from supporting more planes
+		 * we will reset ODM slice count back to 1 when all planes have
+		 * been removed to maximize the amount of planes supported when
+		 * new planes are added.
+		 */
+		resource_update_pipes_for_stream_with_slice_count(
+				state, dc->current_state, dc->res_pool, stream, 1);
+
+	return true;
+}
+
+/**
+ * dc_state_rem_all_planes_for_stream - Remove planes attached to the target stream.
+ *
+ * @dc: Current dc state.
+ * @stream: Target stream whose attached planes are to be removed.
+ * @state: context from which the planes are to be removed.
+ *
+ * Return:
+ * Return true if DC was able to remove all planes from the target
+ * stream, otherwise, return false.
+ */
+bool dc_state_rem_all_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		struct dc_state *state)
+{
+	int i, old_plane_count;
+	struct dc_stream_status *stream_status = NULL;
+	struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 };
+
+	for (i = 0; i < state->stream_count; i++)
+		if (state->streams[i] == stream) {
+			stream_status = &state->stream_status[i];
+			break;
+		}
+
+	if (stream_status == NULL) {
+		dm_error("Existing stream %p not found!\n", stream);
+		return false;
+	}
+
+	old_plane_count = stream_status->plane_count;
+
+	for (i = 0; i < old_plane_count; i++)
+		del_planes[i] = stream_status->plane_states[i];
+
+	for (i = 0; i < old_plane_count; i++)
+		if (!dc_state_remove_plane(dc, stream, del_planes[i], state))
+			return false;
+
+	return true;
+}
+
+bool dc_state_add_all_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		struct dc_plane_state * const *plane_states,
+		int plane_count,
+		struct dc_state *state)
+{
+	int i;
+	bool result = true;
+
+	for (i = 0; i < plane_count; i++)
+		if (!dc_state_add_plane(dc, stream, plane_states[i], state)) {
+			result = false;
+			break;
+		}
+
+	return result;
+}
+
+/* Private dc_state functions */
+
+/**
+ * dc_state_get_stream_status - Get stream status from given dc state
+ * @state: DC state to find the stream status in
+ * @stream: The stream to get the stream status for
+ *
+ * The given stream is expected to exist in the given dc state. Otherwise, NULL
+ * will be returned.
+ */
+struct dc_stream_status *dc_state_get_stream_status(
+		struct dc_state *state,
+		struct dc_stream_state *stream)
+{
+	uint8_t i;
+
+	if (state == NULL)
+		return NULL;
+
+	for (i = 0; i < state->stream_count; i++) {
+		if (stream == state->streams[i])
+			return &state->stream_status[i];
+	}
+
+	return NULL;
+}
+
+enum mall_stream_type dc_state_get_pipe_subvp_type(const struct dc_state *state,
+		const struct pipe_ctx *pipe_ctx)
+{
+	return dc_state_get_stream_subvp_type(state, pipe_ctx->stream);
+}
+
+enum mall_stream_type dc_state_get_stream_subvp_type(const struct dc_state *state,
+		const struct dc_stream_state *stream)
+{
+	int i;
+
+	enum mall_stream_type type = SUBVP_NONE;
+
+	for (i = 0; i < state->stream_count; i++) {
+		if (state->streams[i] == stream) {
+			type = state->stream_status[i].mall_stream_config.type;
+			break;
+		}
+	}
+
+	return type;
+}
+
+struct dc_stream_state *dc_state_get_paired_subvp_stream(const struct dc_state *state,
+		const struct dc_stream_state *stream)
+{
+	int i;
+
+	struct dc_stream_state *paired_stream = NULL;
+
+	for (i = 0; i < state->stream_count; i++) {
+		if (state->streams[i] == stream) {
+			paired_stream = state->stream_status[i].mall_stream_config.paired_stream;
+			break;
+		}
+	}
+
+	return paired_stream;
+}
+
+struct dc_stream_state *dc_state_create_phantom_stream(const struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *main_stream)
+{
+	struct dc_stream_state *phantom_stream;
+
+	DC_LOGGER_INIT(dc->ctx->logger);
+
+	phantom_stream = dc_create_stream_for_sink(main_stream->sink);
+
+	if (!phantom_stream) {
+		DC_LOG_ERROR("Failed to allocate phantom stream.\n");
+		return NULL;
+	}
+
+	/* track phantom stream in dc_state */
+	dc_state_track_phantom_stream(state, phantom_stream);
+
+	phantom_stream->is_phantom = true;
+	phantom_stream->signal = SIGNAL_TYPE_VIRTUAL;
+	phantom_stream->dpms_off = true;
+
+	return phantom_stream;
+}
+
+void dc_state_release_phantom_stream(const struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *phantom_stream)
+{
+	DC_LOGGER_INIT(dc->ctx->logger);
+
+	if (!dc_state_untrack_phantom_stream(state, phantom_stream)) {
+		DC_LOG_ERROR("Failed to free phantom stream %p in dc state %p.\n", phantom_stream, state);
+		return;
+	}
+
+	dc_stream_release(phantom_stream);
+}
+
+struct dc_plane_state *dc_state_create_phantom_plane(struct dc *dc,
+		struct dc_state *state,
+		struct dc_plane_state *main_plane)
+{
+	struct dc_plane_state *phantom_plane = dc_create_plane_state(dc);
+
+	DC_LOGGER_INIT(dc->ctx->logger);
+
+	if (!phantom_plane) {
+		DC_LOG_ERROR("Failed to allocate phantom plane.\n");
+		return NULL;
+	}
+
+	/* track phantom inside dc_state */
+	dc_state_track_phantom_plane(state, phantom_plane);
+
+	phantom_plane->is_phantom = true;
+
+	return phantom_plane;
+}
+
+void dc_state_release_phantom_plane(const struct dc *dc,
+		struct dc_state *state,
+		struct dc_plane_state *phantom_plane)
+{
+	DC_LOGGER_INIT(dc->ctx->logger);
+
+	if (!dc_state_untrack_phantom_plane(state, phantom_plane)) {
+		DC_LOG_ERROR("Failed to free phantom plane %p in dc state %p.\n", phantom_plane, state);
+		return;
+	}
+
+	dc_plane_state_release(phantom_plane);
+}
+
+/* add phantom stream to state and pair it with its main stream; fails if either is not in state */
+enum dc_status dc_state_add_phantom_stream(struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *phantom_stream,
+		struct dc_stream_state *main_stream)
+{
+	struct dc_stream_status *main_stream_status;
+	struct dc_stream_status *phantom_stream_status;
+	enum dc_status res = dc_state_add_stream(dc, state, phantom_stream);
+
+	/* a stream that was added to the state must also be tracked as a phantom */
+	if (res == DC_OK && !dc_state_is_phantom_stream_tracked(state, phantom_stream))
+		dc_state_track_phantom_stream(state, phantom_stream);
+
+	/* setup subvp meta; a lookup is NULL when its stream is not in the state */
+	main_stream_status = dc_state_get_stream_status(state, main_stream);
+	phantom_stream_status = dc_state_get_stream_status(state, phantom_stream);
+	if (!main_stream_status || !phantom_stream_status)
+		return res == DC_OK ? DC_ERROR_UNEXPECTED : res;
+	phantom_stream_status->mall_stream_config.type = SUBVP_PHANTOM;
+	phantom_stream_status->mall_stream_config.paired_stream = main_stream;
+	main_stream_status->mall_stream_config.type = SUBVP_MAIN;
+	main_stream_status->mall_stream_config.paired_stream = phantom_stream;
+
+	return res;
+}
+
+/* reset a phantom stream's subvp pairing and remove it; fails if it is not in the state */
+enum dc_status dc_state_remove_phantom_stream(struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *phantom_stream)
+{
+	struct dc_stream_status *main_stream_status, *phantom_stream_status;
+
+	phantom_stream_status = dc_state_get_stream_status(state, phantom_stream);
+	if (!phantom_stream_status)
+		return DC_ERROR_UNEXPECTED;
+	main_stream_status = dc_state_get_stream_status(state, phantom_stream_status->mall_stream_config.paired_stream);
+	phantom_stream_status->mall_stream_config.type = SUBVP_NONE;
+	phantom_stream_status->mall_stream_config.paired_stream = NULL;
+	if (main_stream_status) {
+		main_stream_status->mall_stream_config.type = SUBVP_NONE;
+		main_stream_status->mall_stream_config.paired_stream = NULL;
+	}
+
+	return dc_state_remove_stream(dc, state, phantom_stream);
+}
+
+bool dc_state_add_phantom_plane(
+		const struct dc *dc,
+		struct dc_stream_state *phantom_stream,
+		struct dc_plane_state *phantom_plane,
+		struct dc_state *state)
+{
+	bool res = dc_state_add_plane(dc, phantom_stream, phantom_plane, state);
+
+	/* check if plane is tracked */
+	if (res && !dc_state_is_phantom_plane_tracked(state, phantom_plane)) {
+		/* plane must be tracked if added to state */
+		dc_state_track_phantom_plane(state, phantom_plane);
+	}
+
+	return res;
+}
+
+bool dc_state_remove_phantom_plane(
+		const struct dc *dc,
+		struct dc_stream_state *phantom_stream,
+		struct dc_plane_state *phantom_plane,
+		struct dc_state *state)
+{
+	return dc_state_remove_plane(dc, phantom_stream, phantom_plane, state);
+}
+
+bool dc_state_rem_all_phantom_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *phantom_stream,
+		struct dc_state *state,
+		bool should_release_planes)
+{
+	int i, old_plane_count;
+	struct dc_stream_status *stream_status = NULL;
+	struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 };
+
+	for (i = 0; i < state->stream_count; i++)
+		if (state->streams[i] == phantom_stream) {
+			stream_status = &state->stream_status[i];
+			break;
+		}
+
+	if (stream_status == NULL) {
+		dm_error("Existing stream %p not found!\n", phantom_stream);
+		return false;
+	}
+
+	old_plane_count = stream_status->plane_count;
+
+	for (i = 0; i < old_plane_count; i++)
+		del_planes[i] = stream_status->plane_states[i];
+
+	for (i = 0; i < old_plane_count; i++) {
+		if (!dc_state_remove_plane(dc, phantom_stream, del_planes[i], state))
+			return false;
+		if (should_release_planes)
+			dc_state_release_phantom_plane(dc, state, del_planes[i]);
+	}
+
+	return true;
+}
+
+bool dc_state_add_all_phantom_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *phantom_stream,
+		struct dc_plane_state * const *phantom_planes,
+		int plane_count,
+		struct dc_state *state)
+{
+	return dc_state_add_all_planes_for_stream(dc, phantom_stream, phantom_planes, plane_count, state);
+}
+
+bool dc_state_remove_phantom_streams_and_planes(
+	struct dc *dc,
+	struct dc_state *state)
+{
+	int i;
+	bool removed_phantom = false;
+	struct dc_stream_state *phantom_stream = NULL;
+
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i];
+
+		if (pipe->plane_state && pipe->stream && dc_state_get_pipe_subvp_type(state, pipe) == SUBVP_PHANTOM) {
+			phantom_stream = pipe->stream;
+
+			dc_state_rem_all_phantom_planes_for_stream(dc, phantom_stream, state, false);
+			dc_state_remove_phantom_stream(dc, state, phantom_stream);
+			removed_phantom = true;
+		}
+	}
+	return removed_phantom;
+}
+
+void dc_state_release_phantom_streams_and_planes(struct dc *dc, struct dc_state *state)
+{
+	int i;
+
+	/* Walk from the tail: each release untracks its entry, so a forward walk
+	 * would skip entries if untracking compacts the array (TODO confirm).
+	 */
+	for (i = state->phantom_stream_count; i-- > 0; )
+		dc_state_release_phantom_stream(dc, state, state->phantom_streams[i]);
+	for (i = state->phantom_plane_count; i-- > 0; )
+		dc_state_release_phantom_plane(dc, state, state->phantom_planes[i]);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index 4bdf105d1d71..54670e0b1518 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -31,6 +31,8 @@
 #include "ipp.h"
 #include "timing_generator.h"
 #include "dc_dmub_srv.h"
+#include "dc_state_priv.h"
+#include "dc_stream_priv.h"
 
 #define DC_LOGGER dc->ctx->logger
 
@@ -54,7 +56,7 @@ void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink)
 	}
 }
 
-static bool dc_stream_construct(struct dc_stream_state *stream,
+bool dc_stream_construct(struct dc_stream_state *stream,
 	struct dc_sink *dc_sink_data)
 {
 	uint32_t i = 0;
@@ -121,13 +123,12 @@ static bool dc_stream_construct(struct dc_stream_state *stream,
 	}
 	stream->out_transfer_func->type = TF_TYPE_BYPASS;
 
-	stream->stream_id = stream->ctx->dc_stream_id_count;
-	stream->ctx->dc_stream_id_count++;
+	dc_stream_assign_stream_id(stream);
 
 	return true;
 }
 
-static void dc_stream_destruct(struct dc_stream_state *stream)
+void dc_stream_destruct(struct dc_stream_state *stream)
 {
 	dc_sink_release(stream->sink);
 	if (stream->out_transfer_func != NULL) {
@@ -136,6 +137,13 @@ static void dc_stream_destruct(struct dc_stream_state *stream)
 	}
 }
 
+void dc_stream_assign_stream_id(struct dc_stream_state *stream)
+{
+	/* MSB is reserved to indicate phantoms */
+	stream->stream_id = stream->ctx->dc_stream_id_count;
+	stream->ctx->dc_stream_id_count++;
+}
+
 void dc_stream_retain(struct dc_stream_state *stream)
 {
 	kref_get(&stream->refcount);
@@ -196,8 +204,7 @@ struct dc_stream_state *dc_copy_stream(const struct dc_stream_state *stream)
 	if (new_stream->out_transfer_func)
 		dc_transfer_func_retain(new_stream->out_transfer_func);
 
-	new_stream->stream_id = new_stream->ctx->dc_stream_id_count;
-	new_stream->ctx->dc_stream_id_count++;
+	dc_stream_assign_stream_id(new_stream);
 
 	/* If using dynamic encoder assignment, wait till stream committed to assign encoder. */
 	if (new_stream->ctx->dc->res_pool->funcs->link_encs_assign)
@@ -209,31 +216,6 @@ struct dc_stream_state *dc_copy_stream(const struct dc_stream_state *stream)
 }
 
 /**
- * dc_stream_get_status_from_state - Get stream status from given dc state
- * @state: DC state to find the stream status in
- * @stream: The stream to get the stream status for
- *
- * The given stream is expected to exist in the given dc state. Otherwise, NULL
- * will be returned.
- */
-struct dc_stream_status *dc_stream_get_status_from_state(
-	struct dc_state *state,
-	struct dc_stream_state *stream)
-{
-	uint8_t i;
-
-	if (state == NULL)
-		return NULL;
-
-	for (i = 0; i < state->stream_count; i++) {
-		if (stream == state->streams[i])
-			return &state->stream_status[i];
-	}
-
-	return NULL;
-}
-
-/**
  * dc_stream_get_status() - Get current stream status of the given stream state
  * @stream: The stream to get the stream status for.
  *
@@ -244,7 +226,7 @@ struct dc_stream_status *dc_stream_get_status(
 	struct dc_stream_state *stream)
 {
 	struct dc *dc = stream->ctx->dc;
-	return dc_stream_get_status_from_state(dc->current_state, stream);
+	return dc_state_get_stream_status(dc->current_state, stream);
 }
 
 static void program_cursor_attributes(
@@ -465,16 +447,37 @@ bool dc_stream_add_writeback(struct dc *dc,
 	if (dc->hwss.enable_writeback) {
 		struct dc_stream_status *stream_status = dc_stream_get_status(stream);
 		struct dwbc *dwb = dc->res_pool->dwbc[wb_info->dwb_pipe_inst];
-		dwb->otg_inst = stream_status->primary_otg_inst;
+		if (stream_status)
+			dwb->otg_inst = stream_status->primary_otg_inst;
 	}
+
+	if (!dc->hwss.update_bandwidth(dc, dc->current_state)) {
+		dm_error("DC: update_bandwidth failed!\n");
+		return false;
+	}
+
+	/* enable writeback */
+	if (dc->hwss.enable_writeback) {
+		struct dwbc *dwb = dc->res_pool->dwbc[wb_info->dwb_pipe_inst];
+
+		if (dwb->funcs->is_enabled(dwb)) {
+			/* writeback pipe already enabled, only need to update */
+			dc->hwss.update_writeback(dc, wb_info, dc->current_state);
+		} else {
+			/* Enable writeback pipe from scratch*/
+			dc->hwss.enable_writeback(dc, wb_info, dc->current_state);
+		}
+	}
+
 	return true;
 }
 
-bool dc_stream_remove_writeback(struct dc *dc,
+bool dc_stream_fc_disable_writeback(struct dc *dc,
 		struct dc_stream_state *stream,
 		uint32_t dwb_pipe_inst)
 {
-	int i = 0, j = 0;
+	struct dwbc *dwb = dc->res_pool->dwbc[dwb_pipe_inst];
+
 	if (stream == NULL) {
 		dm_error("DC: dc_stream is NULL!\n");
 		return false;
@@ -490,27 +493,63 @@ bool dc_stream_remove_writeback(struct dc *dc,
 		return false;
 	}
 
-//	stream->writeback_info[dwb_pipe_inst].wb_enabled = false;
-	for (i = 0; i < stream->num_wb_info; i++) {
-		/*dynamic update*/
-		if (stream->writeback_info[i].wb_enabled &&
-			stream->writeback_info[i].dwb_pipe_inst == dwb_pipe_inst) {
-			stream->writeback_info[i].wb_enabled = false;
-		}
+	if (dwb->funcs->set_fc_enable)
+		dwb->funcs->set_fc_enable(dwb, DWB_FRAME_CAPTURE_DISABLE);
+
+	return true;
+}
+
+bool dc_stream_remove_writeback(struct dc *dc,
+		struct dc_stream_state *stream,
+		uint32_t dwb_pipe_inst)
+{
+	int i = 0, j = 0;
+	if (stream == NULL) {
+		dm_error("DC: dc_stream is NULL!\n");
+		return false;
+	}
+
+	if (dwb_pipe_inst >= MAX_DWB_PIPES) {
+		dm_error("DC: writeback pipe is invalid!\n");
+		return false;
+	}
+
+	if (stream->num_wb_info > MAX_DWB_PIPES) {
+		dm_error("DC: num_wb_info is invalid!\n");
+		return false;
 	}
 
 	/* remove writeback info for disabled writeback pipes from stream */
 	for (i = 0, j = 0; i < stream->num_wb_info; i++) {
 		if (stream->writeback_info[i].wb_enabled) {
-			if (j < i)
-				/* trim the array */
+
+			if (stream->writeback_info[i].dwb_pipe_inst == dwb_pipe_inst)
+				stream->writeback_info[i].wb_enabled = false;
+
+			/* trim the array */
+			if (j < i) {
 				memcpy(&stream->writeback_info[j], &stream->writeback_info[i],
 						sizeof(struct dc_writeback_info));
-			j++;
+				j++;
+			}
 		}
 	}
 	stream->num_wb_info = j;
 
+	/* recalculate and apply DML parameters */
+	if (!dc->hwss.update_bandwidth(dc, dc->current_state)) {
+		dm_error("DC: update_bandwidth failed!\n");
+		return false;
+	}
+
+	/* disable writeback */
+	if (dc->hwss.disable_writeback) {
+		struct dwbc *dwb = dc->res_pool->dwbc[dwb_pipe_inst];
+
+		if (dwb->funcs->is_enabled(dwb))
+			dc->hwss.disable_writeback(dc, dwb_pipe_inst);
+	}
+
 	return true;
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
index a80e45300783..19a2c7140ae8 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
@@ -32,10 +32,12 @@
 #include "transform.h"
 #include "dpp.h"
 
+#include "dc_plane_priv.h"
+
 /*******************************************************************************
  * Private functions
  ******************************************************************************/
-static void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *plane_state)
+void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *plane_state)
 {
 	plane_state->ctx = ctx;
 
@@ -63,7 +65,7 @@ static void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *pl
 
 }
 
-static void dc_plane_destruct(struct dc_plane_state *plane_state)
+void dc_plane_destruct(struct dc_plane_state *plane_state)
 {
 	if (plane_state->gamma_correction != NULL) {
 		dc_gamma_release(&plane_state->gamma_correction);
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 9316b737a8ba..c9317ea0258e 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -27,6 +27,8 @@
 #define DC_INTERFACE_H_
 
 #include "dc_types.h"
+#include "dc_state.h"
+#include "dc_plane.h"
 #include "grph_object_defs.h"
 #include "logger_types.h"
 #include "hdcp_msg_types.h"
@@ -49,7 +51,7 @@ struct aux_payload;
 struct set_config_cmd_payload;
 struct dmub_notification;
 
-#define DC_VER "3.2.259"
+#define DC_VER "3.2.266"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
@@ -432,6 +434,7 @@ struct dc_config {
 	bool EnableMinDispClkODM;
 	bool enable_auto_dpm_test_logs;
 	unsigned int disable_ips;
+	unsigned int disable_ips_in_vpb;
 };
 
 enum visual_confirm {
@@ -461,6 +464,12 @@ enum dml_hostvm_override_opts {
 	DML_HOSTVM_OVERRIDE_TRUE = 0x2,
 };
 
+enum dc_replay_power_opts {
+	replay_power_opt_invalid		= 0x0,
+	replay_power_opt_smu_opt_static_screen	= 0x1,
+	replay_power_opt_z10_static_screen	= 0x10,
+};
+
 enum dcc_option {
 	DCC_ENABLE = 0,
 	DCC_DISABLE = 1,
@@ -874,6 +883,7 @@ struct dc_debug_options {
 	unsigned int seamless_boot_odm_combine;
 	unsigned int force_odm_combine_4to1; //bit vector based on otg inst
 	int minimum_z8_residency_time;
+	int minimum_z10_residency_time;
 	bool disable_z9_mpc;
 	unsigned int force_fclk_khz;
 	bool enable_tri_buf;
@@ -955,7 +965,6 @@ struct dc_debug_options {
 	unsigned int min_prefetch_in_strobe_ns;
 	bool disable_unbounded_requesting;
 	bool dig_fifo_off_in_blank;
-	bool temp_mst_deallocation_sequence;
 	bool override_dispclk_programming;
 	bool otg_crc_db;
 	bool disallow_dispclk_dppclk_ds;
@@ -978,6 +987,9 @@ struct dc_debug_options {
 	bool psp_disabled_wa;
 	unsigned int ips2_eval_delay_us;
 	unsigned int ips2_entry_delay_us;
+	bool disable_timeout;
+	bool disable_extblankadj;
+	unsigned int static_screen_wait_frames;
 };
 
 struct gpu_info_soc_bounding_box_v1_0;
@@ -1388,13 +1400,6 @@ struct dc_surface_update {
 /*
  * Create a new surface with default parameters;
  */
-struct dc_plane_state *dc_create_plane_state(struct dc *dc);
-const struct dc_plane_status *dc_plane_get_status(
-		const struct dc_plane_state *plane_state);
-
-void dc_plane_state_retain(struct dc_plane_state *plane_state);
-void dc_plane_state_release(struct dc_plane_state *plane_state);
-
 void dc_gamma_retain(struct dc_gamma *dc_gamma);
 void dc_gamma_release(struct dc_gamma **dc_gamma);
 struct dc_gamma *dc_create_gamma(void);
@@ -1458,37 +1463,20 @@ enum dc_status dc_validate_global_state(
 		struct dc_state *new_ctx,
 		bool fast_validate);
 
-
-void dc_resource_state_construct(
-		const struct dc *dc,
-		struct dc_state *dst_ctx);
-
 bool dc_acquire_release_mpc_3dlut(
 		struct dc *dc, bool acquire,
 		struct dc_stream_state *stream,
 		struct dc_3dlut **lut,
 		struct dc_transfer_func **shaper);
 
-void dc_resource_state_copy_construct(
-		const struct dc_state *src_ctx,
-		struct dc_state *dst_ctx);
-
-void dc_resource_state_copy_construct_current(
-		const struct dc *dc,
-		struct dc_state *dst_ctx);
-
-void dc_resource_state_destruct(struct dc_state *context);
-
 bool dc_resource_is_dsc_encoding_supported(const struct dc *dc);
+void get_audio_check(struct audio_info *aud_modes,
+	struct audio_check *aud_chk);
 
 enum dc_status dc_commit_streams(struct dc *dc,
 				 struct dc_stream_state *streams[],
 				 uint8_t stream_count);
 
-struct dc_state *dc_create_state(struct dc *dc);
-struct dc_state *dc_copy_state(struct dc_state *src_ctx);
-void dc_retain_state(struct dc_state *context);
-void dc_release_state(struct dc_state *context);
 
 struct dc_plane_state *dc_get_surface_for_mpcc(struct dc *dc,
 		struct dc_stream_state *stream,
@@ -1540,7 +1528,13 @@ struct dc_link {
 	bool is_dig_mapping_flexible;
 	bool hpd_status; /* HPD status of link without physical HPD pin. */
 	bool is_hpd_pending; /* Indicates a new received hpd */
-	bool is_automated; /* Indicates automated testing */
+
+	/* USB4 DPIA links skip verifying link cap, instead performing the fallback method
+	 * for every link training. This is incompatible with DP LL compliance automation,
+	 * which expects the same link settings to be used every retry on a link loss.
+	 * This flag is used to skip the fallback when link loss occurs during automation.
+	 */
+	bool skip_fallback_on_link_loss;
 
 	bool edp_sink_present;
 
@@ -1608,7 +1602,6 @@ struct dc_link {
 	enum edp_revision edp_revision;
 	union dpcd_sink_ext_caps dpcd_sink_ext_caps;
 
-	struct backlight_settings backlight_settings;
 	struct psr_settings psr_settings;
 
 	struct replay_settings replay_settings;
@@ -2092,6 +2085,20 @@ bool dc_link_setup_psr(struct dc_link *dc_link,
 		const struct dc_stream_state *stream, struct psr_config *psr_config,
 		struct psr_context *psr_context);
 
+/*
+ * Communicate with DMUB to allow or disallow Panel Replay on the specified link:
+ *
+ * @link: pointer to the dc_link struct instance
+ * @enable: enable(active) or disable(inactive) replay
+ * @wait: whether to wait for the state transition to complete.
+ * @force_static: force disable(inactive) the replay
+ * @power_opts: power optimization parameters to send to DMUB.
+ *
+ * return: allow Replay active will return true, else will return false.
+ */
+bool dc_link_set_replay_allow_active(struct dc_link *dc_link, const bool *enable,
+		bool wait, bool force_static, const unsigned int *power_opts);
+
 bool dc_link_get_replay_state(const struct dc_link *dc_link, uint64_t *state);
 
 /* On eDP links this function call will stall until T12 has elapsed.
@@ -2187,11 +2194,11 @@ int dc_link_dp_dpia_handle_usb4_bandwidth_allocation_for_link(
  *
  * @dc: pointer to dc struct
  * @stream: pointer to all possible streams
- * @num_streams: number of valid DPIA streams
+ * @count: number of valid DPIA streams
  *
  * return: TRUE if bw used by DPIAs doesn't exceed available BW else return FALSE
  */
-bool dc_link_validate(struct dc *dc, const struct dc_stream_state *streams,
+bool dc_link_dp_dpia_validate(struct dc *dc, const struct dc_stream_state *streams,
 		const unsigned int count);
 
 /* Sink Interfaces - A sink corresponds to a display output device */
@@ -2336,6 +2343,9 @@ void dc_hardware_release(struct dc *dc);
 void dc_mclk_switch_using_fw_based_vblank_stretch_shut_down(struct dc *dc);
 
 bool dc_set_psr_allow_active(struct dc *dc, bool enable);
+
+bool dc_set_replay_allow_active(struct dc *dc, bool active);
+
 void dc_z10_restore(const struct dc *dc);
 void dc_z10_save_init(struct dc *dc);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h
index be9aa1a71847..26940d94d8fb 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h
@@ -140,7 +140,7 @@ struct dc_vbios_funcs {
 	enum bp_result (*enable_lvtma_control)(
 		struct dc_bios *bios,
 		uint8_t uc_pwr_on,
-		uint8_t panel_instance,
+		uint8_t pwrseq_instance,
 		uint8_t bypass_panel_control_wait);
 
 	enum bp_result (*get_soc_bb_info)(
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index 0e07699c1e83..363d522603a2 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -33,6 +33,7 @@
 #include "cursor_reg_cache.h"
 #include "resource.h"
 #include "clk_mgr.h"
+#include "dc_state_priv.h"
 
 #define CTX dc_dmub_srv->ctx
 #define DC_LOGGER CTX->logger
@@ -124,7 +125,7 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv,
 		unsigned int count,
 		union dmub_rb_cmd *cmd_list)
 {
-	struct dc_context *dc_ctx = dc_dmub_srv->ctx;
+	struct dc_context *dc_ctx;
 	struct dmub_srv *dmub;
 	enum dmub_status status;
 	int i;
@@ -132,6 +133,7 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv,
 	if (!dc_dmub_srv || !dc_dmub_srv->dmub)
 		return false;
 
+	dc_ctx = dc_dmub_srv->ctx;
 	dmub = dc_dmub_srv->dmub;
 
 	for (i = 0 ; i < count; i++) {
@@ -140,7 +142,10 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv,
 
 		if (status == DMUB_STATUS_QUEUE_FULL) {
 			/* Execute and wait for queue to become empty again. */
-			dmub_srv_cmd_execute(dmub);
+			status = dmub_srv_cmd_execute(dmub);
+			if (status == DMUB_STATUS_POWER_STATE_D3)
+				return false;
+
 			dmub_srv_wait_for_idle(dmub, 100000);
 
 			/* Requeue the command. */
@@ -148,16 +153,20 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv,
 		}
 
 		if (status != DMUB_STATUS_OK) {
-			DC_ERROR("Error queueing DMUB command: status=%d\n", status);
-			dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+			if (status != DMUB_STATUS_POWER_STATE_D3) {
+				DC_ERROR("Error queueing DMUB command: status=%d\n", status);
+				dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+			}
 			return false;
 		}
 	}
 
 	status = dmub_srv_cmd_execute(dmub);
 	if (status != DMUB_STATUS_OK) {
-		DC_ERROR("Error starting DMUB execution: status=%d\n", status);
-		dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+		if (status != DMUB_STATUS_POWER_STATE_D3) {
+			DC_ERROR("Error starting DMUB execution: status=%d\n", status);
+			dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+		}
 		return false;
 	}
 
@@ -218,7 +227,10 @@ bool dc_dmub_srv_cmd_run_list(struct dc_dmub_srv *dc_dmub_srv, unsigned int coun
 
 		if (status == DMUB_STATUS_QUEUE_FULL) {
 			/* Execute and wait for queue to become empty again. */
-			dmub_srv_cmd_execute(dmub);
+			status = dmub_srv_cmd_execute(dmub);
+			if (status == DMUB_STATUS_POWER_STATE_D3)
+				return false;
+
 			dmub_srv_wait_for_idle(dmub, 100000);
 
 			/* Requeue the command. */
@@ -226,22 +238,31 @@ bool dc_dmub_srv_cmd_run_list(struct dc_dmub_srv *dc_dmub_srv, unsigned int coun
 		}
 
 		if (status != DMUB_STATUS_OK) {
-			DC_ERROR("Error queueing DMUB command: status=%d\n", status);
-			dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+			if (status != DMUB_STATUS_POWER_STATE_D3) {
+				DC_ERROR("Error queueing DMUB command: status=%d\n", status);
+				dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+			}
 			return false;
 		}
 	}
 
 	status = dmub_srv_cmd_execute(dmub);
 	if (status != DMUB_STATUS_OK) {
-		DC_ERROR("Error starting DMUB execution: status=%d\n", status);
-		dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+		if (status != DMUB_STATUS_POWER_STATE_D3) {
+			DC_ERROR("Error starting DMUB execution: status=%d\n", status);
+			dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+		}
 		return false;
 	}
 
 	// Wait for DMUB to process command
 	if (wait_type != DM_DMUB_WAIT_TYPE_NO_WAIT) {
-		status = dmub_srv_wait_for_idle(dmub, 100000);
+		if (dc_dmub_srv->ctx->dc->debug.disable_timeout) {
+			do {
+				status = dmub_srv_wait_for_idle(dmub, 100000);
+			} while (status != DMUB_STATUS_OK);
+		} else
+			status = dmub_srv_wait_for_idle(dmub, 100000);
 
 		if (status != DMUB_STATUS_OK) {
 			DC_LOG_DEBUG("No reply for DMUB command: status=%d\n", status);
@@ -282,17 +303,11 @@ bool dc_dmub_srv_optimized_init_done(struct dc_dmub_srv *dc_dmub_srv)
 bool dc_dmub_srv_notify_stream_mask(struct dc_dmub_srv *dc_dmub_srv,
 				    unsigned int stream_mask)
 {
-	struct dmub_srv *dmub;
-	const uint32_t timeout = 30;
-
 	if (!dc_dmub_srv || !dc_dmub_srv->dmub)
 		return false;
 
-	dmub = dc_dmub_srv->dmub;
-
-	return dmub_srv_send_gpint_command(
-		       dmub, DMUB_GPINT__IDLE_OPT_NOTIFY_STREAM_MASK,
-		       stream_mask, timeout) == DMUB_STATUS_OK;
+	return dc_wake_and_execute_gpint(dc_dmub_srv->ctx, DMUB_GPINT__IDLE_OPT_NOTIFY_STREAM_MASK,
+					 stream_mask, NULL, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 bool dc_dmub_srv_is_restore_required(struct dc_dmub_srv *dc_dmub_srv)
@@ -341,7 +356,7 @@ void dc_dmub_srv_drr_update_cmd(struct dc *dc, uint32_t tg_inst, uint32_t vtotal
 	cmd.drr_update.header.payload_bytes = sizeof(cmd.drr_update) - sizeof(cmd.drr_update.header);
 
 	// Send the command to the DMCUB.
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 void dc_dmub_srv_set_drr_manual_trigger_cmd(struct dc *dc, uint32_t tg_inst)
@@ -355,7 +370,7 @@ void dc_dmub_srv_set_drr_manual_trigger_cmd(struct dc *dc, uint32_t tg_inst)
 	cmd.drr_update.header.payload_bytes = sizeof(cmd.drr_update) - sizeof(cmd.drr_update.header);
 
 	// Send the command to the DMCUB.
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static uint8_t dc_dmub_srv_get_pipes_for_stream(struct dc *dc, struct dc_stream_state *stream)
@@ -448,7 +463,7 @@ bool dc_dmub_srv_p_state_delegate(struct dc *dc, bool should_manage_pstate, stru
 		sizeof(cmd.fw_assisted_mclk_switch) - sizeof(cmd.fw_assisted_mclk_switch.header);
 
 	// Send the command to the DMCUB.
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
@@ -469,7 +484,7 @@ void dc_dmub_srv_query_caps_cmd(struct dc_dmub_srv *dc_dmub_srv)
 	cmd.query_feature_caps.header.payload_bytes = sizeof(struct dmub_cmd_query_feature_caps_data);
 
 	/* If command was processed, copy feature caps to dmub srv */
-	if (dm_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
+	if (dc_wake_and_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
 	    cmd.query_feature_caps.header.ret_status == 0) {
 		memcpy(&dc_dmub_srv->dmub->feature_caps,
 		       &cmd.query_feature_caps.query_feature_caps_data,
@@ -494,7 +509,7 @@ void dc_dmub_srv_get_visual_confirm_color_cmd(struct dc *dc, struct pipe_ctx *pi
 	cmd.visual_confirm_color.visual_confirm_color_data.visual_confirm_color.panel_inst = panel_inst;
 
 	// If command was processed, copy feature caps to dmub srv
-	if (dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
+	if (dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
 		cmd.visual_confirm_color.header.ret_status == 0) {
 		memcpy(&dc->ctx->dmub_srv->dmub->visual_confirm_color,
 			&cmd.visual_confirm_color.visual_confirm_color_data,
@@ -505,10 +520,11 @@ void dc_dmub_srv_get_visual_confirm_color_cmd(struct dc *dc, struct pipe_ctx *pi
 /**
  * populate_subvp_cmd_drr_info - Helper to populate DRR pipe info for the DMCUB subvp command
  *
- * @dc: [in] current dc state
+ * @dc: [in] pointer to dc object
  * @subvp_pipe: [in] pipe_ctx for the SubVP pipe
  * @vblank_pipe: [in] pipe_ctx for the DRR pipe
  * @pipe_data: [in] Pipe data which stores the VBLANK/DRR info
+ * @context: [in] DC state for access to phantom stream
  *
  * Populate the DMCUB SubVP command with DRR pipe info. All the information
  * required for calculating the SubVP + DRR microschedule is populated here.
@@ -519,12 +535,14 @@ void dc_dmub_srv_get_visual_confirm_color_cmd(struct dc *dc, struct pipe_ctx *pi
  * 3. Populate the drr_info with the min and max supported vtotal values
  */
 static void populate_subvp_cmd_drr_info(struct dc *dc,
+		struct dc_state *context,
 		struct pipe_ctx *subvp_pipe,
 		struct pipe_ctx *vblank_pipe,
 		struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data)
 {
+	struct dc_stream_state *phantom_stream = dc_state_get_paired_subvp_stream(context, subvp_pipe->stream);
 	struct dc_crtc_timing *main_timing = &subvp_pipe->stream->timing;
-	struct dc_crtc_timing *phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing;
+	struct dc_crtc_timing *phantom_timing = &phantom_stream->timing;
 	struct dc_crtc_timing *drr_timing = &vblank_pipe->stream->timing;
 	uint16_t drr_frame_us = 0;
 	uint16_t min_drr_supported_us = 0;
@@ -612,7 +630,7 @@ static void populate_subvp_cmd_vblank_pipe_info(struct dc *dc,
 			continue;
 
 		// Find the SubVP pipe
-		if (pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+		if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN)
 			break;
 	}
 
@@ -629,7 +647,7 @@ static void populate_subvp_cmd_vblank_pipe_info(struct dc *dc,
 
 	if (vblank_pipe->stream->ignore_msa_timing_param &&
 		(vblank_pipe->stream->allow_freesync || vblank_pipe->stream->vrr_active_variable || vblank_pipe->stream->vrr_active_fixed))
-		populate_subvp_cmd_drr_info(dc, pipe, vblank_pipe, pipe_data);
+		populate_subvp_cmd_drr_info(dc, context, pipe, vblank_pipe, pipe_data);
 }
 
 /**
@@ -654,10 +672,17 @@ static void update_subvp_prefetch_end_to_mall_start(struct dc *dc,
 	uint32_t subvp0_prefetch_us = 0;
 	uint32_t subvp1_prefetch_us = 0;
 	uint32_t prefetch_delta_us = 0;
-	struct dc_crtc_timing *phantom_timing0 = &subvp_pipes[0]->stream->mall_stream_config.paired_stream->timing;
-	struct dc_crtc_timing *phantom_timing1 = &subvp_pipes[1]->stream->mall_stream_config.paired_stream->timing;
+	struct dc_stream_state *phantom_stream0 = NULL;
+	struct dc_stream_state *phantom_stream1 = NULL;
+	struct dc_crtc_timing *phantom_timing0 = NULL;
+	struct dc_crtc_timing *phantom_timing1 = NULL;
 	struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data = NULL;
 
+	phantom_stream0 = dc_state_get_paired_subvp_stream(context, subvp_pipes[0]->stream);
+	phantom_stream1 = dc_state_get_paired_subvp_stream(context, subvp_pipes[1]->stream);
+	phantom_timing0 = &phantom_stream0->timing;
+	phantom_timing1 = &phantom_stream1->timing;
+
 	subvp0_prefetch_us = div64_u64(((uint64_t)(phantom_timing0->v_total - phantom_timing0->v_front_porch) *
 			(uint64_t)phantom_timing0->h_total * 1000000),
 			(((uint64_t)phantom_timing0->pix_clk_100hz * 100) + dc->caps.subvp_prefetch_end_to_mall_start_us));
@@ -707,8 +732,9 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc,
 	uint32_t j;
 	struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data =
 			&cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[cmd_pipe_index];
+	struct dc_stream_state *phantom_stream = dc_state_get_paired_subvp_stream(context, subvp_pipe->stream);
 	struct dc_crtc_timing *main_timing = &subvp_pipe->stream->timing;
-	struct dc_crtc_timing *phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing;
+	struct dc_crtc_timing *phantom_timing = &phantom_stream->timing;
 	uint32_t out_num_stream, out_den_stream, out_num_plane, out_den_plane, out_num, out_den;
 
 	pipe_data->mode = SUBVP;
@@ -762,7 +788,7 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc,
 	for (j = 0; j < dc->res_pool->pipe_count; j++) {
 		struct pipe_ctx *phantom_pipe = &context->res_ctx.pipe_ctx[j];
 
-		if (phantom_pipe->stream == subvp_pipe->stream->mall_stream_config.paired_stream) {
+		if (phantom_pipe->stream == dc_state_get_paired_subvp_stream(context, subvp_pipe->stream)) {
 			pipe_data->pipe_config.subvp_data.phantom_pipe_index = phantom_pipe->stream_res.tg->inst;
 			if (phantom_pipe->bottom_pipe) {
 				pipe_data->pipe_config.subvp_data.phantom_split_pipe_index = phantom_pipe->bottom_pipe->plane_res.hubp->inst;
@@ -796,6 +822,7 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc,
 	union dmub_rb_cmd cmd;
 	struct pipe_ctx *subvp_pipes[2];
 	uint32_t wm_val_refclk = 0;
+	enum mall_stream_type pipe_mall_type;
 
 	memset(&cmd, 0, sizeof(cmd));
 	// FW command for SUBVP
@@ -811,7 +838,7 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc,
 		 */
 		if (resource_is_pipe_type(pipe, OTG_MASTER) &&
 				resource_is_pipe_type(pipe, DPP_PIPE) &&
-				pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+				dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN)
 			subvp_pipes[subvp_count++] = pipe;
 	}
 
@@ -819,6 +846,7 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc,
 		// For each pipe that is a "main" SUBVP pipe, fill in pipe data for DMUB SUBVP cmd
 		for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
 			struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+			pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe);
 
 			if (!pipe->stream)
 				continue;
@@ -829,12 +857,11 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc,
 			 */
 			if (resource_is_pipe_type(pipe, OTG_MASTER) &&
 					resource_is_pipe_type(pipe, DPP_PIPE) &&
-					pipe->stream->mall_stream_config.paired_stream &&
-					pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+					pipe_mall_type == SUBVP_MAIN) {
 				populate_subvp_cmd_pipe_info(dc, context, &cmd, pipe, cmd_pipe_index++);
 			} else if (resource_is_pipe_type(pipe, OTG_MASTER) &&
 					resource_is_pipe_type(pipe, DPP_PIPE) &&
-					pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+					pipe_mall_type == SUBVP_NONE) {
 				// Don't need to check for ActiveDRAMClockChangeMargin < 0, not valid in cases where
 				// we run through DML without calculating "natural" P-state support
 				populate_subvp_cmd_vblank_pipe_info(dc, context, &cmd, pipe, cmd_pipe_index++);
@@ -856,7 +883,7 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc,
 		cmd.fw_assisted_mclk_switch_v2.config_data.watermark_a_cache = wm_val_refclk < 0xFFFF ? wm_val_refclk : 0xFFFF;
 	}
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 bool dc_dmub_srv_get_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv, struct dmub_diagnostic_data *diag_data)
@@ -1093,7 +1120,7 @@ void dc_send_update_cursor_info_to_dmu(
 				pipe_idx, pCtx->plane_res.hubp, pCtx->plane_res.dpp);
 
 		/* Combine 2nd cmds update_curosr_info to DMU */
-		dm_execute_dmub_cmd_list(pCtx->stream->ctx, 2, cmd, DM_DMUB_WAIT_TYPE_WAIT);
+		dc_wake_and_execute_dmub_cmd_list(pCtx->stream->ctx, 2, cmd, DM_DMUB_WAIT_TYPE_WAIT);
 	}
 }
 
@@ -1107,25 +1134,20 @@ bool dc_dmub_check_min_version(struct dmub_srv *srv)
 void dc_dmub_srv_enable_dpia_trace(const struct dc *dc)
 {
 	struct dc_dmub_srv *dc_dmub_srv = dc->ctx->dmub_srv;
-	struct dmub_srv *dmub;
-	enum dmub_status status;
-	static const uint32_t timeout_us = 30;
 
 	if (!dc_dmub_srv || !dc_dmub_srv->dmub) {
 		DC_LOG_ERROR("%s: invalid parameters.", __func__);
 		return;
 	}
 
-	dmub = dc_dmub_srv->dmub;
-
-	status = dmub_srv_send_gpint_command(dmub, DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD1, 0x0010, timeout_us);
-	if (status != DMUB_STATUS_OK) {
+	if (!dc_wake_and_execute_gpint(dc->ctx, DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD1,
+				       0x0010, NULL, DM_DMUB_WAIT_TYPE_WAIT)) {
 		DC_LOG_ERROR("timeout updating trace buffer mask word\n");
 		return;
 	}
 
-	status = dmub_srv_send_gpint_command(dmub, DMUB_GPINT__UPDATE_TRACE_BUFFER_MASK, 0x0000, timeout_us);
-	if (status != DMUB_STATUS_OK) {
+	if (!dc_wake_and_execute_gpint(dc->ctx, DMUB_GPINT__UPDATE_TRACE_BUFFER_MASK,
+				       0x0000, NULL, DM_DMUB_WAIT_TYPE_WAIT)) {
 		DC_LOG_ERROR("timeout updating trace buffer mask word\n");
 		return;
 	}
@@ -1140,17 +1162,28 @@ void dc_dmub_srv_subvp_save_surf_addr(const struct dc_dmub_srv *dc_dmub_srv, con
 
 bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait)
 {
-	struct dc_context *dc_ctx = dc_dmub_srv->ctx;
+	struct dc_context *dc_ctx;
 	enum dmub_status status;
 
+	if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+		return true;
+
 	if (dc_dmub_srv->ctx->dc->debug.dmcub_emulation)
 		return true;
 
+	dc_ctx = dc_dmub_srv->ctx;
+
 	if (wait) {
-		status = dmub_srv_wait_for_hw_pwr_up(dc_dmub_srv->dmub, 500000);
-		if (status != DMUB_STATUS_OK) {
-			DC_ERROR("Error querying DMUB hw power up status: error=%d\n", status);
-			return false;
+		if (dc_dmub_srv->ctx->dc->debug.disable_timeout) {
+			do {
+				status = dmub_srv_wait_for_hw_pwr_up(dc_dmub_srv->dmub, 500000);
+			} while (status != DMUB_STATUS_OK);
+		} else {
+			status = dmub_srv_wait_for_hw_pwr_up(dc_dmub_srv->dmub, 500000);
+			if (status != DMUB_STATUS_OK) {
+				DC_ERROR("Error querying DMUB hw power up status: error=%d\n", status);
+				return false;
+			}
 		}
 	} else
 		return dmub_srv_is_hw_pwr_up(dc_dmub_srv->dmub);
@@ -1158,7 +1191,7 @@ bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait)
 	return true;
 }
 
-void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle)
+static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle)
 {
 	union dmub_rb_cmd cmd = {0};
 
@@ -1179,20 +1212,20 @@ void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle)
 			dc->hwss.set_idle_state(dc, true);
 	}
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	/* NOTE: This does not use the "wake" interface since this is part of the wake path. */
+	/* We also do not perform a wait since DMCUB could enter idle after the notification. */
+	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
 }
 
-void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
+static void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
 {
-	const uint32_t max_num_polls = 10000;
 	uint32_t allow_state = 0;
 	uint32_t commit_state = 0;
-	uint32_t i;
 
 	if (dc->debug.dmcub_emulation)
 		return;
 
-	if (!dc->idle_optimizations_allowed)
+	if (!dc->ctx->dmub_srv || !dc->ctx->dmub_srv->dmub)
 		return;
 
 	if (dc->hwss.get_idle_state &&
@@ -1204,8 +1237,16 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
 
 		if (!(allow_state & DMUB_IPS2_ALLOW_MASK)) {
 			// Wait for evaluation time
-			udelay(dc->debug.ips2_eval_delay_us);
-			commit_state = dc->hwss.get_idle_state(dc);
+			for (;;) {
+				udelay(dc->debug.ips2_eval_delay_us);
+				commit_state = dc->hwss.get_idle_state(dc);
+				if (commit_state & DMUB_IPS2_ALLOW_MASK)
+					break;
+
+				/* allow was still set, retry eval delay */
+				dc->hwss.set_idle_state(dc, false);
+			}
+
 			if (!(commit_state & DMUB_IPS2_COMMIT_MASK)) {
 				// Tell PMFW to exit low power state
 				dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr);
@@ -1214,14 +1255,13 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
 				udelay(dc->debug.ips2_entry_delay_us);
 				dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr);
 
-				for (i = 0; i < max_num_polls; ++i) {
+				for (;;) {
 					commit_state = dc->hwss.get_idle_state(dc);
 					if (commit_state & DMUB_IPS2_COMMIT_MASK)
 						break;
 
 					udelay(1);
 				}
-				ASSERT(i < max_num_polls);
 
 				if (!dc_dmub_srv_is_hw_pwr_up(dc->ctx->dmub_srv, true))
 					ASSERT(0);
@@ -1236,14 +1276,13 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
 
 		dc_dmub_srv_notify_idle(dc, false);
 		if (!(allow_state & DMUB_IPS1_ALLOW_MASK)) {
-			for (i = 0; i < max_num_polls; ++i) {
+			for (;;) {
 				commit_state = dc->hwss.get_idle_state(dc);
 				if (commit_state & DMUB_IPS1_COMMIT_MASK)
 					break;
 
 				udelay(1);
 			}
-			ASSERT(i < max_num_polls);
 		}
 	}
 
@@ -1251,3 +1290,131 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
 		ASSERT(0);
 }
 
+/* Map the DC ACPI power state onto DMUB: D0 stays D0, every other state becomes D3. */
+void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state powerState)
+{
+	struct dmub_srv *dmub;
+
+	if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+		return;
+
+	dmub = dc_dmub_srv->dmub;
+	if (powerState == DC_ACPI_CM_POWER_STATE_D0)
+		dmub_srv_set_power_state(dmub, DMUB_POWER_STATE_D0);
+	else
+		dmub_srv_set_power_state(dmub, DMUB_POWER_STATE_D3);
+}
+
+void dc_dmub_srv_apply_idle_power_optimizations(const struct dc *dc, bool allow_idle)
+{
+	struct dc_dmub_srv *dc_dmub_srv = dc->ctx->dmub_srv;
+
+	if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+		return;
+	/* Tracked software state already matches the request: nothing to do. */
+	if (dc_dmub_srv->idle_allowed == allow_idle)
+		return;
+
+	/*
+	 * Entering a low power state requires a driver notification.
+	 * Powering up the hardware requires notifying PMFW and DMCUB.
+	 * Clearing the driver idle allow requires a DMCUB command.
+	 * DMCUB commands require the DMCUB to be powered up and restored.
+	 *
+	 * Exit out early to prevent an infinite loop of DMCUB commands
+	 * triggering exit low power - use software state to track this.
+	 */
+	dc_dmub_srv->idle_allowed = allow_idle;
+
+	if (!allow_idle)
+		dc_dmub_srv_exit_low_power_state(dc);
+	else
+		dc_dmub_srv_notify_idle(dc, allow_idle);
+}
+
+/* Single-command convenience form of dc_wake_and_execute_dmub_cmd_list(). */
+bool dc_wake_and_execute_dmub_cmd(const struct dc_context *ctx, union dmub_rb_cmd *cmd,
+				  enum dm_dmub_wait_type wait_type)
+{
+	return dc_wake_and_execute_dmub_cmd_list(ctx, 1, cmd, wait_type);
+}
+
+/* Disallow idle if it was allowed, submit @count commands, re-allow idle on success. */
+bool dc_wake_and_execute_dmub_cmd_list(const struct dc_context *ctx, unsigned int count,
+				       union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type)
+{
+	struct dc_dmub_srv *srv = ctx->dmub_srv;
+	bool was_idle_allowed, ok;
+
+	if (!srv || !srv->dmub)
+		return false;
+
+	/* An empty list is reported as a successful submission. */
+	if (count == 0)
+		return true;
+
+	/* Sample the flag first: disallowing idle below overwrites it. */
+	was_idle_allowed = srv->idle_allowed;
+	if (was_idle_allowed)
+		dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, false);
+
+	/* DM may implement the single and list helpers differently; pick by count. */
+	if (count == 1)
+		ok = dm_execute_dmub_cmd(ctx, cmd, wait_type);
+	else
+		ok = dm_execute_dmub_cmd_list(ctx, count, cmd, wait_type);
+
+	if (ok && was_idle_allowed)
+		dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, true);
+
+	return ok;
+}
+
+/* Send one GPINT command to DMCUB; @response (optional) is zeroed before anything else. */
+static bool dc_dmub_execute_gpint(const struct dc_context *ctx, enum dmub_gpint_command command_code,
+				  uint16_t param, uint32_t *response, enum dm_dmub_wait_type wait_type)
+{
+	const bool no_wait = wait_type == DM_DMUB_WAIT_TYPE_NO_WAIT;
+	struct dc_dmub_srv *srv = ctx->dmub_srv;
+	enum dmub_status status;
+
+	if (response)
+		*response = 0;
+
+	if (!srv || !srv->dmub)
+		return false;
+
+	status = dmub_srv_send_gpint_command(srv->dmub, command_code, param, no_wait ? 0 : 30);
+	/* With no wait requested, a timeout status still counts as a successful send. */
+	if (status == DMUB_STATUS_TIMEOUT && no_wait)
+		return true;
+	if (status != DMUB_STATUS_OK)
+		return false;
+
+	if (response && wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)
+		dmub_srv_get_gpint_response(srv->dmub, response);
+
+	return true;
+}
+
+bool dc_wake_and_execute_gpint(const struct dc_context *ctx, enum dmub_gpint_command command_code,
+			       uint16_t param, uint32_t *response, enum dm_dmub_wait_type wait_type)
+{
+	struct dc_dmub_srv *dc_dmub_srv = ctx->dmub_srv;
+	bool result, reallow_idle;
+
+	/* Clear the reply up front so it is defined on every path, incl. the early exit below. */
+	if (response)
+		*response = 0;
+	if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+		return false;
+
+	reallow_idle = dc_dmub_srv->idle_allowed;
+	if (reallow_idle)
+		dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, false);
+
+	result = dc_dmub_execute_gpint(ctx, command_code, param, response, wait_type);
+	if (result && reallow_idle)
+		dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, true);
+	return result;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
index d4a60f53faab..952bfb368886 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
@@ -50,6 +50,8 @@ struct dc_dmub_srv {
 
 	struct dc_context *ctx;
 	void *dm;
+
+	bool idle_allowed;
 };
 
 void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv);
@@ -100,6 +102,59 @@ void dc_dmub_srv_enable_dpia_trace(const struct dc *dc);
 void dc_dmub_srv_subvp_save_surf_addr(const struct dc_dmub_srv *dc_dmub_srv, const struct dc_plane_address *addr, uint8_t subvp_index);
 
 bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait);
-void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle);
-void dc_dmub_srv_exit_low_power_state(const struct dc *dc);
+
+void dc_dmub_srv_apply_idle_power_optimizations(const struct dc *dc, bool allow_idle);
+
+void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state powerState);
+
+/**
+ * dc_wake_and_execute_dmub_cmd() - Wrapper for DMUB command execution.
+ *
+ * Refer to dc_wake_and_execute_dmub_cmd_list() for usage and limitations.
+ * This function is a convenience wrapper for a single command execution.
+ *
+ * @ctx: DC context
+ * @cmd: The command to send/receive
+ * @wait_type: The wait behavior for the execution
+ *
+ * Return: true on command submission success, false otherwise
+ */
+bool dc_wake_and_execute_dmub_cmd(const struct dc_context *ctx, union dmub_rb_cmd *cmd,
+				  enum dm_dmub_wait_type wait_type);
+
+/**
+ * dc_wake_and_execute_dmub_cmd_list() - Wrapper for DMUB command list execution.
+ *
+ * If the DMCUB hardware was asleep then it wakes the DMUB before
+ * executing the command and attempts to re-enter idle if the command
+ * submission was successful.
+ *
+ * This should be the preferred command submission interface provided
+ * the DC lock is acquired.
+ *
+ * Entry/exit out of idle power optimizations would need to be
+ * manually performed otherwise through dc_allow_idle_optimizations().
+ *
+ * @ctx: DC context
+ * @count: Number of commands to send/receive
+ * @cmd: Array of commands to send
+ * @wait_type: The wait behavior for the execution
+ *
+ * Return: true on command submission success, false otherwise
+ */
+bool dc_wake_and_execute_dmub_cmd_list(const struct dc_context *ctx, unsigned int count,
+				       union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type);
+
+/**
+ * dc_wake_and_execute_gpint() - Wake DMCUB if needed and send a GPINT; returns true on success.
+ *
+ * @ctx: DC context
+ * @command_code: The command ID to send to DMCUB
+ * @param: The parameter to message DMCUB
+ * @response: Optional response out value - may be NULL.
+ * @wait_type: The wait behavior for the execution
+ */
+bool dc_wake_and_execute_gpint(const struct dc_context *ctx, enum dmub_gpint_command command_code,
+			       uint16_t param, uint32_t *response, enum dm_dmub_wait_type wait_type);
+
 #endif /* _DMUB_DC_SRV_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
index eeeeeef4d717..1cb7765f593a 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
@@ -1377,6 +1377,12 @@ struct dp_trace {
 #ifndef DP_TUNNELING_STATUS
 #define DP_TUNNELING_STATUS				0xE0025 /* 1.4a */
 #endif
+#ifndef DP_TUNNELING_MAX_LINK_RATE
+#define DP_TUNNELING_MAX_LINK_RATE			0xE0028 /* 1.4a */
+#endif
+#ifndef DP_TUNNELING_MAX_LANE_COUNT
+#define DP_TUNNELING_MAX_LANE_COUNT			0xE0029 /* 1.4a */
+#endif
 #ifndef DPTX_BW_ALLOCATION_MODE_CONTROL
 #define DPTX_BW_ALLOCATION_MODE_CONTROL			0xE0030 /* 1.4a */
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c
index cb6eaddab720..8f9a67825615 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c
@@ -50,7 +50,7 @@ static inline void submit_dmub_read_modify_write(
 	cmd_buf->header.payload_bytes =
 			sizeof(struct dmub_cmd_read_modify_write_sequence) * offload->reg_seq_count;
 
-	dm_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT);
+	dc_wake_and_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT);
 
 	memset(cmd_buf, 0, sizeof(*cmd_buf));
 
@@ -67,7 +67,7 @@ static inline void submit_dmub_burst_write(
 	cmd_buf->header.payload_bytes =
 			sizeof(uint32_t) * offload->reg_seq_count;
 
-	dm_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT);
+	dc_wake_and_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT);
 
 	memset(cmd_buf, 0, sizeof(*cmd_buf));
 
@@ -80,7 +80,7 @@ static inline void submit_dmub_reg_wait(
 {
 	struct dmub_rb_cmd_reg_wait *cmd_buf = &offload->cmd_data.reg_wait;
 
-	dm_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT);
+	dc_wake_and_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT);
 
 	memset(cmd_buf, 0, sizeof(*cmd_buf));
 	offload->reg_seq_count = 0;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
index 9649934ea186..811474f4419b 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
@@ -244,7 +244,7 @@ enum pixel_format {
 #define DC_MAX_DIRTY_RECTS 3
 struct dc_flip_addrs {
 	struct dc_plane_address address;
-	unsigned int flip_timestamp_in_us;
+	unsigned long long flip_timestamp_in_us;
 	bool flip_immediate;
 	/* TODO: add flip duration for FreeSync */
 	bool triplebuffer_flips;
@@ -465,6 +465,7 @@ struct dc_cursor_mi_param {
 	struct fixed31_32 v_scale_ratio;
 	enum dc_rotation_angle rotation;
 	bool mirror;
+	struct dc_stream_state *stream;
 };
 
 /* IPP related types */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_plane.h b/drivers/gpu/drm/amd/display/dc/dc_plane.h
new file mode 100644
index 000000000000..ef380cae816a
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_plane.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DC_PLANE_H_
+#define _DC_PLANE_H_
+
+#include "dc.h"
+#include "dc_hw_types.h"
+
+struct dc_plane_state *dc_create_plane_state(struct dc *dc);
+const struct dc_plane_status *dc_plane_get_status(
+		const struct dc_plane_state *plane_state);
+void dc_plane_state_retain(struct dc_plane_state *plane_state);
+void dc_plane_state_release(struct dc_plane_state *plane_state);
+
+#endif /* _DC_PLANE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_plane_priv.h b/drivers/gpu/drm/amd/display/dc/dc_plane_priv.h
new file mode 100644
index 000000000000..9ee184c1df00
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_plane_priv.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DC_PLANE_PRIV_H_
+#define _DC_PLANE_PRIV_H_
+
+#include "dc_plane.h"
+
+void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *plane_state);
+void dc_plane_destruct(struct dc_plane_state *plane_state);
+
+#endif /* _DC_PLANE_PRIV_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_state.h b/drivers/gpu/drm/amd/display/dc/dc_state.h
new file mode 100644
index 000000000000..d167fdbfa8a9
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_state.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DC_STATE_H_
+#define _DC_STATE_H_
+
+#include "dc.h"
+#include "inc/core_status.h"
+
+struct dc_state *dc_state_create(struct dc *dc);
+void dc_state_copy(struct dc_state *dst_state, struct dc_state *src_state);
+struct dc_state *dc_state_create_copy(struct dc_state *src_state);
+void dc_state_copy_current(struct dc *dc, struct dc_state *dst_state);
+struct dc_state *dc_state_create_current_copy(struct dc *dc);
+void dc_state_construct(struct dc *dc, struct dc_state *state);
+void dc_state_destruct(struct dc_state *state);
+void dc_state_retain(struct dc_state *state);
+void dc_state_release(struct dc_state *state);
+
+enum dc_status dc_state_add_stream(struct dc *dc,
+				    struct dc_state *state,
+				    struct dc_stream_state *stream);
+
+enum dc_status dc_state_remove_stream(
+		struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *stream);
+
+bool dc_state_add_plane(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		struct dc_plane_state *plane_state,
+		struct dc_state *state);
+
+bool dc_state_remove_plane(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		struct dc_plane_state *plane_state,
+		struct dc_state *state);
+
+bool dc_state_rem_all_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		struct dc_state *state);
+
+bool dc_state_add_all_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		struct dc_plane_state * const *plane_states,
+		int plane_count,
+		struct dc_state *state);
+
+struct dc_stream_status *dc_state_get_stream_status(
+	struct dc_state *state,
+	struct dc_stream_state *stream);
+#endif /* _DC_STATE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_state_priv.h b/drivers/gpu/drm/amd/display/dc/dc_state_priv.h
new file mode 100644
index 000000000000..c1f44e09a6c1
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_state_priv.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DC_STATE_PRIV_H_
+#define _DC_STATE_PRIV_H_
+
+#include "dc_state.h"
+#include "dc_stream.h"
+
+/* Get the type of the provided resource (none, phantom, main) based on the provided
+ * context. If the context is unavailable, determine only if phantom or not.
+ */
+enum mall_stream_type dc_state_get_pipe_subvp_type(const struct dc_state *state,
+		const struct pipe_ctx *pipe_ctx);
+enum mall_stream_type dc_state_get_stream_subvp_type(const struct dc_state *state,
+		const struct dc_stream_state *stream);
+
+/* Gets the phantom stream if main is provided; gets the main stream if phantom is provided. */
+struct dc_stream_state *dc_state_get_paired_subvp_stream(const struct dc_state *state,
+		const struct dc_stream_state *stream);
+
+/* allocates a phantom stream or plane and returns a pointer to the object */
+struct dc_stream_state *dc_state_create_phantom_stream(const struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *main_stream);
+struct dc_plane_state *dc_state_create_phantom_plane(struct dc *dc,
+		struct dc_state *state,
+		struct dc_plane_state *main_plane);
+
+/* deallocates a phantom stream or plane */
+void dc_state_release_phantom_stream(const struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *phantom_stream);
+void dc_state_release_phantom_plane(const struct dc *dc,
+		struct dc_state *state,
+		struct dc_plane_state *phantom_plane);
+
+/* add/remove phantom stream to context and generate subvp meta data */
+enum dc_status dc_state_add_phantom_stream(struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *phantom_stream,
+		struct dc_stream_state *main_stream);
+enum dc_status dc_state_remove_phantom_stream(struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *phantom_stream);
+
+bool dc_state_add_phantom_plane(
+		const struct dc *dc,
+		struct dc_stream_state *phantom_stream,
+		struct dc_plane_state *phantom_plane,
+		struct dc_state *state);
+
+bool dc_state_remove_phantom_plane(
+		const struct dc *dc,
+		struct dc_stream_state *phantom_stream,
+		struct dc_plane_state *phantom_plane,
+		struct dc_state *state);
+
+bool dc_state_rem_all_phantom_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *phantom_stream,
+		struct dc_state *state,
+		bool should_release_planes);
+
+bool dc_state_add_all_phantom_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *phantom_stream,
+		struct dc_plane_state * const *phantom_planes,
+		int plane_count,
+		struct dc_state *state);
+
+bool dc_state_remove_phantom_streams_and_planes(
+		struct dc *dc,
+		struct dc_state *state);
+
+void dc_state_release_phantom_streams_and_planes(
+		struct dc *dc,
+		struct dc_state *state);
+
+#endif /* _DC_STATE_PRIV_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index e61eea6db29c..ee10941caa59 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -38,6 +38,14 @@ struct timing_sync_info {
 	bool master;
 };
 
+struct mall_stream_config {
+	/* MALL stream config to indicate if the stream is phantom or not.
+	 * We will use a phantom stream to indicate that the pipe is phantom.
+	 */
+	enum mall_stream_type type;
+	struct dc_stream_state *paired_stream;	// master / slave stream
+};
+
 struct dc_stream_status {
 	int primary_otg_inst;
 	int stream_enc_inst;
@@ -50,6 +58,7 @@ struct dc_stream_status {
 	struct timing_sync_info timing_sync_info;
 	struct dc_plane_state *plane_states[MAX_SURFACE_NUM];
 	bool is_abm_supported;
+	struct mall_stream_config mall_stream_config;
 };
 
 enum hubp_dmdata_mode {
@@ -147,31 +156,6 @@ struct test_pattern {
 
 #define SUBVP_DRR_MARGIN_US 100 // 100us for DRR margin (SubVP + DRR)
 
-enum mall_stream_type {
-	SUBVP_NONE, // subvp not in use
-	SUBVP_MAIN, // subvp in use, this stream is main stream
-	SUBVP_PHANTOM, // subvp in use, this stream is a phantom stream
-};
-
-struct mall_stream_config {
-	/* MALL stream config to indicate if the stream is phantom or not.
-	 * We will use a phantom stream to indicate that the pipe is phantom.
-	 */
-	enum mall_stream_type type;
-	struct dc_stream_state *paired_stream;	// master / slave stream
-};
-
-/* Temp struct used to save and restore MALL config
- * during validation.
- *
- * TODO: Move MALL config into dc_state instead of stream struct
- * to avoid needing to save/restore.
- */
-struct mall_temp_config {
-	struct mall_stream_config mall_stream_config[MAX_PIPES];
-	bool is_phantom_plane[MAX_PIPES];
-};
-
 struct dc_stream_debug_options {
 	char force_odm_combine_segments;
 };
@@ -301,7 +285,7 @@ struct dc_stream_state {
 	bool has_non_synchronizable_pclk;
 	bool vblank_synchronized;
 	bool fpo_in_use;
-	struct mall_stream_config mall_stream_config;
+	bool is_phantom;
 };
 
 #define ABM_LEVEL_IMMEDIATE_DISABLE 255
@@ -415,45 +399,14 @@ bool dc_stream_get_scanoutpos(const struct dc_stream_state *stream,
 				  uint32_t *h_position,
 				  uint32_t *v_position);
 
-enum dc_status dc_add_stream_to_ctx(
-			struct dc *dc,
-		struct dc_state *new_ctx,
-		struct dc_stream_state *stream);
-
-enum dc_status dc_remove_stream_from_ctx(
-		struct dc *dc,
-			struct dc_state *new_ctx,
-			struct dc_stream_state *stream);
-
-
-bool dc_add_plane_to_context(
-		const struct dc *dc,
-		struct dc_stream_state *stream,
-		struct dc_plane_state *plane_state,
-		struct dc_state *context);
-
-bool dc_remove_plane_from_context(
-		const struct dc *dc,
-		struct dc_stream_state *stream,
-		struct dc_plane_state *plane_state,
-		struct dc_state *context);
-
-bool dc_rem_all_planes_for_stream(
-		const struct dc *dc,
-		struct dc_stream_state *stream,
-		struct dc_state *context);
-
-bool dc_add_all_planes_for_stream(
-		const struct dc *dc,
-		struct dc_stream_state *stream,
-		struct dc_plane_state * const *plane_states,
-		int plane_count,
-		struct dc_state *context);
-
 bool dc_stream_add_writeback(struct dc *dc,
 		struct dc_stream_state *stream,
 		struct dc_writeback_info *wb_info);
 
+bool dc_stream_fc_disable_writeback(struct dc *dc,
+		struct dc_stream_state *stream,
+		uint32_t dwb_pipe_inst);
+
 bool dc_stream_remove_writeback(struct dc *dc,
 		struct dc_stream_state *stream,
 		uint32_t dwb_pipe_inst);
@@ -514,9 +467,6 @@ void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink);
 void dc_stream_retain(struct dc_stream_state *dc_stream);
 void dc_stream_release(struct dc_stream_state *dc_stream);
 
-struct dc_stream_status *dc_stream_get_status_from_state(
-	struct dc_state *state,
-	struct dc_stream_state *stream);
 struct dc_stream_status *dc_stream_get_status(
 	struct dc_stream_state *dc_stream);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h b/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h
new file mode 100644
index 000000000000..7476fd52ce2b
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DC_STREAM_PRIV_H_
+#define _DC_STREAM_PRIV_H_
+
+#include "dc_stream.h"
+
+bool dc_stream_construct(struct dc_stream_state *stream,
+	struct dc_sink *dc_sink_data);
+void dc_stream_destruct(struct dc_stream_state *stream);
+
+void dc_stream_assign_stream_id(struct dc_stream_state *stream);
+
+#endif // _DC_STREAM_PRIV_H_
diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index fcb825e4f1bb..9900dda2eef5 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -991,10 +991,6 @@ struct link_mst_stream_allocation_table {
 	struct link_mst_stream_allocation stream_allocations[MAX_CONTROLLER_NUM];
 };
 
-struct backlight_settings {
-	uint32_t backlight_millinits;
-};
-
 /* PSR feature flags */
 struct psr_settings {
 	bool psr_feature_enabled;		// PSR is supported by sink
@@ -1022,6 +1018,25 @@ enum replay_coasting_vtotal_type {
 	PR_COASTING_TYPE_NUM,
 };
 
+enum replay_link_off_frame_count_level {
+	PR_LINK_OFF_FRAME_COUNT_FAIL = 0x0,
+	PR_LINK_OFF_FRAME_COUNT_GOOD = 0x2,
+	PR_LINK_OFF_FRAME_COUNT_BEST = 0x6,
+};
+
+/*
+ * This is a general interface for Replay to
+ * set a 32-bit variable in DMUB.
+ * The Message_type indicates which variable
+ * is passed to DMUB.
+ */
+enum replay_FW_Message_type {
+	Replay_Msg_Not_Support = -1,
+	Replay_Set_Timing_Sync_Supported,
+	Replay_Set_Residency_Frameupdate_Timer,
+	Replay_Set_Pseudo_VTotal,
+};
+
 union replay_error_status {
 	struct {
 		unsigned char STATE_TRANSITION_ERROR    :1;
@@ -1033,26 +1048,52 @@ union replay_error_status {
 };
 
 struct replay_config {
-	bool replay_supported;                          // Replay feature is supported
-	unsigned int replay_power_opt_supported;        // Power opt flags that are supported
-	bool replay_smu_opt_supported;                  // SMU optimization is supported
-	unsigned int replay_enable_option;              // Replay enablement option
-	uint32_t debug_flags;                           // Replay debug flags
-	bool replay_timing_sync_supported; // Replay desync is supported
-	bool force_disable_desync_error_check;             // Replay desync is supported
-	bool received_desync_error_hpd; //Replay Received Desync Error HPD.
-	union replay_error_status replay_error_status; // Replay error status
-};
-
-/* Replay feature flags */
+	/* Replay feature is supported */
+	bool replay_supported;
+	/* Power opt flags that are supported */
+	unsigned int replay_power_opt_supported;
+	/* SMU optimization is supported */
+	bool replay_smu_opt_supported;
+	/* Replay enablement option */
+	unsigned int replay_enable_option;
+	/* Replay debug flags */
+	uint32_t debug_flags;
+	/* Replay sync is supported */
+	bool replay_timing_sync_supported;
+	/* Replay Disable desync error check. */
+	bool force_disable_desync_error_check;
+	/* Replay Received Desync Error HPD. */
+	bool received_desync_error_hpd;
+	/* Replay supports fast resync in ultra sleep mode (long vblank) */
+	bool replay_support_fast_resync_in_ultra_sleep_mode;
+	/* Replay error status */
+	union replay_error_status replay_error_status;
+};
+
+/* Replay feature flags*/
 struct replay_settings {
-	struct replay_config config;            // Replay configuration
-	bool replay_feature_enabled;            // Replay feature is ready for activating
-	bool replay_allow_active;               // Replay is currently active
-	unsigned int replay_power_opt_active;   // Power opt flags that are activated currently
-	bool replay_smu_opt_enable;             // SMU optimization is enabled
-	uint16_t coasting_vtotal;               // Current Coasting vtotal
-	uint16_t coasting_vtotal_table[PR_COASTING_TYPE_NUM]; // Coasting vtotal table
+	/* Replay configuration */
+	struct replay_config config;
+	/* Replay feature is ready for activating */
+	bool replay_feature_enabled;
+	/* Replay is currently active */
+	bool replay_allow_active;
+	/* Replay is allowed during long vblank */
+	bool replay_allow_long_vblank;
+	/* Power opt flags that are activated currently */
+	unsigned int replay_power_opt_active;
+	/* SMU optimization is enabled */
+	bool replay_smu_opt_enable;
+	/* Current Coasting vtotal */
+	uint16_t coasting_vtotal;
+	/* Coasting vtotal table */
+	uint16_t coasting_vtotal_table[PR_COASTING_TYPE_NUM];
+	/* Maximum link off frame count */
+	enum replay_link_off_frame_count_level link_off_frame_count_level;
+	/* Replay pseudo vtotal for abm + ips on full screen video which can improve ips residency */
+	uint16_t abm_with_ips_on_full_screen_video_pseudo_vtotal;
+	/* Replay last pseudo vtotal set to DMUB */
+	uint16_t last_pseudo_vtotal;
 };
 
 /* To split out "global" and "per-panel" config settings.
@@ -1104,25 +1145,34 @@ struct dc_panel_config {
 	} ilr;
 };
 
+#define MAX_SINKS_PER_LINK 4
+
 /*
  *  USB4 DPIA BW ALLOCATION STRUCTS
  */
 struct dc_dpia_bw_alloc {
-	int sink_verified_bw;  // The Verified BW that sink can allocated and use that has been verified already
-	int sink_allocated_bw; // The Actual Allocated BW that sink currently allocated
-	int sink_max_bw;       // The Max BW that sink can require/support
+	int remote_sink_req_bw[MAX_SINKS_PER_LINK]; // BW requested by remote sinks
+	int link_verified_bw;  // The Verified BW that link can allocated and use that has been verified already
+	int link_max_bw;       // The Max BW that link can require/support
+	int allocated_bw;      // The Actual Allocated BW for this DPIA
 	int estimated_bw;      // The estimated available BW for this DPIA
 	int bw_granularity;    // BW Granularity
+	int dp_overhead;       // DP overhead in dp tunneling
 	bool bw_alloc_enabled; // The BW Alloc Mode Support is turned ON for all 3:  DP-Tx & Dpia & CM
 	bool response_ready;   // Response ready from the CM side
+	uint8_t nrd_max_lane_count; // Non-reduced max lane count
+	uint8_t nrd_max_link_rate; // Non-reduced max link rate
 };
 
-#define MAX_SINKS_PER_LINK 4
-
 enum dc_hpd_enable_select {
 	HPD_EN_FOR_ALL_EDP = 0,
 	HPD_EN_FOR_PRIMARY_EDP_ONLY,
 	HPD_EN_FOR_SECONDARY_EDP_ONLY,
 };
 
+enum mall_stream_type {
+	SUBVP_NONE, // subvp not in use
+	SUBVP_MAIN, // subvp in use, this stream is main stream
+	SUBVP_PHANTOM, // subvp in use, this stream is a phantom stream
+};
 #endif /* DC_TYPES_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
index 874b132fe1d7..a6006776333d 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
@@ -135,7 +135,7 @@ static void dmcu_set_backlight_level(
 			0, 1, 80000);
 }
 
-static void dce_abm_init(struct abm *abm, uint32_t backlight)
+static void dce_abm_init(struct abm *abm, uint32_t backlight, uint32_t user_level)
 {
 	struct dce_abm *abm_dce = TO_DCE_ABM(abm);
 
@@ -162,7 +162,7 @@ static void dce_abm_init(struct abm *abm, uint32_t backlight)
 			BL1_PWM_TARGET_ABM_LEVEL, backlight);
 
 	REG_UPDATE(BL1_PWM_USER_LEVEL,
-			BL1_PWM_USER_LEVEL, backlight);
+			BL1_PWM_USER_LEVEL, user_level);
 
 	REG_UPDATE_2(DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES,
 			ABM1_LS_MIN_PIXEL_VALUE_THRES, 0,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c
index 140598f18bbd..f0458b8f00af 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c
@@ -782,7 +782,7 @@ static void get_azalia_clock_info_dp(
 	/*audio_dto_module = dpDtoSourceClockInkhz * 10,000;
 	 *  [khz] ->[100Hz] */
 	azalia_clock_info->audio_dto_module =
-		pll_info->dp_dto_source_clock_in_khz * 10;
+		pll_info->audio_dto_source_clock_in_khz * 10;
 }
 
 void dce_aud_wall_dto_setup(
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
index 5d3f6fa1011e..970644b695cd 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
@@ -975,6 +975,9 @@ static bool dcn31_program_pix_clk(
 			look_up_in_video_optimized_rate_tlb(pix_clk_params->requested_pix_clk_100hz / 10);
 	struct bp_pixel_clock_parameters bp_pc_params = {0};
 	enum transmitter_color_depth bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_24;
+
+	if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0)
+		dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz;
 	// For these signal types Driver to program DP_DTO without calling VBIOS Command table
 	if (dc_is_dp_signal(pix_clk_params->signal_type) || dc_is_virtual_signal(pix_clk_params->signal_type)) {
 		if (e) {
@@ -1088,6 +1091,10 @@ static bool get_pixel_clk_frequency_100hz(
 	struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source);
 	unsigned int clock_hz = 0;
 	unsigned int modulo_hz = 0;
+	unsigned int dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dprefclk_khz;
+
+	if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0)
+		dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz;
 
 	if (clock_source->id == CLOCK_SOURCE_ID_DP_DTO) {
 		clock_hz = REG_READ(PHASE[inst]);
@@ -1100,7 +1107,7 @@ static bool get_pixel_clk_frequency_100hz(
 			modulo_hz = REG_READ(MODULO[inst]);
 			if (modulo_hz)
 				*pixel_clk_khz = div_u64((uint64_t)clock_hz*
-					clock_source->ctx->dc->clk_mgr->dprefclk_khz*10,
+					dp_dto_ref_khz*10,
 					modulo_hz);
 			else
 				*pixel_clk_khz = 0;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c
index e8570060d007..5bca67407c5b 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c
@@ -290,4 +290,5 @@ void dce_panel_cntl_construct(
 	dce_panel_cntl->base.funcs = &dce_link_panel_cntl_funcs;
 	dce_panel_cntl->base.ctx = init_data->ctx;
 	dce_panel_cntl->base.inst = init_data->inst;
+	dce_panel_cntl->base.pwrseq_inst = 0;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c
index d3e6544022b7..ccc154b0281c 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c
@@ -57,18 +57,22 @@ static unsigned int abm_feature_support(struct abm *abm, unsigned int panel_inst
 	return ret;
 }
 
-static void dmub_abm_init_ex(struct abm *abm, uint32_t backlight)
+static void dmub_abm_init_ex(struct abm *abm, uint32_t backlight, uint32_t user_level)
 {
-	dmub_abm_init(abm, backlight);
+	dmub_abm_init(abm, backlight, user_level);
 }
 
 static unsigned int dmub_abm_get_current_backlight_ex(struct abm *abm)
 {
+	dc_allow_idle_optimizations(abm->ctx->dc, false);
+
 	return dmub_abm_get_current_backlight(abm);
 }
 
 static unsigned int dmub_abm_get_target_backlight_ex(struct abm *abm)
 {
+	dc_allow_idle_optimizations(abm->ctx->dc, false);
+
 	return dmub_abm_get_target_backlight(abm);
 }
 
@@ -145,7 +149,11 @@ static bool dmub_abm_save_restore_ex(
 	return ret;
 }
 
-static bool dmub_abm_set_pipe_ex(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst)
+static bool dmub_abm_set_pipe_ex(struct abm *abm,
+		uint32_t otg_inst,
+		uint32_t option,
+		uint32_t panel_inst,
+		uint32_t pwrseq_inst)
 {
 	bool ret = false;
 	unsigned int feature_support;
@@ -153,7 +161,7 @@ static bool dmub_abm_set_pipe_ex(struct abm *abm, uint32_t otg_inst, uint32_t op
 	feature_support = abm_feature_support(abm, panel_inst);
 
 	if (feature_support == ABM_LCD_SUPPORT)
-		ret = dmub_abm_set_pipe(abm, otg_inst, option, panel_inst);
+		ret = dmub_abm_set_pipe(abm, otg_inst, option, panel_inst, pwrseq_inst);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c
index 592a8f7a1c6d..f9d6a181164a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c
@@ -76,10 +76,10 @@ static void dmub_abm_enable_fractional_pwm(struct dc_context *dc)
 	cmd.abm_set_pwm_frac.abm_set_pwm_frac_data.panel_mask = panel_mask;
 	cmd.abm_set_pwm_frac.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pwm_frac_data);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
-void dmub_abm_init(struct abm *abm, uint32_t backlight)
+void dmub_abm_init(struct abm *abm, uint32_t backlight, uint32_t user_level)
 {
 	struct dce_abm *dce_abm = TO_DMUB_ABM(abm);
 
@@ -106,7 +106,7 @@ void dmub_abm_init(struct abm *abm, uint32_t backlight)
 			BL1_PWM_TARGET_ABM_LEVEL, backlight);
 
 	REG_UPDATE(BL1_PWM_USER_LEVEL,
-			BL1_PWM_USER_LEVEL, backlight);
+			BL1_PWM_USER_LEVEL, user_level);
 
 	REG_UPDATE_2(DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES,
 			ABM1_LS_MIN_PIXEL_VALUE_THRES, 0,
@@ -155,7 +155,7 @@ bool dmub_abm_set_level(struct abm *abm, uint32_t level, uint8_t panel_mask)
 	cmd.abm_set_level.abm_set_level_data.panel_mask = panel_mask;
 	cmd.abm_set_level.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_level_data);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
@@ -186,7 +186,7 @@ void dmub_abm_init_config(struct abm *abm,
 
 	cmd.abm_init_config.header.payload_bytes = sizeof(struct dmub_cmd_abm_init_config_data);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 }
 
@@ -203,7 +203,7 @@ bool dmub_abm_set_pause(struct abm *abm, bool pause, unsigned int panel_inst, un
 	cmd.abm_pause.abm_pause_data.panel_mask = panel_mask;
 	cmd.abm_set_level.header.payload_bytes = sizeof(struct dmub_cmd_abm_pause_data);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
@@ -246,7 +246,7 @@ bool dmub_abm_save_restore(
 
 	cmd.abm_save_restore.header.payload_bytes = sizeof(struct dmub_rb_cmd_abm_save_restore);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	// Copy iramtable data into local structure
 	memcpy((void *)pData, dc->dmub_srv->dmub->scratch_mem_fb.cpu_addr, bytes);
@@ -254,7 +254,11 @@ bool dmub_abm_save_restore(
 	return true;
 }
 
-bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst)
+bool dmub_abm_set_pipe(struct abm *abm,
+		uint32_t otg_inst,
+		uint32_t option,
+		uint32_t panel_inst,
+		uint32_t pwrseq_inst)
 {
 	union dmub_rb_cmd cmd;
 	struct dc_context *dc = abm->ctx;
@@ -264,12 +268,13 @@ bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint
 	cmd.abm_set_pipe.header.type = DMUB_CMD__ABM;
 	cmd.abm_set_pipe.header.sub_type = DMUB_CMD__ABM_SET_PIPE;
 	cmd.abm_set_pipe.abm_set_pipe_data.otg_inst = otg_inst;
+	cmd.abm_set_pipe.abm_set_pipe_data.pwrseq_inst = pwrseq_inst;
 	cmd.abm_set_pipe.abm_set_pipe_data.set_pipe_option = option;
 	cmd.abm_set_pipe.abm_set_pipe_data.panel_inst = panel_inst;
 	cmd.abm_set_pipe.abm_set_pipe_data.ramping_boundary = ramping_boundary;
 	cmd.abm_set_pipe.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pipe_data);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
@@ -291,7 +296,7 @@ bool dmub_abm_set_backlight_level(struct abm *abm,
 	cmd.abm_set_backlight.abm_set_backlight_data.panel_mask = (0x01 << panel_inst);
 	cmd.abm_set_backlight.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_backlight_data);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h
index 853564d7f471..761685e5b8c9 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h
@@ -30,7 +30,7 @@
 
 struct abm_save_restore;
 
-void dmub_abm_init(struct abm *abm, uint32_t backlight);
+void dmub_abm_init(struct abm *abm, uint32_t backlight, uint32_t user_level);
 bool dmub_abm_set_level(struct abm *abm, uint32_t level, uint8_t panel_mask);
 unsigned int dmub_abm_get_current_backlight(struct abm *abm);
 unsigned int dmub_abm_get_target_backlight(struct abm *abm);
@@ -44,7 +44,7 @@ bool dmub_abm_save_restore(
 		struct dc_context *dc,
 		unsigned int panel_inst,
 		struct abm_save_restore *pData);
-bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst);
+bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst, uint32_t pwrseq_inst);
 bool dmub_abm_set_backlight_level(struct abm *abm,
 		unsigned int backlight_pwm_u16_16,
 		unsigned int frame_ramp,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c
index 2aa0e01a6891..ba1fec3016d5 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c
@@ -47,7 +47,7 @@ void dmub_hw_lock_mgr_cmd(struct dc_dmub_srv *dmub_srv,
 	if (!lock)
 		cmd.lock_hw.lock_hw_data.should_release = 1;
 
-	dm_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 void dmub_hw_lock_mgr_inbox0_cmd(struct dc_dmub_srv *dmub_srv,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c
index d8009b2dc56a..98a778996e1a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c
@@ -48,5 +48,5 @@ void dmub_enable_outbox_notification(struct dc_dmub_srv *dmub_srv)
 		sizeof(cmd.outbox1_enable.header);
 	cmd.outbox1_enable.enable = true;
 
-	dm_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
index 9d4170a356a2..3e243e407bb8 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
@@ -105,23 +105,18 @@ static enum dc_psr_state convert_psr_state(uint32_t raw_state)
  */
 static void dmub_psr_get_state(struct dmub_psr *dmub, enum dc_psr_state *state, uint8_t panel_inst)
 {
-	struct dmub_srv *srv = dmub->ctx->dmub_srv->dmub;
 	uint32_t raw_state = 0;
 	uint32_t retry_count = 0;
-	enum dmub_status status;
 
 	do {
 		// Send gpint command and wait for ack
-		status = dmub_srv_send_gpint_command(srv, DMUB_GPINT__GET_PSR_STATE, panel_inst, 30);
-
-		if (status == DMUB_STATUS_OK) {
-			// GPINT was executed, get response
-			dmub_srv_get_gpint_response(srv, &raw_state);
+		if (dc_wake_and_execute_gpint(dmub->ctx, DMUB_GPINT__GET_PSR_STATE, panel_inst, &raw_state,
+					      DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) {
 			*state = convert_psr_state(raw_state);
-		} else
+		} else {
 			// Return invalid state when GPINT times out
 			*state = PSR_STATE_INVALID;
-
+		}
 	} while (++retry_count <= 1000 && *state == PSR_STATE_INVALID);
 
 	// Assert if max retry hit
@@ -171,7 +166,7 @@ static bool dmub_psr_set_version(struct dmub_psr *dmub, struct dc_stream_state *
 	cmd.psr_set_version.psr_set_version_data.panel_inst = panel_inst;
 	cmd.psr_set_version.header.payload_bytes = sizeof(struct dmub_cmd_psr_set_version_data);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
@@ -199,7 +194,7 @@ static void dmub_psr_enable(struct dmub_psr *dmub, bool enable, bool wait, uint8
 
 	cmd.psr_enable.header.payload_bytes = 0; // Send header only
 
-	dm_execute_dmub_cmd(dc->dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	/* Below loops 1000 x 500us = 500 ms.
 	 *  Exit PSR may need to wait 1-2 frames to power up. Timeout after at
@@ -248,7 +243,7 @@ static void dmub_psr_set_level(struct dmub_psr *dmub, uint16_t psr_level, uint8_
 	cmd.psr_set_level.psr_set_level_data.psr_level = psr_level;
 	cmd.psr_set_level.psr_set_level_data.cmd_version = DMUB_CMD_PSR_CONTROL_VERSION_1;
 	cmd.psr_set_level.psr_set_level_data.panel_inst = panel_inst;
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 /*
@@ -267,7 +262,7 @@ static void dmub_psr_set_sink_vtotal_in_psr_active(struct dmub_psr *dmub,
 	cmd.psr_set_vtotal.psr_set_vtotal_data.psr_vtotal_idle = psr_vtotal_idle;
 	cmd.psr_set_vtotal.psr_set_vtotal_data.psr_vtotal_su = psr_vtotal_su;
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 /*
@@ -286,7 +281,7 @@ static void dmub_psr_set_power_opt(struct dmub_psr *dmub, unsigned int power_opt
 	cmd.psr_set_power_opt.psr_set_power_opt_data.power_opt = power_opt;
 	cmd.psr_set_power_opt.psr_set_power_opt_data.panel_inst = panel_inst;
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 /*
@@ -423,7 +418,7 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub,
 		copy_settings_data->relock_delay_frame_cnt = 2;
 	copy_settings_data->dsc_slice_height = psr_context->dsc_slice_height;
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
@@ -444,7 +439,7 @@ static void dmub_psr_force_static(struct dmub_psr *dmub, uint8_t panel_inst)
 	cmd.psr_force_static.header.sub_type = DMUB_CMD__PSR_FORCE_STATIC;
 	cmd.psr_enable.header.payload_bytes = 0;
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 /*
@@ -452,13 +447,11 @@ static void dmub_psr_force_static(struct dmub_psr *dmub, uint8_t panel_inst)
  */
 static void dmub_psr_get_residency(struct dmub_psr *dmub, uint32_t *residency, uint8_t panel_inst)
 {
-	struct dmub_srv *srv = dmub->ctx->dmub_srv->dmub;
 	uint16_t param = (uint16_t)(panel_inst << 8);
 
 	/* Send gpint command and wait for ack */
-	dmub_srv_send_gpint_command(srv, DMUB_GPINT__PSR_RESIDENCY, param, 30);
-
-	dmub_srv_get_gpint_response(srv, residency);
+	dc_wake_and_execute_gpint(dmub->ctx, DMUB_GPINT__PSR_RESIDENCY, param, residency,
+				  DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
 }
 
 static const struct dmub_psr_funcs psr_funcs = {
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c
index 28149e53c2a6..38e4797e9476 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c
@@ -258,13 +258,97 @@ static void dmub_replay_residency(struct dmub_replay *dmub, uint8_t panel_inst,
 		*residency = 0;
 }
 
+/*
+ * Set REPLAY power optimization flags and coasting vtotal with one DMUB command.
+ */
+static void dmub_replay_set_power_opt_and_coasting_vtotal(struct dmub_replay *dmub,
+		unsigned int power_opt, uint8_t panel_inst, uint16_t coasting_vtotal)
+{
+	union dmub_rb_cmd cmd;
+	struct dc_context *dc = dmub->ctx;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.replay_set_power_opt_and_coasting_vtotal.header.type = DMUB_CMD__REPLAY;
+	cmd.replay_set_power_opt_and_coasting_vtotal.header.sub_type =
+		DMUB_CMD__REPLAY_SET_POWER_OPT_AND_COASTING_VTOTAL;
+	cmd.replay_set_power_opt_and_coasting_vtotal.header.payload_bytes =
+		sizeof(struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal);
+	cmd.replay_set_power_opt_and_coasting_vtotal.replay_set_power_opt_data.power_opt = power_opt;
+	cmd.replay_set_power_opt_and_coasting_vtotal.replay_set_power_opt_data.panel_inst = panel_inst;
+	cmd.replay_set_power_opt_and_coasting_vtotal.replay_set_coasting_vtotal_data.coasting_vtotal = coasting_vtotal;
+
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+/*
+ * Send a general Replay command to DMUB.
+ *
+ * @dmub: Replay instance; its ctx is used to submit the command.
+ * @msg: selects which sub-command is built; unhandled values send nothing.
+ * @cmd_element: source of the command body fields for the selected message.
+ * Nothing is sent when @dmub, @cmd_element or dmub->ctx is NULL.
+ */
+static void dmub_replay_send_cmd(struct dmub_replay *dmub,
+		enum replay_FW_Message_type msg, union dmub_replay_cmd_set *cmd_element)
+{
+	union dmub_rb_cmd cmd;
+	struct dc_context *ctx;
+
+	if (dmub == NULL || cmd_element == NULL)
+		return;
+
+	ctx = dmub->ctx;
+	if (ctx == NULL || msg == Replay_Msg_Not_Support)
+		return;
+
+	memset(&cmd, 0, sizeof(cmd));
+	//Header
+	cmd.replay_set_timing_sync.header.type = DMUB_CMD__REPLAY;
+
+	switch (msg) {
+	case Replay_Set_Timing_Sync_Supported:
+		//Header
+		cmd.replay_set_timing_sync.header.sub_type =
+			DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED;
+		cmd.replay_set_timing_sync.header.payload_bytes =
+			sizeof(struct dmub_rb_cmd_replay_set_timing_sync);
+		//Cmd Body
+		cmd.replay_set_timing_sync.replay_set_timing_sync_data.panel_inst =
+						cmd_element->sync_data.panel_inst;
+		cmd.replay_set_timing_sync.replay_set_timing_sync_data.timing_sync_supported =
+						cmd_element->sync_data.timing_sync_supported;
+		break;
+	case Replay_Set_Residency_Frameupdate_Timer:
+		//Header
+		cmd.replay_set_frameupdate_timer.header.sub_type =
+			DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER;
+		cmd.replay_set_frameupdate_timer.header.payload_bytes =
+			sizeof(struct dmub_rb_cmd_replay_set_frameupdate_timer);
+		//Cmd Body
+		cmd.replay_set_frameupdate_timer.data.panel_inst =
+						cmd_element->panel_inst;
+		cmd.replay_set_frameupdate_timer.data.enable =
+						cmd_element->timer_data.enable;
+		cmd.replay_set_frameupdate_timer.data.frameupdate_count =
+						cmd_element->timer_data.frameupdate_count;
+		break;
+	case Replay_Msg_Not_Support:
+	default:
+		return;
+	}
+
+	dc_wake_and_execute_dmub_cmd(ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
 static const struct dmub_replay_funcs replay_funcs = {
-	.replay_copy_settings		= dmub_replay_copy_settings,
-	.replay_enable			= dmub_replay_enable,
-	.replay_get_state		= dmub_replay_get_state,
-	.replay_set_power_opt		= dmub_replay_set_power_opt,
-	.replay_set_coasting_vtotal	= dmub_replay_set_coasting_vtotal,
-	.replay_residency		= dmub_replay_residency,
+	.replay_copy_settings				= dmub_replay_copy_settings,
+	.replay_enable					= dmub_replay_enable,
+	.replay_get_state				= dmub_replay_get_state,
+	.replay_set_power_opt				= dmub_replay_set_power_opt,
+	.replay_set_coasting_vtotal			= dmub_replay_set_coasting_vtotal,
+	.replay_residency				= dmub_replay_residency,
+	.replay_set_power_opt_and_coasting_vtotal	= dmub_replay_set_power_opt_and_coasting_vtotal,
+	.replay_send_cmd				= dmub_replay_send_cmd,
 };
 
 /*
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h
index e8385bbf51fc..3613aff994d7 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h
@@ -45,10 +45,14 @@ struct dmub_replay_funcs {
 		struct replay_context *replay_context, uint8_t panel_inst);
 	void (*replay_set_power_opt)(struct dmub_replay *dmub, unsigned int power_opt,
 		uint8_t panel_inst);
+	void (*replay_send_cmd)(struct dmub_replay *dmub,
+		enum replay_FW_Message_type msg, union dmub_replay_cmd_set *cmd_element);
 	void (*replay_set_coasting_vtotal)(struct dmub_replay *dmub, uint16_t coasting_vtotal,
 		uint8_t panel_inst);
 	void (*replay_residency)(struct dmub_replay *dmub,
 		uint8_t panel_inst, uint32_t *residency, const bool is_start, const bool is_alpm);
+	void (*replay_set_power_opt_and_coasting_vtotal)(struct dmub_replay *dmub,
+		unsigned int power_opt, uint8_t panel_inst, uint16_t coasting_vtotal);
 };
 
 struct dmub_replay *dmub_replay_create(struct dc_context *ctx);
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/Makefile b/drivers/gpu/drm/amd/display/dc/dce100/Makefile
deleted file mode 100644
index 0d2f6bbf7558..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dce100/Makefile
+++ /dev/null
@@ -1,46 +0,0 @@
-#
-# Copyright 2017 Advanced Micro Devices, Inc.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
-# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-#
-# Makefile for the 'controller' sub-component of DAL.
-# It provides the control and status of HW CRTC block.
-
-CFLAGS_$(AMDDALPATH)/dc/dce100/dce100_resource.o = $(call cc-disable-warning, override-init)
-
-DCE100 = dce100_resource.o
-
-AMD_DAL_DCE100 = $(addprefix $(AMDDALPATH)/dc/dce100/,$(DCE100))
-
-AMD_DISPLAY_FILES += $(AMD_DAL_DCE100)
-
-
-###############################################################################
-# DCE 10x
-###############################################################################
-ifdef 0#CONFIG_DRM_AMD_DC_DCE11_0
-TG_DCE100 = dce100_resource.o
-
-AMD_DAL_TG_DCE100 = $(addprefix \
-	$(AMDDALPATH)/dc/dce100/,$(TG_DCE100))
-
-AMD_DISPLAY_FILES += $(AMD_DAL_TG_DCE100)
-endif
-
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/Makefile b/drivers/gpu/drm/amd/display/dc/dce110/Makefile
index 695a50ed5ad2..f0777d61c2cb 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce110/Makefile
@@ -26,8 +26,8 @@
 CFLAGS_$(AMDDALPATH)/dc/dce110/dce110_resource.o = $(call cc-disable-warning, override-init)
 
 DCE110 = dce110_timing_generator.o \
-dce110_compressor.o dce110_resource.o \
-dce110_opp_regamma_v.o dce110_opp_csc_v.o dce110_timing_generator_v.o \
+dce110_compressor.o dce110_opp_regamma_v.o \
+dce110_opp_csc_v.o dce110_timing_generator_v.o \
 dce110_mem_input_v.o dce110_opp_v.o dce110_transform_v.o
 
 AMD_DAL_DCE110 = $(addprefix $(AMDDALPATH)/dc/dce110/,$(DCE110))
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/Makefile b/drivers/gpu/drm/amd/display/dc/dce112/Makefile
index e846ef58cab3..7e92effec894 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce112/Makefile
@@ -25,8 +25,7 @@
 
 CFLAGS_$(AMDDALPATH)/dc/dce112/dce112_resource.o = $(call cc-disable-warning, override-init)
 
-DCE112 = dce112_compressor.o \
-dce112_resource.o
+DCE112 = dce112_compressor.o
 
 AMD_DAL_DCE112 = $(addprefix $(AMDDALPATH)/dc/dce112/,$(DCE112))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/Makefile b/drivers/gpu/drm/amd/display/dc/dce120/Makefile
index 097cf407a15d..1e3ef68a452a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce120/Makefile
@@ -26,7 +26,7 @@
 
 CFLAGS_$(AMDDALPATH)/dc/dce120/dce120_resource.o = $(call cc-disable-warning, override-init)
 
-DCE120 = dce120_resource.o dce120_timing_generator.o \
+DCE120 = dce120_timing_generator.o
 
 AMD_DAL_DCE120 = $(addprefix $(AMDDALPATH)/dc/dce120/,$(DCE120))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/Makefile b/drivers/gpu/drm/amd/display/dc/dce80/Makefile
index 93dd68c31275..7eefffbdc925 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce80/Makefile
@@ -25,8 +25,7 @@
 
 CFLAGS_$(AMDDALPATH)/dc/dce80/dce80_resource.o = $(call cc-disable-warning, override-init)
 
-DCE80 = dce80_timing_generator.o \
-	dce80_resource.o
+DCE80 = dce80_timing_generator.o
 
 AMD_DAL_DCE80 = $(addprefix $(AMDDALPATH)/dc/dce80/,$(DCE80))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
index 2d2007c3e2b6..ae6a131be71b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
@@ -22,9 +22,9 @@
 #
 # Makefile for DCN.
 
-DCN10 = dcn10_init.o dcn10_resource.o dcn10_ipp.o \
+DCN10 = dcn10_ipp.o \
 		dcn10_hw_sequencer_debug.o \
-		dcn10_dpp.o dcn10_opp.o dcn10_optc.o \
+		dcn10_dpp.o dcn10_opp.o \
 		dcn10_hubp.o dcn10_mpc.o \
 		dcn10_dpp_dscl.o dcn10_dpp_cm.o dcn10_cm_common.o \
 		dcn10_hubbub.o dcn10_stream_encoder.o dcn10_link_encoder.o
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
index 92fdab731f4a..9033b39e0e0c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
@@ -32,7 +32,7 @@
 #include "dce/dce_hwseq.h"
 #include "abm.h"
 #include "dmcu.h"
-#include "dcn10_optc.h"
+#include "dcn10/dcn10_optc.h"
 #include "dcn10/dcn10_dpp.h"
 #include "dcn10/dcn10_mpc.h"
 #include "timing_generator.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
index d7dc9696a8c8..3dae3943b056 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
@@ -2,13 +2,11 @@
 #
 # Makefile for DCN.
 
-DCN20 = dcn20_resource.o dcn20_init.o dcn20_dpp.o dcn20_dpp_cm.o dcn20_hubp.o \
-		dcn20_mpc.o dcn20_opp.o dcn20_hubbub.o dcn20_optc.o dcn20_mmhubbub.o \
+DCN20 = dcn20_dpp.o dcn20_dpp_cm.o dcn20_hubp.o \
+		dcn20_mpc.o dcn20_opp.o dcn20_hubbub.o dcn20_mmhubbub.o \
 		dcn20_stream_encoder.o dcn20_link_encoder.o dcn20_dccg.o \
 		dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o
 
-DCN20 += dcn20_dsc.o
-
 AMD_DAL_DCN20 = $(addprefix $(AMDDALPATH)/dc/dcn20/,$(DCN20))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_DCN20)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h
index ab6d09c6fe34..ef5c22f41563 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h
@@ -291,7 +291,43 @@
 	type SYMCLKB_FE_SRC_SEL;\
 	type SYMCLKC_FE_SRC_SEL;\
 	type SYMCLKD_FE_SRC_SEL;\
-	type SYMCLKE_FE_SRC_SEL;
+	type SYMCLKE_FE_SRC_SEL;\
+	type DTBCLK_P0_GATE_DISABLE;\
+	type DTBCLK_P1_GATE_DISABLE;\
+	type DTBCLK_P2_GATE_DISABLE;\
+	type DTBCLK_P3_GATE_DISABLE;\
+	type DSCCLK0_ROOT_GATE_DISABLE;\
+	type DSCCLK1_ROOT_GATE_DISABLE;\
+	type DSCCLK2_ROOT_GATE_DISABLE;\
+	type DSCCLK3_ROOT_GATE_DISABLE;\
+	type SYMCLKA_FE_ROOT_GATE_DISABLE;\
+	type SYMCLKB_FE_ROOT_GATE_DISABLE;\
+	type SYMCLKC_FE_ROOT_GATE_DISABLE;\
+	type SYMCLKD_FE_ROOT_GATE_DISABLE;\
+	type SYMCLKE_FE_ROOT_GATE_DISABLE;\
+	type DPPCLK0_ROOT_GATE_DISABLE;\
+	type DPPCLK1_ROOT_GATE_DISABLE;\
+	type DPPCLK2_ROOT_GATE_DISABLE;\
+	type DPPCLK3_ROOT_GATE_DISABLE;\
+	type HDMISTREAMCLK0_ROOT_GATE_DISABLE;\
+	type SYMCLKA_ROOT_GATE_DISABLE;\
+	type SYMCLKB_ROOT_GATE_DISABLE;\
+	type SYMCLKC_ROOT_GATE_DISABLE;\
+	type SYMCLKD_ROOT_GATE_DISABLE;\
+	type SYMCLKE_ROOT_GATE_DISABLE;\
+	type PHYA_REFCLK_ROOT_GATE_DISABLE;\
+	type PHYB_REFCLK_ROOT_GATE_DISABLE;\
+	type PHYC_REFCLK_ROOT_GATE_DISABLE;\
+	type PHYD_REFCLK_ROOT_GATE_DISABLE;\
+	type PHYE_REFCLK_ROOT_GATE_DISABLE;\
+	type DPSTREAMCLK0_ROOT_GATE_DISABLE;\
+	type DPSTREAMCLK1_ROOT_GATE_DISABLE;\
+	type DPSTREAMCLK2_ROOT_GATE_DISABLE;\
+	type DPSTREAMCLK3_ROOT_GATE_DISABLE;\
+	type DPSTREAMCLK0_GATE_DISABLE;\
+	type DPSTREAMCLK1_GATE_DISABLE;\
+	type DPSTREAMCLK2_GATE_DISABLE;\
+	type DPSTREAMCLK3_GATE_DISABLE;\
 
 struct dccg_shift {
 	DCCG_REG_FIELD_LIST(uint8_t)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
index 139cf31d2e45..89c3bf0fe0c9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
@@ -1077,8 +1077,16 @@ void hubp2_cursor_set_position(
 	if (src_y_offset < 0)
 		src_y_offset = 0;
 	/* Save necessary cursor info x, y position. w, h is saved in attribute func. */
-	hubp->cur_rect.x = src_x_offset + param->viewport.x;
-	hubp->cur_rect.y = src_y_offset + param->viewport.y;
+	if (param->stream->link->psr_settings.psr_version >= DC_PSR_VERSION_SU_1 &&
+	    param->rotation != ROTATION_ANGLE_0) {
+		hubp->cur_rect.x = 0;
+		hubp->cur_rect.y = 0;
+		hubp->cur_rect.w = param->stream->timing.h_addressable;
+		hubp->cur_rect.h = param->stream->timing.v_addressable;
+	} else {
+		hubp->cur_rect.x = src_x_offset + param->viewport.x;
+		hubp->cur_rect.y = src_y_offset + param->viewport.y;
+	}
 }
 
 void hubp2_clk_cntl(struct hubp *hubp, bool enable)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile
index 3a41a97b0729..2b0b4f32e13b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile
@@ -1,9 +1,8 @@
 # SPDX-License-Identifier: MIT
 #
 # Makefile for DCN.
-DCN201 = dcn201_init.o dcn201_resource.o \
-	dcn201_hubbub.o\
-	dcn201_mpc.o dcn201_hubp.o dcn201_opp.o dcn201_optc.o dcn201_dpp.o \
+DCN201 = dcn201_hubbub.o\
+	dcn201_mpc.o dcn201_hubp.o dcn201_opp.o dcn201_dpp.o \
 	dcn201_dccg.o dcn201_link_encoder.o
 
 AMD_DAL_DCN201 = $(addprefix $(AMDDALPATH)/dc/dcn201/,$(DCN201))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
index ce1be0afae4a..ca92f5c8e7fb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
@@ -2,7 +2,7 @@
 #
 # Makefile for DCN21.
 
-DCN21 = dcn21_init.o dcn21_hubp.o dcn21_hubbub.o dcn21_resource.o \
+DCN21 = dcn21_hubp.o dcn21_hubbub.o \
 	 dcn21_link_encoder.o dcn21_dccg.o
 
 AMD_DAL_DCN21 = $(addprefix $(AMDDALPATH)/dc/dcn21/,$(DCN21))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c
index 68cad55c72ab..e13d69a22c1c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c
@@ -691,7 +691,7 @@ static void dmcub_PLAT_54186_wa(struct hubp *hubp,
 	cmd.PLAT_54186_wa.flip.flip_params.vmid = flip_regs->vmid;
 
 	PERF_TRACE();  // TODO: remove after performance is stable.
-	dm_execute_dmub_cmd(hubp->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(hubp->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 	PERF_TRACE();  // TODO: remove after performance is stable.
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
index af4d2065d2c1..b5b2aa3b3783 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
@@ -23,12 +23,9 @@
 #
 #
 
-DCN30 := \
-	dcn30_init.o \
-	dcn30_hubbub.o \
+DCN30 := dcn30_hubbub.o \
 	dcn30_hubp.o \
 	dcn30_dpp.o \
-	dcn30_optc.o \
 	dcn30_dccg.o \
 	dcn30_mpc.o dcn30_vpg.o \
 	dcn30_afmt.o \
@@ -38,7 +35,6 @@ DCN30 := \
 	dcn30_dwb_cm.o \
 	dcn30_cm_common.o \
 	dcn30_mmhubbub.o \
-	dcn30_resource.o \
 	dcn30_dio_link_encoder.o
 
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c
index e43f77c11c00..5f97a868ada3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c
@@ -56,16 +56,13 @@ static void dpp3_enable_cm_block(
 
 static enum dc_lut_mode dpp30_get_gamcor_current(struct dpp *dpp_base)
 {
-	enum dc_lut_mode mode;
+	enum dc_lut_mode mode = LUT_BYPASS;
 	uint32_t state_mode;
 	uint32_t lut_mode;
 	struct dcn3_dpp *dpp = TO_DCN30_DPP(dpp_base);
 
 	REG_GET(CM_GAMCOR_CONTROL, CM_GAMCOR_MODE_CURRENT, &state_mode);
 
-	if (state_mode == 0)
-		mode = LUT_BYPASS;
-
 	if (state_mode == 2) {//Programmable RAM LUT
 		REG_GET(CM_GAMCOR_CONTROL, CM_GAMCOR_SELECT_CURRENT, &lut_mode);
 		if (lut_mode == 0)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c
index 0d98918bf0fc..1b9d9495f76d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c
@@ -130,6 +130,28 @@ bool dwb3_disable(struct dwbc *dwbc)
 	return true;
 }
 
+/* Set DWB frame capture to @enable, locking the DWB registers if not already locked. */
+void dwb3_set_fc_enable(struct dwbc *dwbc, enum dwb_frame_capture_enable enable)
+{
+	struct dcn30_dwbc *dwbc30 = TO_DCN30_DWBC(dwbc);
+	unsigned int pre_locked;
+
+	REG_GET(DWB_UPDATE_CTRL, DWB_UPDATE_LOCK, &pre_locked);
+
+	/* Lock DWB registers */
+	if (pre_locked == 0)
+		REG_UPDATE(DWB_UPDATE_CTRL, DWB_UPDATE_LOCK, 1);
+
+	/* Write the requested frame-capture enable value */
+	REG_UPDATE(FC_MODE_CTRL, FC_FRAME_CAPTURE_EN, enable);
+
+	/* Unlock DWB registers, but only if the lock was taken above */
+	if (pre_locked == 0)
+		REG_UPDATE(DWB_UPDATE_CTRL, DWB_UPDATE_LOCK, 0);
+
+	DC_LOG_DWB("%s dwb3_fc_enable = %d at inst = %d", __func__, enable, dwbc->inst);
+}
+
 bool dwb3_update(struct dwbc *dwbc, struct dc_dwb_params *params)
 {
 	struct dcn30_dwbc *dwbc30 = TO_DCN30_DWBC(dwbc);
@@ -226,6 +248,7 @@ static const struct dwbc_funcs dcn30_dwbc_funcs = {
 	.disable		= dwb3_disable,
 	.update			= dwb3_update,
 	.is_enabled		= dwb3_is_enabled,
+	.set_fc_enable		= dwb3_set_fc_enable,
 	.set_stereo		= dwb3_set_stereo,
 	.set_new_content	= dwb3_set_new_content,
 	.dwb_program_output_csc	= NULL,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h
index a5d1b81e768d..332634b76aac 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h
@@ -877,6 +877,8 @@ bool dwb3_update(struct dwbc *dwbc, struct dc_dwb_params *params);
 
 bool dwb3_is_enabled(struct dwbc *dwbc);
 
+void dwb3_set_fc_enable(struct dwbc *dwbc, enum dwb_frame_capture_enable enable);
+
 void dwb3_set_stereo(struct dwbc *dwbc,
 	struct dwb_stereo_params *stereo_params);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c
index 701c7d8bc038..03a50c32fcfe 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c
@@ -243,6 +243,9 @@ static bool dwb3_program_ogam_lut(
 		return false;
 	}
 
+	if (params->hw_points_num == 0)
+		return false;
+
 	REG_SET(DWB_OGAM_CONTROL, 0, DWB_OGAM_MODE, 2);
 
 	current_mode = dwb3_get_ogam_current(dwbc30);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
index 30fbc5e06dca..d241f665e40a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
@@ -10,9 +10,8 @@
 #
 # Makefile for dcn30.
 
-DCN301 = dcn301_init.o dcn301_resource.o dcn301_dccg.o \
-		dcn301_dio_link_encoder.o dcn301_panel_cntl.o dcn301_hubbub.o \
-		dcn301_optc.o
+DCN301 = dcn301_dccg.o \
+		dcn301_dio_link_encoder.o dcn301_panel_cntl.o dcn301_hubbub.o
 
 AMD_DAL_DCN301 = $(addprefix $(AMDDALPATH)/dc/dcn301/,$(DCN301))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c
index ad0df1a72a90..9e96a3ace207 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c
@@ -215,4 +215,5 @@ void dcn301_panel_cntl_construct(
 	dcn301_panel_cntl->base.funcs = &dcn301_link_panel_cntl_funcs;
 	dcn301_panel_cntl->base.ctx = init_data->ctx;
 	dcn301_panel_cntl->base.inst = init_data->inst;
+	dcn301_panel_cntl->base.pwrseq_inst = 0;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile
deleted file mode 100644
index 95b66baf39e9..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
-#
-# (c) Copyright 2020 Advanced Micro Devices, Inc. All the rights reserved
-#
-#  Authors: AMD
-#
-# Makefile for dcn302.
-
-DCN3_02 = dcn302_init.o dcn302_resource.o
-
-AMD_DAL_DCN3_02 = $(addprefix $(AMDDALPATH)/dc/dcn302/,$(DCN3_02))
-
-AMD_DISPLAY_FILES += $(AMD_DAL_DCN3_02)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/Makefile b/drivers/gpu/drm/amd/display/dc/dcn303/Makefile
index d7b3ad780e5d..a954e316aca2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn303/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn303/Makefile
@@ -6,7 +6,7 @@
 #
 # Makefile for dcn303.
 
-DCN3_03 = dcn303_init.o dcn303_resource.o
+DCN3_03 = dcn303_init.o
 
 AMD_DAL_DCN3_03 = $(addprefix $(AMDDALPATH)/dc/dcn303/,$(DCN3_03))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
index 96e45c9efb46..5d93ac16c03a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
@@ -10,8 +10,8 @@
 #
 # Makefile for dcn31.
 
-DCN31 = dcn31_resource.o dcn31_hubbub.o dcn31_init.o dcn31_hubp.o \
-	dcn31_dccg.o dcn31_optc.o dcn31_dio_link_encoder.o dcn31_panel_cntl.o \
+DCN31 = dcn31_hubbub.o dcn31_hubp.o \
+	dcn31_dccg.o dcn31_dio_link_encoder.o dcn31_panel_cntl.o \
 	dcn31_apg.o dcn31_hpo_dp_stream_encoder.o dcn31_hpo_dp_link_encoder.o \
 	dcn31_afmt.o dcn31_vpg.o
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c
index 4596f3bac1b4..26be5fee7411 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c
@@ -125,7 +125,7 @@ static bool query_dp_alt_from_dmub(struct link_encoder *enc,
 	cmd->query_dp_alt.header.payload_bytes = sizeof(cmd->query_dp_alt.data);
 	cmd->query_dp_alt.data.phy_id = phy_id_from_transmitter(enc10->base.transmitter);
 
-	if (!dm_execute_dmub_cmd(enc->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
+	if (!dc_wake_and_execute_dmub_cmd(enc->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
 		return false;
 
 	return true;
@@ -436,7 +436,7 @@ static bool link_dpia_control(struct dc_context *dc_ctx,
 
 	cmd.dig1_dpia_control.dpia_control = *dpia_control;
 
-	dm_execute_dmub_cmd(dc_ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
index 217acd4e292a..281be20b1a10 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
@@ -50,9 +50,9 @@ static bool dcn31_query_backlight_info(struct panel_cntl *panel_cntl, union dmub
 	cmd->panel_cntl.header.type = DMUB_CMD__PANEL_CNTL;
 	cmd->panel_cntl.header.sub_type = DMUB_CMD__PANEL_CNTL_QUERY_BACKLIGHT_INFO;
 	cmd->panel_cntl.header.payload_bytes = sizeof(cmd->panel_cntl.data);
-	cmd->panel_cntl.data.inst = dcn31_panel_cntl->base.inst;
+	cmd->panel_cntl.data.pwrseq_inst = dcn31_panel_cntl->base.pwrseq_inst;
 
-	return dm_execute_dmub_cmd(dc_dmub_srv->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
+	return dc_wake_and_execute_dmub_cmd(dc_dmub_srv->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
 }
 
 static uint32_t dcn31_get_16_bit_backlight_from_pwm(struct panel_cntl *panel_cntl)
@@ -78,14 +78,14 @@ static uint32_t dcn31_panel_cntl_hw_init(struct panel_cntl *panel_cntl)
 	cmd.panel_cntl.header.type = DMUB_CMD__PANEL_CNTL;
 	cmd.panel_cntl.header.sub_type = DMUB_CMD__PANEL_CNTL_HW_INIT;
 	cmd.panel_cntl.header.payload_bytes = sizeof(cmd.panel_cntl.data);
-	cmd.panel_cntl.data.inst = dcn31_panel_cntl->base.inst;
+	cmd.panel_cntl.data.pwrseq_inst = dcn31_panel_cntl->base.pwrseq_inst;
 	cmd.panel_cntl.data.bl_pwm_cntl = panel_cntl->stored_backlight_registers.BL_PWM_CNTL;
 	cmd.panel_cntl.data.bl_pwm_period_cntl = panel_cntl->stored_backlight_registers.BL_PWM_PERIOD_CNTL;
 	cmd.panel_cntl.data.bl_pwm_ref_div1 =
 		panel_cntl->stored_backlight_registers.LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV;
 	cmd.panel_cntl.data.bl_pwm_ref_div2 =
 		panel_cntl->stored_backlight_registers.PANEL_PWRSEQ_REF_DIV2;
-	if (!dm_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
+	if (!dc_wake_and_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
 		return 0;
 
 	panel_cntl->stored_backlight_registers.BL_PWM_CNTL = cmd.panel_cntl.data.bl_pwm_cntl;
@@ -154,7 +154,24 @@ void dcn31_panel_cntl_construct(
 	struct dcn31_panel_cntl *dcn31_panel_cntl,
 	const struct panel_cntl_init_data *init_data)
 {
+	uint8_t pwrseq_inst = 0xF;
+
 	dcn31_panel_cntl->base.funcs = &dcn31_link_panel_cntl_funcs;
 	dcn31_panel_cntl->base.ctx = init_data->ctx;
 	dcn31_panel_cntl->base.inst = init_data->inst;
+
+	switch (init_data->eng_id) {
+	case ENGINE_ID_DIGA:
+		pwrseq_inst = 0;
+		break;
+	case ENGINE_ID_DIGB:
+		pwrseq_inst = 1;
+		break;
+	default:
+		DC_LOG_WARNING("Unsupported pwrseq engine id: %d!\n", init_data->eng_id);
+		ASSERT(false);
+		break;
+	}
+
+	dcn31_panel_cntl->base.pwrseq_inst = pwrseq_inst;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile
index 72456debb99f..b134ab05aa71 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile
@@ -10,8 +10,7 @@
 #
 # Makefile for dcn314.
 
-DCN314 = dcn314_resource.o dcn314_init.o \
-		dcn314_dio_stream_encoder.o dcn314_dccg.o dcn314_optc.o
+DCN314 = dcn314_dio_stream_encoder.o dcn314_dccg.o
 
 AMD_DAL_DCN314 = $(addprefix $(AMDDALPATH)/dc/dcn314/,$(DCN314))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/Makefile b/drivers/gpu/drm/amd/display/dc/dcn315/Makefile
deleted file mode 100644
index 59381d24800b..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn315/Makefile
+++ /dev/null
@@ -1,30 +0,0 @@
-#
-# Copyright © 2021 Advanced Micro Devices, Inc.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
-# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-# Authors: AMD
-#
-# Makefile for dcn315.
-
-DCN315 = dcn315_resource.o
-
-AMD_DAL_DCN315 = $(addprefix $(AMDDALPATH)/dc/dcn315/,$(DCN315))
-
-AMD_DISPLAY_FILES += $(AMD_DAL_DCN315)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/Makefile b/drivers/gpu/drm/amd/display/dc/dcn316/Makefile
deleted file mode 100644
index 819d44a9439b..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn316/Makefile
+++ /dev/null
@@ -1,30 +0,0 @@
-#
-# Copyright 2021 Advanced Micro Devices, Inc.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
-# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-# Authors: AMD
-#
-# Makefile for dcn316.
-
-DCN316 = dcn316_resource.o
-
-AMD_DAL_DCN316 = $(addprefix $(AMDDALPATH)/dc/dcn316/,$(DCN316))
-
-AMD_DISPLAY_FILES += $(AMD_DAL_DCN316)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile
index 8bb251307247..5314770fff1c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile
@@ -10,10 +10,10 @@
 #
 # Makefile for dcn32.
 
-DCN32 = dcn32_resource.o dcn32_hubbub.o dcn32_init.o dcn32_dccg.o \
-		dcn32_dccg.o dcn32_optc.o dcn32_mmhubbub.o dcn32_hubp.o dcn32_dpp.o \
-		dcn32_dio_stream_encoder.o dcn32_dio_link_encoder.o dcn32_hpo_dp_link_encoder.o \
-		dcn32_resource_helpers.o dcn32_mpc.o
+DCN32 = dcn32_hubbub.o dcn32_dccg.o \
+		dcn32_mmhubbub.o dcn32_dpp.o dcn32_hubp.o dcn32_mpc.o \
+		dcn32_dio_stream_encoder.o dcn32_dio_link_encoder.o dcn32_resource_helpers.o \
+		dcn32_hpo_dp_link_encoder.o
 
 AMD_DAL_DCN32 = $(addprefix $(AMDDALPATH)/dc/dcn32/,$(DCN32))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c
index 501388014855..d761b0df2878 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c
@@ -203,12 +203,12 @@ void dcn32_link_encoder_construct(
 	enc10->base.hpd_source = init_data->hpd_source;
 	enc10->base.connector = init_data->connector;
 
-	if (enc10->base.connector.id == CONNECTOR_ID_USBC)
-		enc10->base.features.flags.bits.DP_IS_USB_C = 1;
 
 	enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
 
 	enc10->base.features = *enc_features;
+	if (enc10->base.connector.id == CONNECTOR_ID_USBC)
+		enc10->base.features.flags.bits.DP_IS_USB_C = 1;
 
 	enc10->base.transmitter = init_data->transmitter;
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c
index 994b21ed272f..e789e654c387 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c
@@ -71,12 +71,13 @@ void mpc32_power_on_blnd_lut(
 {
 	struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
 
+	REG_SET(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], 0, MPCC_MCM_1DLUT_MEM_PWR_DIS, power_on);
+
 	if (mpc->ctx->dc->debug.enable_mem_low_power.bits.cm) {
 		if (power_on) {
 			REG_UPDATE(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], MPCC_MCM_1DLUT_MEM_PWR_FORCE, 0);
 			REG_WAIT(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], MPCC_MCM_1DLUT_MEM_PWR_STATE, 0, 1, 5);
 		} else if (!mpc->ctx->dc->debug.disable_mem_low_power) {
-			ASSERT(false);
 			/* TODO: change to mpc
 			 *  dpp_base->ctx->dc->optimized_required = true;
 			 *  dpp_base->deferred_reg_writes.bits.disable_blnd_lut = true;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
index bc5f0db23d0c..87760600e154 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
@@ -24,10 +24,11 @@
  */
 
 // header file of functions being implemented
-#include "dcn32_resource.h"
+#include "dcn32/dcn32_resource.h"
 #include "dcn20/dcn20_resource.h"
 #include "dml/dcn32/display_mode_vba_util_32.h"
 #include "dml/dcn32/dcn32_fpu.h"
+#include "dc_state_priv.h"
 
 static bool is_dual_plane(enum surface_pixel_format format)
 {
@@ -190,7 +191,7 @@ bool dcn32_subvp_in_use(struct dc *dc,
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
-		if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE)
+		if (dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE)
 			return true;
 	}
 	return false;
@@ -264,18 +265,17 @@ static void override_det_for_subvp(struct dc *dc, struct dc_state *context, uint
 
 	// Do not override if a stream has multiple planes
 	for (i = 0; i < context->stream_count; i++) {
-		if (context->stream_status[i].plane_count > 1) {
+		if (context->stream_status[i].plane_count > 1)
 			return;
-		}
-		if (context->streams[i]->mall_stream_config.type != SUBVP_PHANTOM) {
+
+		if (dc_state_get_stream_subvp_type(context, context->streams[i]) != SUBVP_PHANTOM)
 			stream_count++;
-		}
 	}
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
 
-		if (pipe_ctx->stream && pipe_ctx->plane_state && pipe_ctx->stream->mall_stream_config.type != SUBVP_PHANTOM) {
+		if (pipe_ctx->stream && pipe_ctx->plane_state && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) {
 			if (dcn32_allow_subvp_high_refresh_rate(dc, context, pipe_ctx)) {
 
 				if (pipe_ctx->stream->timing.v_addressable == 1080 && pipe_ctx->stream->timing.h_addressable == 1920) {
@@ -290,7 +290,7 @@ static void override_det_for_subvp(struct dc *dc, struct dc_state *context, uint
 		for (i = 0; i < dc->res_pool->pipe_count; i++) {
 			struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
 
-			if (pipe_ctx->stream && pipe_ctx->plane_state && pipe_ctx->stream->mall_stream_config.type != SUBVP_PHANTOM) {
+			if (pipe_ctx->stream && pipe_ctx->plane_state && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) {
 				if (pipe_ctx->stream->timing.v_addressable == 1080 && pipe_ctx->stream->timing.h_addressable == 1920) {
 					if (pipe_segments[i] > 4)
 						pipe_segments[i] = 4;
@@ -337,14 +337,14 @@ void dcn32_determine_det_override(struct dc *dc,
 
 	for (i = 0; i < context->stream_count; i++) {
 		/* Don't count SubVP streams for DET allocation */
-		if (context->streams[i]->mall_stream_config.type != SUBVP_PHANTOM)
+		if (dc_state_get_stream_subvp_type(context, context->streams[i]) != SUBVP_PHANTOM)
 			stream_count++;
 	}
 
 	if (stream_count > 0) {
 		stream_segments = 18 / stream_count;
 		for (i = 0; i < context->stream_count; i++) {
-			if (context->streams[i]->mall_stream_config.type == SUBVP_PHANTOM)
+			if (dc_state_get_stream_subvp_type(context, context->streams[i]) == SUBVP_PHANTOM)
 				continue;
 
 			if (context->stream_status[i].plane_count > 0)
@@ -430,71 +430,6 @@ void dcn32_set_det_allocations(struct dc *dc, struct dc_state *context,
 		dcn32_determine_det_override(dc, context, pipes);
 }
 
-/**
- * dcn32_save_mall_state(): Save MALL (SubVP) state for fast validation cases
- *
- * This function saves the MALL (SubVP) case for fast validation cases. For fast validation,
- * there are situations where a shallow copy of the dc->current_state is created for the
- * validation. In this case we want to save and restore the mall config because we always
- * teardown subvp at the beginning of validation (and don't attempt to add it back if it's
- * fast validation). If we don't restore the subvp config in cases of fast validation +
- * shallow copy of the dc->current_state, the dc->current_state will have a partially
- * removed subvp state when we did not intend to remove it.
- *
- * NOTE: This function ONLY works if the streams are not moved to a different pipe in the
- *       validation. We don't expect this to happen in fast_validation=1 cases.
- *
- * @dc: Current DC state
- * @context: New DC state to be programmed
- * @temp_config: struct used to cache the existing MALL state
- *
- * Return: void
- */
-void dcn32_save_mall_state(struct dc *dc,
-		struct dc_state *context,
-		struct mall_temp_config *temp_config)
-{
-	uint32_t i;
-
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
-		if (pipe->stream)
-			temp_config->mall_stream_config[i] = pipe->stream->mall_stream_config;
-
-		if (pipe->plane_state)
-			temp_config->is_phantom_plane[i] = pipe->plane_state->is_phantom;
-	}
-}
-
-/**
- * dcn32_restore_mall_state(): Restore MALL (SubVP) state for fast validation cases
- *
- * Restore the MALL state based on the previously saved state from dcn32_save_mall_state
- *
- * @dc: Current DC state
- * @context: New DC state to be programmed, restore MALL state into here
- * @temp_config: struct that has the cached MALL state
- *
- * Return: void
- */
-void dcn32_restore_mall_state(struct dc *dc,
-		struct dc_state *context,
-		struct mall_temp_config *temp_config)
-{
-	uint32_t i;
-
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
-		if (pipe->stream)
-			pipe->stream->mall_stream_config = temp_config->mall_stream_config[i];
-
-		if (pipe->plane_state)
-			pipe->plane_state->is_phantom = temp_config->is_phantom_plane[i];
-	}
-}
-
 #define MAX_STRETCHED_V_BLANK 1000 // in micro-seconds (must ensure to match value in FW)
 /*
  * Scaling factor for v_blank stretch calculations considering timing in
@@ -589,13 +524,14 @@ static int get_refresh_rate(struct dc_stream_state *fpo_candidate_stream)
  *
  * Return: Pointer to FPO stream candidate if config can support FPO, otherwise NULL
  */
-struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, const struct dc_state *context)
+struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context)
 {
 	int refresh_rate = 0;
 	const int minimum_refreshrate_supported = 120;
 	struct dc_stream_state *fpo_candidate_stream = NULL;
 	bool is_fpo_vactive = false;
 	uint32_t fpo_vactive_margin_us = 0;
+	struct dc_stream_status *fpo_stream_status = NULL;
 
 	if (context == NULL)
 		return NULL;
@@ -618,16 +554,28 @@ struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stre
 		DC_FP_START();
 		dcn32_assign_fpo_vactive_candidate(dc, context, &fpo_candidate_stream);
 		DC_FP_END();
-
+		if (fpo_candidate_stream)
+			fpo_stream_status = dc_state_get_stream_status(context, fpo_candidate_stream);
 		DC_FP_START();
 		is_fpo_vactive = dcn32_find_vactive_pipe(dc, context, dc->debug.fpo_vactive_min_active_margin_us);
 		DC_FP_END();
 		if (!is_fpo_vactive || dc->debug.disable_fpo_vactive)
 			return NULL;
-	} else
+	} else {
 		fpo_candidate_stream = context->streams[0];
+		if (fpo_candidate_stream)
+			fpo_stream_status = dc_state_get_stream_status(context, fpo_candidate_stream);
+	}
 
-	if (!fpo_candidate_stream)
+	/* In DCN32/321, FPO uses per-pipe P-State force.
+	 * If there are no planes, HUBP is power gated and
+	 * therefore programming UCLK_PSTATE_FORCE does
+	 * nothing (P-State will always be asserted naturally
+	 * on a pipe that has HUBP power gated). Therefore we
+	 * only want to enable FPO if the FPO pipe has both
+	 * a stream and a plane.
+	 */
+	if (!fpo_candidate_stream || !fpo_stream_status || fpo_stream_status->plane_count == 0)
 		return NULL;
 
 	if (fpo_candidate_stream->sink->edid_caps.panel_patch.disable_fams)
@@ -666,6 +614,30 @@ bool dcn32_check_native_scaling_for_res(struct pipe_ctx *pipe, unsigned int widt
 }
 
 /**
+ * disallow_subvp_in_active_plus_blank() - Function to determine disallowed subvp + drr/vblank configs
+ *
+ * @pipe: subvp pipe to be used for the subvp + drr/vblank config
+ *
+ * Since subvp is being enabled on more configs (such as 1080p60), we want
+ * to explicitly block any configs that we don't want to enable. We do not
+ * want to enable any 1080p60 (SubVP) + drr / vblank configs since these
+ * are already covered by FPO. Note the refresh rate itself is not checked here.
+ *
+ * Return: True if disallowed, false otherwise
+ */
+static bool disallow_subvp_in_active_plus_blank(struct pipe_ctx *pipe)
+{
+	bool disallow = false;
+
+	if (resource_is_pipe_type(pipe, OPP_HEAD) &&
+			resource_is_pipe_type(pipe, DPP_PIPE)) {
+		if (pipe->stream->timing.v_addressable == 1080 && pipe->stream->timing.h_addressable == 1920)
+			disallow = true;
+	}
+	return disallow;
+}
+
+/**
  * dcn32_subvp_drr_admissable() - Determine if SubVP + DRR config is admissible
  *
  * @dc: Current DC state
@@ -688,21 +660,24 @@ bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context)
 	bool drr_pipe_found = false;
 	bool drr_psr_capable = false;
 	uint64_t refresh_rate = 0;
+	bool subvp_disallow = false;
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+		enum mall_stream_type pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe);
 
 		if (resource_is_pipe_type(pipe, OPP_HEAD) &&
 				resource_is_pipe_type(pipe, DPP_PIPE)) {
-			if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+			if (pipe_mall_type == SUBVP_MAIN) {
 				subvp_count++;
 
+				subvp_disallow |= disallow_subvp_in_active_plus_blank(pipe);
 				refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 +
 					pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1);
 				refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total);
 				refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total);
 			}
-			if (pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+			if (pipe_mall_type == SUBVP_NONE) {
 				non_subvp_pipes++;
 				drr_psr_capable = (drr_psr_capable || dcn32_is_psr_capable(pipe));
 				if (pipe->stream->ignore_msa_timing_param &&
@@ -713,7 +688,7 @@ bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context)
 		}
 	}
 
-	if (subvp_count == 1 && non_subvp_pipes == 1 && drr_pipe_found && !drr_psr_capable &&
+	if (subvp_count == 1 && !subvp_disallow && non_subvp_pipes == 1 && drr_pipe_found && !drr_psr_capable &&
 		((uint32_t)refresh_rate < 120))
 		result = true;
 
@@ -746,21 +721,24 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
 	struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
 	bool vblank_psr_capable = false;
 	uint64_t refresh_rate = 0;
+	bool subvp_disallow = false;
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+		enum mall_stream_type pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe);
 
 		if (resource_is_pipe_type(pipe, OPP_HEAD) &&
 				resource_is_pipe_type(pipe, DPP_PIPE)) {
-			if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+			if (pipe_mall_type == SUBVP_MAIN) {
 				subvp_count++;
 
+				subvp_disallow |= disallow_subvp_in_active_plus_blank(pipe);
 				refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 +
 					pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1);
 				refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total);
 				refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total);
 			}
-			if (pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+			if (pipe_mall_type == SUBVP_NONE) {
 				non_subvp_pipes++;
 				vblank_psr_capable = (vblank_psr_capable || dcn32_is_psr_capable(pipe));
 				if (pipe->stream->ignore_msa_timing_param &&
@@ -772,9 +750,35 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
 	}
 
 	if (subvp_count == 1 && non_subvp_pipes == 1 && !drr_pipe_found && !vblank_psr_capable &&
-		((uint32_t)refresh_rate < 120) &&
+		((uint32_t)refresh_rate < 120) && !subvp_disallow &&
 		vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_vblank_w_mall_sub_vp)
 		result = true;
 
 	return result;
 }
+
+void dcn32_update_dml_pipes_odm_policy_based_on_context(struct dc *dc, struct dc_state *context,
+		display_e2e_pipe_params_st *pipes)
+{
+	int i, pipe_cnt;
+	struct resource_context *res_ctx = &context->res_ctx;
+	struct pipe_ctx *pipe = NULL;
+
+	for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+		int odm_slice_count = 0;
+
+		if (!res_ctx->pipe_ctx[i].stream)
+			continue;
+		pipe = &res_ctx->pipe_ctx[i];
+		odm_slice_count = resource_get_odm_slice_count(pipe);
+		/* Only 1, 2 and 4 slices map to a policy; other counts leave it untouched. */
+		if (odm_slice_count == 1)
+			pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal;
+		else if (odm_slice_count == 2)
+			pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1;
+		else if (odm_slice_count == 4)
+			pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_4to1;
+
+		pipe_cnt++;
+	}
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/Makefile b/drivers/gpu/drm/amd/display/dc/dcn321/Makefile
index 0a199c83bb5b..c195c47f58b4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn321/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn321/Makefile
@@ -10,7 +10,7 @@
 #
 # Makefile for dcn321.
 
-DCN321 = dcn321_resource.o dcn321_dio_link_encoder.o
+DCN321 = dcn321_dio_link_encoder.o
 
 AMD_DAL_DCN321 = $(addprefix $(AMDDALPATH)/dc/dcn321/,$(DCN321))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/Makefile b/drivers/gpu/drm/amd/display/dc/dcn35/Makefile
index 20d0eef1a13b..0e317e0c36a0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/Makefile
@@ -10,9 +10,9 @@
 #
 # Makefile for DCN35.
 
-DCN35 = dcn35_resource.o dcn35_init.o dcn35_dio_stream_encoder.o \
-	dcn35_dio_link_encoder.o dcn35_dccg.o dcn35_optc.o \
-	dcn35_dsc.o dcn35_hubp.o dcn35_hubbub.o \
+DCN35 = dcn35_dio_stream_encoder.o \
+	dcn35_dio_link_encoder.o dcn35_dccg.o \
+	dcn35_hubp.o dcn35_hubbub.o \
 	dcn35_mmhubbub.o dcn35_opp.o dcn35_dpp.o dcn35_pg_cntl.o dcn35_dwb.o
 
 AMD_DAL_DCN35 = $(addprefix $(AMDDALPATH)/dc/dcn35/,$(DCN35))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c
index 479f3683c0b7..f1ba7bb792ea 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c
@@ -256,6 +256,21 @@ static void dccg35_set_dtbclk_dto(
 	if (params->ref_dtbclk_khz && req_dtbclk_khz) {
 		uint32_t modulo, phase;
 
+		switch (params->otg_inst) {
+		case 0:
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, 1);
+			break;
+		case 1:
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, 1);
+			break;
+		case 2:
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, 1);
+			break;
+		case 3:
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, 1);
+			break;
+		}
+
 		// phase / modulo = dtbclk / dtbclk ref
 		modulo = params->ref_dtbclk_khz * 1000;
 		phase = req_dtbclk_khz * 1000;
@@ -280,6 +295,21 @@ static void dccg35_set_dtbclk_dto(
 		REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst],
 				PIPE_DTO_SRC_SEL[params->otg_inst], 2);
 	} else {
+		switch (params->otg_inst) {
+		case 0:
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, 0);
+			break;
+		case 1:
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, 0);
+			break;
+		case 2:
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, 0);
+			break;
+		case 3:
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, 0);
+			break;
+		}
+
 		REG_UPDATE_2(OTG_PIXEL_RATE_CNTL[params->otg_inst],
 				DTBCLK_DTO_ENABLE[params->otg_inst], 0,
 				PIPE_DTO_SRC_SEL[params->otg_inst], params->is_hdmi ? 0 : 1);
@@ -476,6 +506,64 @@ static void dccg35_dpp_root_clock_control(
 	dccg->dpp_clock_gated[dpp_inst] = !clock_on;
 }
 
+static void dccg35_disable_symclk32_se(
+		struct dccg *dccg,
+		int hpo_se_inst)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	/* set refclk as the source for symclk32_se and clear its enable bit */
+	switch (hpo_se_inst) {
+	case 0:
+		REG_UPDATE_2(SYMCLK32_SE_CNTL,
+				SYMCLK32_SE0_SRC_SEL, 0,
+				SYMCLK32_SE0_EN, 0);
+		if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) {
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+					SYMCLK32_SE0_GATE_DISABLE, 0);
+//			REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+//					SYMCLK32_ROOT_SE0_GATE_DISABLE, 0);
+		}
+		break;
+	case 1:
+		REG_UPDATE_2(SYMCLK32_SE_CNTL,
+				SYMCLK32_SE1_SRC_SEL, 0,
+				SYMCLK32_SE1_EN, 0);
+		if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) {
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+					SYMCLK32_SE1_GATE_DISABLE, 0);
+//			REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+//					SYMCLK32_ROOT_SE1_GATE_DISABLE, 0);
+		}
+		break;
+	case 2:
+		REG_UPDATE_2(SYMCLK32_SE_CNTL,
+				SYMCLK32_SE2_SRC_SEL, 0,
+				SYMCLK32_SE2_EN, 0);
+		if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) {
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+					SYMCLK32_SE2_GATE_DISABLE, 0);
+//			REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+//					SYMCLK32_ROOT_SE2_GATE_DISABLE, 0);
+		}
+		break;
+	case 3:
+		REG_UPDATE_2(SYMCLK32_SE_CNTL,
+				SYMCLK32_SE3_SRC_SEL, 0,
+				SYMCLK32_SE3_EN, 0);
+		if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) {
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+					SYMCLK32_SE3_GATE_DISABLE, 0);
+//			REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+//					SYMCLK32_ROOT_SE3_GATE_DISABLE, 0);
+		}
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
 void dccg35_init(struct dccg *dccg)
 {
 	int otg_inst;
@@ -484,7 +572,7 @@ void dccg35_init(struct dccg *dccg)
 	 * will cause DCN to hang.
 	 */
 	for (otg_inst = 0; otg_inst < 4; otg_inst++)
-		dccg31_disable_symclk32_se(dccg, otg_inst);
+		dccg35_disable_symclk32_se(dccg, otg_inst);
 
 	if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
 		for (otg_inst = 0; otg_inst < 2; otg_inst++)
@@ -758,7 +846,7 @@ static const struct dccg_funcs dccg35_funcs = {
 	.dccg_init = dccg35_init,
 	.set_dpstreamclk = dccg35_set_dpstreamclk,
 	.enable_symclk32_se = dccg31_enable_symclk32_se,
-	.disable_symclk32_se = dccg31_disable_symclk32_se,
+	.disable_symclk32_se = dccg35_disable_symclk32_se,
 	.enable_symclk32_le = dccg31_enable_symclk32_le,
 	.disable_symclk32_le = dccg31_disable_symclk32_le,
 	.set_symclk32_le_root_clock_gating = dccg31_set_symclk32_le_root_clock_gating,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h
index 423feb4c2f3f..1586a45ca3bd 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h
@@ -34,6 +34,8 @@
 #define DCCG_REG_LIST_DCN35() \
 	DCCG_REG_LIST_DCN314(),\
 	SR(DPPCLK_CTRL),\
+	SR(DCCG_GATE_DISABLE_CNTL4),\
+	SR(DCCG_GATE_DISABLE_CNTL5),\
 	SR(DCCG_GATE_DISABLE_CNTL6),\
 	SR(DCCG_GLOBAL_FGCG_REP_CNTL),\
 	SR(SYMCLKA_CLOCK_ENABLE),\
@@ -174,7 +176,61 @@
 	DCCG_SF(SYMCLKB_CLOCK_ENABLE, SYMCLKB_FE_SRC_SEL, mask_sh),\
 	DCCG_SF(SYMCLKC_CLOCK_ENABLE, SYMCLKC_FE_SRC_SEL, mask_sh),\
 	DCCG_SF(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_SRC_SEL, mask_sh),\
-	DCCG_SF(SYMCLKE_CLOCK_ENABLE, SYMCLKE_FE_SRC_SEL, mask_sh)
+	DCCG_SF(SYMCLKE_CLOCK_ENABLE, SYMCLKE_FE_SRC_SEL, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_FE_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_FE_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_FE_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_FE_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_FE_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL2, HDMICHARCLK0_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL4, HDMICHARCLK0_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL6, HDMISTREAMCLK0_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE0_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE1_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE2_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE3_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE0_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE1_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE0_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE1_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE2_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE3_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE0_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE1_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYA_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYB_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYC_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYD_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYE_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(HDMISTREAMCLK0_DTO_PARAM, HDMISTREAMCLK0_DTO_PHASE, mask_sh),\
+	DCCG_SF(HDMISTREAMCLK0_DTO_PARAM, HDMISTREAMCLK0_DTO_MODULO, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL, DISPCLK_DCCG_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, HDMISTREAMCLK0_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_GATE_DISABLE, mask_sh),\
 
 struct dccg *dccg35_create(
 		struct dc_context *ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c
index f91e08895275..81e349d5835b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c
@@ -184,8 +184,6 @@ void dcn35_link_encoder_construct(
 	enc10->base.hpd_source = init_data->hpd_source;
 	enc10->base.connector = init_data->connector;
 
-	if (enc10->base.connector.id == CONNECTOR_ID_USBC)
-		enc10->base.features.flags.bits.DP_IS_USB_C = 1;
 
 	enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
 
@@ -240,6 +238,8 @@ void dcn35_link_encoder_construct(
 	}
 
 	enc10->base.features.flags.bits.HDMI_6GB_EN = 1;
+	if (enc10->base.connector.id == CONNECTOR_ID_USBC)
+		enc10->base.features.flags.bits.DP_IS_USB_C = 1;
 
 	if (bp_funcs->get_connector_speed_cap_info)
 		result = bp_funcs->get_connector_speed_cap_info(enc10->base.ctx->dc_bios,
@@ -256,6 +256,10 @@ void dcn35_link_encoder_construct(
 		enc10->base.features.flags.bits.IS_UHBR10_CAPABLE = bp_cap_info.DP_UHBR10_EN;
 		enc10->base.features.flags.bits.IS_UHBR13_5_CAPABLE = bp_cap_info.DP_UHBR13_5_EN;
 		enc10->base.features.flags.bits.IS_UHBR20_CAPABLE = bp_cap_info.DP_UHBR20_EN;
+		if (bp_cap_info.DP_IS_USB_C) {
+			/* BIOS has not switched to using CONNECTOR_ID_USBC = 24 yet */
+			enc10->base.features.flags.bits.DP_IS_USB_C = 1;
+		}
 
 	} else {
 		DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n",
@@ -264,4 +268,5 @@ void dcn35_link_encoder_construct(
 	}
 	if (enc10->base.ctx->dc->debug.hdmi20_disable)
 		enc10->base.features.flags.bits.HDMI_6GB_EN = 0;
+
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c
index 46f71ff08fd1..53bd0ae4bab5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c
@@ -261,6 +261,7 @@ void pg_cntl35_hpo_pg_control(struct pg_cntl *pg_cntl, bool power_on)
 	uint32_t power_gate = power_on ? 0 : 1;
 	uint32_t pwr_status = power_on ? 0 : 2;
 	uint32_t org_ip_request_cntl;
+	uint32_t power_forceon;
 	bool block_enabled;
 
 	if (pg_cntl->ctx->dc->debug.ignore_pg ||
@@ -277,6 +278,10 @@ void pg_cntl35_hpo_pg_control(struct pg_cntl *pg_cntl, bool power_on)
 			return;
 	}
 
+	REG_GET(DOMAIN25_PG_CONFIG, DOMAIN_POWER_FORCEON, &power_forceon);
+	if (power_forceon)
+		return;
+
 	REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl);
 	if (org_ip_request_cntl == 0)
 		REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1);
@@ -304,6 +309,7 @@ void pg_cntl35_io_clk_pg_control(struct pg_cntl *pg_cntl, bool power_on)
 	uint32_t power_gate = power_on ? 0 : 1;
 	uint32_t pwr_status = power_on ? 0 : 2;
 	uint32_t org_ip_request_cntl;
+	uint32_t power_forceon;
 	bool block_enabled;
 
 	if (pg_cntl->ctx->dc->debug.ignore_pg ||
@@ -319,6 +325,10 @@ void pg_cntl35_io_clk_pg_control(struct pg_cntl *pg_cntl, bool power_on)
 			return;
 	}
 
+	REG_GET(DOMAIN22_PG_CONFIG, DOMAIN_POWER_FORCEON, &power_forceon);
+	if (power_forceon)
+		return;
+
 	REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl);
 	if (org_ip_request_cntl == 0)
 		REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1);
@@ -332,13 +342,6 @@ void pg_cntl35_io_clk_pg_control(struct pg_cntl *pg_cntl, bool power_on)
 	pg_cntl->pg_res_enable[PG_DCIO] = power_on;
 }
 
-void pg_cntl35_set_force_poweron_domain22(struct pg_cntl *pg_cntl, bool power_on)
-{
-	struct dcn_pg_cntl *pg_cntl_dcn = TO_DCN_PG_CNTL(pg_cntl);
-
-	REG_UPDATE(DOMAIN22_PG_CONFIG, DOMAIN_POWER_FORCEON, power_on ? 1 : 0);
-}
-
 static bool pg_cntl35_plane_otg_status(struct pg_cntl *pg_cntl)
 {
 	struct dcn_pg_cntl *pg_cntl_dcn = TO_DCN_PG_CNTL(pg_cntl);
@@ -508,8 +511,7 @@ static const struct pg_cntl_funcs pg_cntl35_funcs = {
 	.mpcc_pg_control = pg_cntl35_mpcc_pg_control,
 	.opp_pg_control = pg_cntl35_opp_pg_control,
 	.optc_pg_control = pg_cntl35_optc_pg_control,
-	.dwb_pg_control = pg_cntl35_dwb_pg_control,
-	.set_force_poweron_domain22 = pg_cntl35_set_force_poweron_domain22
+	.dwb_pg_control = pg_cntl35_dwb_pg_control
 };
 
 struct pg_cntl *pg_cntl35_create(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h
index 069dae08e222..3de240884d22 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h
@@ -183,7 +183,6 @@ void pg_cntl35_optc_pg_control(struct pg_cntl *pg_cntl,
 	unsigned int optc_inst, bool power_on);
 void pg_cntl35_dwb_pg_control(struct pg_cntl *pg_cntl, bool power_on);
 void pg_cntl35_init_pg_status(struct pg_cntl *pg_cntl);
-void pg_cntl35_set_force_poweron_domain22(struct pg_cntl *pg_cntl, bool power_on);
 
 struct pg_cntl *pg_cntl35_create(
 	struct dc_context *ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h
index 7ce9a5b6c33b..6d7a15dcf8a7 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h
@@ -103,10 +103,16 @@ enum act_return_status dm_helpers_dp_mst_poll_for_allocation_change_trigger(
 /*
  * Sends ALLOCATE_PAYLOAD message.
  */
-bool dm_helpers_dp_mst_send_payload_allocation(
+void dm_helpers_dp_mst_send_payload_allocation(
 		struct dc_context *ctx,
-		const struct dc_stream_state *stream,
-		bool enable);
+		const struct dc_stream_state *stream);
+
+/*
+ * Update the MST manager's relevant variables for deallocation.
+ */
+void dm_helpers_dp_mst_update_mst_mgr_for_deallocation(
+		struct dc_context *ctx,
+		const struct dc_stream_state *stream);
 
 bool dm_helpers_dp_mst_start_top_mgr(
 		struct dc_context *ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
index 4440d08743aa..bd7ba0a25198 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
@@ -247,6 +247,7 @@ struct pp_smu_funcs_nv {
 #define PP_SMU_NUM_MEMCLK_DPM_LEVELS  4
 #define PP_SMU_NUM_DCLK_DPM_LEVELS    8
 #define PP_SMU_NUM_VCLK_DPM_LEVELS    8
+#define PP_SMU_NUM_VPECLK_DPM_LEVELS  8
 
 struct dpm_clock {
   uint32_t  Freq;    // In MHz
@@ -262,6 +263,7 @@ struct dpm_clocks {
 	struct dpm_clock MemClocks[PP_SMU_NUM_MEMCLK_DPM_LEVELS];
 	struct dpm_clock VClocks[PP_SMU_NUM_VCLK_DPM_LEVELS];
 	struct dpm_clock DClocks[PP_SMU_NUM_DCLK_DPM_LEVELS];
+	struct dpm_clock VPEClocks[PP_SMU_NUM_VPECLK_DPM_LEVELS];
 };
 
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index ea7d60f9a9b4..59ade76ffb18 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -61,18 +61,22 @@ endif
 endif
 
 ifneq ($(CONFIG_FRAME_WARN),0)
+ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y)
+frame_warn_flag := -Wframe-larger-than=3072
+else
 frame_warn_flag := -Wframe-larger-than=2048
 endif
+endif
 
 CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags) $(frame_warn_flag)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags) $(frame_warn_flag)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags) $(frame_warn_flag)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(frame_warn_flag)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags)
diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c
index 50b0434354f8..0c4a8fe8e5ca 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c
@@ -30,7 +30,7 @@
 #include "dcn_calc_auto.h"
 #include "dal_asic_id.h"
 #include "resource.h"
-#include "dcn10/dcn10_resource.h"
+#include "resource/dcn10/dcn10_resource.h"
 #include "dcn10/dcn10_hubbub.h"
 #include "dml/dml1_display_rq_dlg_calc.h"
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dc_features.h b/drivers/gpu/drm/amd/display/dc/dml/dc_features.h
index 2cbdd75429ff..6e669a2c5b2d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dc_features.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dc_features.h
@@ -36,7 +36,7 @@
  * Define the maximum amount of states supported by the ASIC. Every ASIC has a
  * specific number of states; this macro defines the maximum number of states.
  */
-#define DC__VOLTAGE_STATES 20
+#define DC__VOLTAGE_STATES 40
 #define DC__NUM_DPP__4 1
 #define DC__NUM_DPP__0_PRESENT 1
 #define DC__NUM_DPP__1_PRESENT 1
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
index 7fc8b18096ba..38ab9ad60ef8 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
@@ -33,6 +33,7 @@
 
 #include "link.h"
 #include "dcn20_fpu.h"
+#include "dc_state_priv.h"
 
 #define DC_LOGGER \
 	dc->ctx->logger
@@ -440,7 +441,115 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv14_soc = {
 	.use_urgent_burst_bw = 0
 };
 
-struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv12_soc = { 0 };
+struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv12_soc = { /* nv12 SoC bounding box */
+	.clock_limits = { /* per-state limits; clocks in MHz, DRAM speed in MT/s */
+		{
+			.state = 0,
+			.dcfclk_mhz = 560.0,
+			.fabricclk_mhz = 560.0,
+			.dispclk_mhz = 513.0,
+			.dppclk_mhz = 513.0,
+			.phyclk_mhz = 540.0,
+			.socclk_mhz = 560.0,
+			.dscclk_mhz = 171.0,
+			.dram_speed_mts = 1069.0,
+		},
+		{
+			.state = 1,
+			.dcfclk_mhz = 694.0,
+			.fabricclk_mhz = 694.0,
+			.dispclk_mhz = 642.0,
+			.dppclk_mhz = 642.0,
+			.phyclk_mhz = 600.0,
+			.socclk_mhz = 694.0,
+			.dscclk_mhz = 214.0,
+			.dram_speed_mts = 1324.0,
+		},
+		{
+			.state = 2,
+			.dcfclk_mhz = 875.0,
+			.fabricclk_mhz = 875.0,
+			.dispclk_mhz = 734.0,
+			.dppclk_mhz = 734.0,
+			.phyclk_mhz = 810.0,
+			.socclk_mhz = 875.0,
+			.dscclk_mhz = 245.0,
+			.dram_speed_mts = 1670.0,
+		},
+		{
+			.state = 3,
+			.dcfclk_mhz = 1000.0,
+			.fabricclk_mhz = 1000.0,
+			.dispclk_mhz = 1100.0,
+			.dppclk_mhz = 1100.0,
+			.phyclk_mhz = 810.0,
+			.socclk_mhz = 1000.0,
+			.dscclk_mhz = 367.0,
+			.dram_speed_mts = 2000.0,
+		},
+		{
+			.state = 4,
+			.dcfclk_mhz = 1200.0,
+			.fabricclk_mhz = 1200.0,
+			.dispclk_mhz = 1284.0,
+			.dppclk_mhz = 1284.0,
+			.phyclk_mhz = 810.0,
+			.socclk_mhz = 1200.0,
+			.dscclk_mhz = 428.0,
+			.dram_speed_mts = 2000.0,
+		},
+		{
+			.state = 5, /* every limit below is identical to state 4 */
+			.dcfclk_mhz = 1200.0,
+			.fabricclk_mhz = 1200.0,
+			.dispclk_mhz = 1284.0,
+			.dppclk_mhz = 1284.0,
+			.phyclk_mhz = 810.0,
+			.socclk_mhz = 1200.0,
+			.dscclk_mhz = 428.0,
+			.dram_speed_mts = 2000.0,
+		},
+	},
+
+	.num_states = 5, /* NOTE(review): six entries (states 0-5) are listed above - confirm */
+	.sr_exit_time_us = 1.9,
+	.sr_enter_plus_exit_time_us = 4.4,
+	.urgent_latency_us = 3.0,
+	.urgent_latency_pixel_data_only_us = 4.0,
+	.urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+	.urgent_latency_vm_data_only_us = 4.0,
+	.urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+	.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+	.urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+	.pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 40.0,
+	.pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 40.0,
+	.pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
+	.max_avg_sdp_bw_use_normal_percent = 40.0,
+	.max_avg_dram_bw_use_normal_percent = 40.0,
+	.writeback_latency_us = 12.0,
+	.ideal_dram_bw_after_urgent_percent = 40.0,
+	.max_request_size_bytes = 256,
+	.dram_channel_width_bytes = 16,
+	.fabric_datapath_to_dcn_data_return_bytes = 64,
+	.dcn_downspread_percent = 0.5,
+	.downspread_percent = 0.5,
+	.dram_page_open_time_ns = 50.0,
+	.dram_rw_turnaround_time_ns = 17.5,
+	.dram_return_buffer_per_channel_bytes = 8192,
+	.round_trip_ping_latency_dcfclk_cycles = 131,
+	.urgent_out_of_order_return_per_channel_bytes = 4096,
+	.channel_interleave_bytes = 256,
+	.num_banks = 8,
+	.num_chans = 16,
+	.vmm_page_size_bytes = 4096,
+	.dram_clock_change_latency_us = 45.0,
+	.writeback_dram_clock_change_latency_us = 23.0,
+	.return_bus_width_bytes = 64,
+	.dispclk_dppclk_vco_speed_mhz = 3850,
+	.xfc_bus_transport_time_us = 20,
+	.xfc_xbuf_latency_tolerance_us = 50,
+	.use_urgent_burst_bw = 0,
+};
 
 struct _vcs_dpi_ip_params_st dcn2_1_ip = {
 	.odm_capable = 1,
@@ -950,10 +1059,8 @@ static enum dcn_zstate_support_state  decide_zstate_support(struct dc *dc, struc
 {
 	int plane_count;
 	int i;
-	unsigned int min_dst_y_next_start_us;
 
 	plane_count = 0;
-	min_dst_y_next_start_us = 0;
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		if (context->res_ctx.pipe_ctx[i].plane_state)
 			plane_count++;
@@ -975,26 +1082,15 @@ static enum dcn_zstate_support_state  decide_zstate_support(struct dc *dc, struc
 	else if (context->stream_count == 1 &&  context->streams[0]->signal == SIGNAL_TYPE_EDP) {
 		struct dc_link *link = context->streams[0]->sink->link;
 		struct dc_stream_status *stream_status = &context->stream_status[0];
-		struct dc_stream_state *current_stream = context->streams[0];
 		int minmum_z8_residency = dc->debug.minimum_z8_residency_time > 0 ? dc->debug.minimum_z8_residency_time : 1000;
 		bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z8_residency;
 		bool is_pwrseq0 = link->link_index == 0;
-		bool isFreesyncVideo;
-
-		isFreesyncVideo = current_stream->adjust.v_total_min == current_stream->adjust.v_total_max;
-		isFreesyncVideo = isFreesyncVideo && current_stream->timing.v_total < current_stream->adjust.v_total_min;
-		for (i = 0; i < dc->res_pool->pipe_count; i++) {
-			if (context->res_ctx.pipe_ctx[i].stream == current_stream && isFreesyncVideo) {
-				min_dst_y_next_start_us = context->res_ctx.pipe_ctx[i].dlg_regs.min_dst_y_next_start_us;
-				break;
-			}
-		}
 
 		/* Don't support multi-plane configurations */
 		if (stream_status->plane_count > 1)
 			return DCN_ZSTATE_SUPPORT_DISALLOW;
 
-		if (is_pwrseq0 && (context->bw_ctx.dml.vba.StutterPeriod > 5000.0 || min_dst_y_next_start_us > 5000))
+		if (is_pwrseq0 && context->bw_ctx.dml.vba.StutterPeriod > 5000.0)
 			return DCN_ZSTATE_SUPPORT_ALLOW;
 		else if (is_pwrseq0 && link->psr_settings.psr_version == DC_PSR_VERSION_1 && !link->panel_config.psr.disable_psr)
 			return allow_z8 ? DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY : DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY;
@@ -1087,7 +1183,7 @@ void dcn20_calculate_dlg_params(struct dc *dc,
 		pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
 		pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
 
-		if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) {
+		if (dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM) {
 			// Phantom pipe requires that DET_SIZE = 0 and no unbounded requests
 			context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0;
 			context->res_ctx.pipe_ctx[i].unbounded_req = false;
@@ -1437,7 +1533,7 @@ int dcn20_populate_dml_pipes_from_context(struct dc *dc,
 		 */
 		if (res_ctx->pipe_ctx[i].plane_state &&
 				(res_ctx->pipe_ctx[i].plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE ||
-				 res_ctx->pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM))
+				dc_state_get_pipe_subvp_type(context, &res_ctx->pipe_ctx[i]) == SUBVP_PHANTOM))
 			pipes[pipe_cnt].pipe.src.num_cursors = 0;
 		else
 			pipes[pipe_cnt].pipe.src.num_cursors = dc->dml.ip.number_of_cursors;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
index 3686f1e7de3a..63c48c29ba49 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
@@ -3542,7 +3542,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 {
 	struct vba_vars_st *v = &mode_lib->vba;
 	int MinPrefetchMode, MaxPrefetchMode;
-	int i;
+	int i, start_state;
 	unsigned int j, k, m;
 	bool   EnoughWritebackUnits = true;
 	bool   WritebackModeSupport = true;
@@ -3553,6 +3553,11 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 
 	/*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
 
+	if (mode_lib->validate_max_state)
+		start_state = v->soc.num_states - 1;
+	else
+		start_state = 0;
+
 	CalculateMinAndMaxPrefetchMode(
 		mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
 		&MinPrefetchMode, &MaxPrefetchMode);
@@ -3851,7 +3856,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 			v->SingleDPPViewportSizeSupportPerPlane,
 			&v->ViewportSizeSupport[0][0]);
 
-	for (i = 0; i < v->soc.num_states; i++) {
+	for (i = start_state; i < v->soc.num_states; i++) {
 		for (j = 0; j < 2; j++) {
 			v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
 			v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
@@ -4007,7 +4012,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 
 	/*Total Available Pipes Support Check*/
 
-	for (i = 0; i < v->soc.num_states; i++) {
+	for (i = start_state; i < v->soc.num_states; i++) {
 		for (j = 0; j < 2; j++) {
 			if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
 				v->TotalAvailablePipesSupport[i][j] = true;
@@ -4046,7 +4051,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 		}
 	}
 
-	for (i = 0; i < v->soc.num_states; i++) {
+	for (i = start_state; i < v->soc.num_states; i++) {
 		for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
 			v->RequiresDSC[i][k] = false;
 			v->RequiresFEC[i][k] = false;
@@ -4174,7 +4179,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 			}
 		}
 	}
-	for (i = 0; i < v->soc.num_states; i++) {
+	for (i = start_state; i < v->soc.num_states; i++) {
 		v->DIOSupport[i] = true;
 		for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
 			if (!v->skip_dio_check[k] && v->BlendingAndTiming[k] == k && (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)
@@ -4185,7 +4190,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 		}
 	}
 
-	for (i = 0; i < v->soc.num_states; ++i) {
+	for (i = start_state; i < v->soc.num_states; ++i) {
 		v->ODMCombine4To1SupportCheckOK[i] = true;
 		for (k = 0; k < v->NumberOfActivePlanes; ++k) {
 			if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
@@ -4197,7 +4202,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 
 	/* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
 
-	for (i = 0; i < v->soc.num_states; i++) {
+	for (i = start_state; i < v->soc.num_states; i++) {
 		v->NotEnoughDSCUnits[i] = false;
 		v->TotalDSCUnitsRequired = 0.0;
 		for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
@@ -4217,7 +4222,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 	}
 	/*DSC Delay per state*/
 
-	for (i = 0; i < v->soc.num_states; i++) {
+	for (i = start_state; i < v->soc.num_states; i++) {
 		for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
 			if (v->OutputBppPerState[i][k] == BPP_INVALID) {
 				v->BPP = 0.0;
@@ -4333,7 +4338,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 		v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
 	}
 
-	for (i = 0; i < v->soc.num_states; i++) {
+	for (i = start_state; i < v->soc.num_states; i++) {
 		for (j = 0; j < 2; j++) {
 			for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
 				v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
@@ -5075,7 +5080,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 
 	/*PTE Buffer Size Check*/
 
-	for (i = 0; i < v->soc.num_states; i++) {
+	for (i = start_state; i < v->soc.num_states; i++) {
 		for (j = 0; j < 2; j++) {
 			v->PTEBufferSizeNotExceeded[i][j] = true;
 			for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
@@ -5136,7 +5141,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 	}
 	/*Mode Support, Voltage State and SOC Configuration*/
 
-	for (i = v->soc.num_states - 1; i >= 0; i--) {
+	for (i = v->soc.num_states - 1; i >= start_state; i--) {
 		for (j = 0; j < 2; j++) {
 			if (v->ScaleRatioAndTapsSupport == 1 && v->SourceFormatPixelAndScanSupport == 1 && v->ViewportSizeSupport[i][j] == 1
 					&& v->DIOSupport[i] == 1 && v->ODMCombine4To1SupportCheckOK[i] == 1
@@ -5158,7 +5163,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 	}
 	{
 		unsigned int MaximumMPCCombine = 0;
-		for (i = v->soc.num_states; i >= 0; i--) {
+		for (i = v->soc.num_states; i >= start_state; i--) {
 			if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
 				v->VoltageLevel = i;
 				v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 9ec4172d1c2d..a0a65e099104 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -32,6 +32,7 @@
 #include "clk_mgr/dcn32/dcn32_smu13_driver_if.h"
 #include "dcn30/dcn30_resource.h"
 #include "link.h"
+#include "dc_state_priv.h"
 
 #define DC_LOGGER_INIT(logger)
 
@@ -45,6 +46,14 @@ static const struct subvp_high_refresh_list subvp_high_refresh_list = {
 				{.width = 1920, .height = 1080, }},
 };
 
+static const struct subvp_active_margin_list subvp_active_margin_list = {
+			.min_refresh = 55, /* refresh-rate bounds; unit presumably Hz - TODO confirm */
+			.max_refresh = 65,
+			.res = { /* width x height pairs listed for this refresh range */
+				{.width = 2560, .height = 1440, },
+				{.width = 1920, .height = 1080, }},
+};
+
 struct _vcs_dpi_ip_params_st dcn3_2_ip = {
 	.gpuvm_enable = 0,
 	.gpuvm_max_page_table_levels = 4,
@@ -333,7 +342,7 @@ void dcn32_helper_populate_phantom_dlg_params(struct dc *dc,
 		if (!pipe->stream)
 			continue;
 
-		if (pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+		if (pipe->plane_state && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
 			pipes[pipe_idx].pipe.dest.vstartup_start =
 				get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
 			pipes[pipe_idx].pipe.dest.vupdate_offset =
@@ -616,7 +625,7 @@ static bool dcn32_assign_subvp_pipe(struct dc *dc,
 		if (pipe->plane_state && !pipe->top_pipe && !dcn32_is_center_timing(pipe) &&
 				!(pipe->stream->timing.pix_clk_100hz / 10000 > DCN3_2_MAX_SUBVP_PIXEL_RATE_MHZ) &&
 				(!dcn32_is_psr_capable(pipe) || (context->stream_count == 1 && dc->caps.dmub_caps.subvp_psr)) &&
-				pipe->stream->mall_stream_config.type == SUBVP_NONE &&
+				dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_NONE &&
 				(refresh_rate < 120 || dcn32_allow_subvp_high_refresh_rate(dc, context, pipe)) &&
 				!pipe->plane_state->address.tmz_surface &&
 				(vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] <= 0 ||
@@ -674,7 +683,7 @@ static bool dcn32_enough_pipes_for_subvp(struct dc *dc, struct dc_state *context
 
 		// Find the minimum pipe split count for non SubVP pipes
 		if (resource_is_pipe_type(pipe, OPP_HEAD) &&
-		    pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+			dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_NONE) {
 			split_cnt = 0;
 			while (pipe) {
 				split_cnt++;
@@ -727,8 +736,8 @@ static bool subvp_subvp_schedulable(struct dc *dc, struct dc_state *context)
 		 * and also to store the two main SubVP pipe pointers in subvp_pipes[2].
 		 */
 		if (pipe->stream && pipe->plane_state && !pipe->top_pipe &&
-		    pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
-			phantom = pipe->stream->mall_stream_config.paired_stream;
+			dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) {
+			phantom = dc_state_get_paired_subvp_stream(context, pipe->stream);
 			microschedule_lines = (phantom->timing.v_total - phantom->timing.v_front_porch) +
 					phantom->timing.v_addressable;
 
@@ -796,6 +805,9 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context)
 	int16_t stretched_drr_us = 0;
 	int16_t drr_stretched_vblank_us = 0;
 	int16_t max_vblank_mallregion = 0;
+	struct dc_stream_state *phantom_stream;
+	bool subvp_found = false;
+	bool drr_found = false;
 
 	// Find SubVP pipe
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
@@ -808,8 +820,10 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context)
 			continue;
 
 		// Find the SubVP pipe
-		if (pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+		if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) {
+			subvp_found = true;
 			break;
+		}
 	}
 
 	// Find the DRR pipe
@@ -817,32 +831,37 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context)
 		drr_pipe = &context->res_ctx.pipe_ctx[i];
 
 		// We check for master pipe only
-		if (!resource_is_pipe_type(pipe, OTG_MASTER) ||
-				!resource_is_pipe_type(pipe, DPP_PIPE))
+		if (!resource_is_pipe_type(drr_pipe, OTG_MASTER) ||
+				!resource_is_pipe_type(drr_pipe, DPP_PIPE))
 			continue;
 
-		if (drr_pipe->stream->mall_stream_config.type == SUBVP_NONE && drr_pipe->stream->ignore_msa_timing_param &&
-				(drr_pipe->stream->allow_freesync || drr_pipe->stream->vrr_active_variable || drr_pipe->stream->vrr_active_fixed))
+		if (dc_state_get_pipe_subvp_type(context, drr_pipe) == SUBVP_NONE && drr_pipe->stream->ignore_msa_timing_param &&
+				(drr_pipe->stream->allow_freesync || drr_pipe->stream->vrr_active_variable || drr_pipe->stream->vrr_active_fixed)) {
+			drr_found = true;
 			break;
+		}
 	}
 
-	main_timing = &pipe->stream->timing;
-	phantom_timing = &pipe->stream->mall_stream_config.paired_stream->timing;
-	drr_timing = &drr_pipe->stream->timing;
-	prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total /
-			(double)(phantom_timing->pix_clk_100hz * 100) * 1000000 +
-			dc->caps.subvp_prefetch_end_to_mall_start_us;
-	subvp_active_us = main_timing->v_addressable * main_timing->h_total /
-			(double)(main_timing->pix_clk_100hz * 100) * 1000000;
-	drr_frame_us = drr_timing->v_total * drr_timing->h_total /
-			(double)(drr_timing->pix_clk_100hz * 100) * 1000000;
-	// P-State allow width and FW delays already included phantom_timing->v_addressable
-	mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total /
-			(double)(phantom_timing->pix_clk_100hz * 100) * 1000000;
-	stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US;
-	drr_stretched_vblank_us = (drr_timing->v_total - drr_timing->v_addressable) * drr_timing->h_total /
-			(double)(drr_timing->pix_clk_100hz * 100) * 1000000 + (stretched_drr_us - drr_frame_us);
-	max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us;
+	if (subvp_found && drr_found) {
+		phantom_stream = dc_state_get_paired_subvp_stream(context, pipe->stream);
+		main_timing = &pipe->stream->timing;
+		phantom_timing = &phantom_stream->timing;
+		drr_timing = &drr_pipe->stream->timing;
+		prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total /
+				(double)(phantom_timing->pix_clk_100hz * 100) * 1000000 +
+				dc->caps.subvp_prefetch_end_to_mall_start_us;
+		subvp_active_us = main_timing->v_addressable * main_timing->h_total /
+				(double)(main_timing->pix_clk_100hz * 100) * 1000000;
+		drr_frame_us = drr_timing->v_total * drr_timing->h_total /
+				(double)(drr_timing->pix_clk_100hz * 100) * 1000000;
+		// P-State allow width and FW delays already included phantom_timing->v_addressable
+		mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total /
+				(double)(phantom_timing->pix_clk_100hz * 100) * 1000000;
+		stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US;
+		drr_stretched_vblank_us = (drr_timing->v_total - drr_timing->v_addressable) * drr_timing->h_total /
+				(double)(drr_timing->pix_clk_100hz * 100) * 1000000 + (stretched_drr_us - drr_frame_us);
+		max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us;
+	}
 
 	/* We consider SubVP + DRR schedulable if the stretched frame duration of the DRR display (i.e. the
 	 * highest refresh rate + margin that can support UCLK P-State switch) passes the static analysis
@@ -887,6 +906,8 @@ static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context)
 	struct dc_crtc_timing *main_timing = NULL;
 	struct dc_crtc_timing *phantom_timing = NULL;
 	struct dc_crtc_timing *vblank_timing = NULL;
+	struct dc_stream_state *phantom_stream;
+	enum mall_stream_type pipe_mall_type;
 
 	/* For SubVP + VBLANK/DRR cases, we assume there can only be
 	 * a single VBLANK/DRR display. If DML outputs SubVP + VBLANK
@@ -896,6 +917,7 @@ static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context)
 	 */
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		pipe = &context->res_ctx.pipe_ctx[i];
+		pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe);
 
 		// We check for master pipe, but it shouldn't matter since we only need
 		// the pipe for timing info (stream should be same for any pipe splits)
@@ -903,18 +925,19 @@ static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context)
 				!resource_is_pipe_type(pipe, DPP_PIPE))
 			continue;
 
-		if (!found && pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+		if (!found && pipe_mall_type == SUBVP_NONE) {
 			// Found pipe which is not SubVP or Phantom (i.e. the VBLANK pipe).
 			vblank_index = i;
 			found = true;
 		}
 
-		if (!subvp_pipe && pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+		if (!subvp_pipe && pipe_mall_type == SUBVP_MAIN)
 			subvp_pipe = pipe;
 	}
 	if (found) {
+		phantom_stream = dc_state_get_paired_subvp_stream(context, subvp_pipe->stream);
 		main_timing = &subvp_pipe->stream->timing;
-		phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing;
+		phantom_timing = &phantom_stream->timing;
 		vblank_timing = &context->res_ctx.pipe_ctx[vblank_index].stream->timing;
 		// Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe
 		// Also include the prefetch end to mallstart delay time
@@ -969,7 +992,7 @@ static bool subvp_subvp_admissable(struct dc *dc,
 			continue;
 
 		if (pipe->plane_state && !pipe->top_pipe &&
-				pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+				dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) {
 			refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 +
 				pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1);
 			refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total);
@@ -1018,23 +1041,23 @@ static bool subvp_validate_static_schedulability(struct dc *dc,
 
 	for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+		enum mall_stream_type pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe);
 
 		if (!pipe->stream)
 			continue;
 
 		if (pipe->plane_state && !pipe->top_pipe) {
-			if (pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+			if (pipe_mall_type == SUBVP_MAIN)
 				subvp_count++;
-			if (pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+			if (pipe_mall_type == SUBVP_NONE)
 				non_subvp_pipes++;
-			}
 		}
 
 		// Count how many planes that aren't SubVP/phantom are capable of VACTIVE
 		// switching (SubVP + VACTIVE unsupported). In situations where we force
 		// SubVP for a VACTIVE plane, we don't want to increment the vactive_count.
 		if (vba->ActiveDRAMClockChangeLatencyMarginPerState[vlevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] > 0 &&
-		    pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+				pipe_mall_type == SUBVP_NONE) {
 			vactive_count++;
 		}
 		pipe_idx++;
@@ -1070,7 +1093,7 @@ static void assign_subvp_index(struct dc *dc, struct dc_state *context)
 		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
 
 		if (resource_is_pipe_type(pipe_ctx, OTG_MASTER) &&
-				pipe_ctx->stream->mall_stream_config.type == SUBVP_MAIN) {
+				dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_MAIN) {
 			pipe_ctx->subvp_index = index++;
 		} else {
 			pipe_ctx->subvp_index = 0;
@@ -1089,7 +1112,7 @@ struct pipe_slice_table {
 		struct pipe_ctx *pri_pipe;
 		struct dc_plane_state *plane;
 		int slice_count;
-	} mpc_combines[MAX_SURFACES];
+	} mpc_combines[MAX_PLANES];
 	int mpc_combine_count;
 };
 
@@ -1192,13 +1215,16 @@ static bool update_pipe_slice_table_with_split_flags(
 	 */
 	struct pipe_ctx *pipe;
 	bool odm;
-	int i;
+	int dc_pipe_idx, dml_pipe_idx = 0;
 	bool updated = false;
 
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		pipe = &context->res_ctx.pipe_ctx[i];
+	for (dc_pipe_idx = 0;
+			dc_pipe_idx < dc->res_pool->pipe_count; dc_pipe_idx++) {
+		pipe = &context->res_ctx.pipe_ctx[dc_pipe_idx];
+		if (resource_is_pipe_type(pipe, FREE_PIPE))
+			continue;
 
-		if (merge[i]) {
+		if (merge[dc_pipe_idx]) {
 			if (resource_is_pipe_type(pipe, OPP_HEAD))
 				/* merging OPP head means reducing ODM slice
 				 * count by 1
@@ -1213,17 +1239,18 @@ static bool update_pipe_slice_table_with_split_flags(
 			updated = true;
 		}
 
-		if (split[i]) {
-			odm = vba->ODMCombineEnabled[vba->pipe_plane[i]] !=
+		if (split[dc_pipe_idx]) {
+			odm = vba->ODMCombineEnabled[vba->pipe_plane[dml_pipe_idx]] !=
 					dm_odm_combine_mode_disabled;
 			if (odm && resource_is_pipe_type(pipe, OPP_HEAD))
 				update_slice_table_for_stream(
-						table, pipe->stream, split[i] - 1);
+						table, pipe->stream, split[dc_pipe_idx] - 1);
 			else if (!odm && resource_is_pipe_type(pipe, DPP_PIPE))
 				update_slice_table_for_plane(table, pipe,
-						pipe->plane_state, split[i] - 1);
+						pipe->plane_state, split[dc_pipe_idx] - 1);
 			updated = true;
 		}
+		dml_pipe_idx++;
 	}
 	return updated;
 }
@@ -1233,15 +1260,11 @@ static void update_pipes_with_slice_table(struct dc *dc, struct dc_state *contex
 {
 	int i;
 
-	for (i = 0; i < table->odm_combine_count; i++) {
+	for (i = 0; i < table->odm_combine_count; i++)
 		resource_update_pipes_for_stream_with_slice_count(context,
 				dc->current_state, dc->res_pool,
 				table->odm_combines[i].stream,
 				table->odm_combines[i].slice_count);
-		/* TODO: move this into the function above */
-		dcn20_build_mapped_resource(dc, context,
-				table->odm_combines[i].stream);
-	}
 
 	for (i = 0; i < table->mpc_combine_count; i++)
 		resource_update_pipes_for_plane_with_slice_count(context,
@@ -1265,7 +1288,7 @@ static bool update_pipes_with_split_flags(struct dc *dc, struct dc_state *contex
 	return updated;
 }
 
-static bool should_allow_odm_power_optimization(struct dc *dc,
+static bool should_apply_odm_power_optimization(struct dc *dc,
 		struct dc_state *context, struct vba_vars_st *v, int *split,
 		bool *merge)
 {
@@ -1369,9 +1392,12 @@ static void try_odm_power_optimization_and_revalidate(
 {
 	int i;
 	unsigned int new_vlevel;
+	unsigned int cur_policy[MAX_PIPES];
 
-	for (i = 0; i < pipe_cnt; i++)
+	for (i = 0; i < pipe_cnt; i++) {
+		cur_policy[i] = pipes[i].pipe.dest.odm_combine_policy;
 		pipes[i].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1;
+	}
 
 	new_vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
 
@@ -1380,6 +1406,9 @@ static void try_odm_power_optimization_and_revalidate(
 		memset(merge, 0, MAX_PIPES * sizeof(bool));
 		*vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, new_vlevel, split, merge);
 		context->bw_ctx.dml.vba.VoltageLevel = *vlevel;
+	} else {
+		for (i = 0; i < pipe_cnt; i++)
+			pipes[i].pipe.dest.odm_combine_policy = cur_policy[i];
 	}
 }
 
@@ -1408,6 +1437,7 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
 	unsigned int dc_pipe_idx = 0;
 	int i = 0;
 	bool found_supported_config = false;
+	int vlevel_temp = 0;
 
 	dc_assert_fp_enabled();
 
@@ -1440,13 +1470,15 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
 	 */
 	if (!dc->debug.force_disable_subvp && !dc->caps.dmub_caps.gecc_enable && dcn32_all_pipes_have_stream_and_plane(dc, context) &&
 	    !dcn32_mpo_in_use(context) && !dcn32_any_surfaces_rotated(dc, context) && !is_test_pattern_enabled(context) &&
-		(*vlevel == context->bw_ctx.dml.soc.num_states ||
+		(*vlevel == context->bw_ctx.dml.soc.num_states || (vba->DRAMSpeedPerState[*vlevel] != vba->DRAMSpeedPerState[0] &&
+				vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] != dm_dram_clock_change_unsupported) ||
 	    vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported ||
 	    dc->debug.force_subvp_mclk_switch)) {
 
 		dcn32_merge_pipes_for_subvp(dc, context);
 		memset(merge, 0, MAX_PIPES * sizeof(bool));
 
+		vlevel_temp = *vlevel;
 		/* to re-initialize viewport after the pipe merge */
 		for (i = 0; i < dc->res_pool->pipe_count; i++) {
 			struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
@@ -1515,10 +1547,14 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
 			}
 		}
 
+		if (vba->DRAMSpeedPerState[*vlevel] >= vba->DRAMSpeedPerState[vlevel_temp])
+			found_supported_config = false;
+
 		// If SubVP pipe config is unsupported (or cannot be used for UCLK switching)
 		// remove phantom pipes and repopulate dml pipes
 		if (!found_supported_config) {
-			dc->res_pool->funcs->remove_phantom_pipes(dc, context, false);
+			dc_state_remove_phantom_streams_and_planes(dc, context);
+			dc_state_release_phantom_streams_and_planes(dc, context);
 			vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] = dm_dram_clock_change_unsupported;
 			*pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false);
 
@@ -1550,7 +1586,7 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
 		}
 	}
 
-	if (should_allow_odm_power_optimization(dc, context, vba, split, merge))
+	if (should_apply_odm_power_optimization(dc, context, vba, split, merge))
 		try_odm_power_optimization_and_revalidate(
 				dc, context, pipes, split, merge, vlevel, *pipe_cnt);
 
@@ -1670,7 +1706,7 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context,
 		pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt,
 				pipe_idx);
 
-		if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) {
+		if (dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM) {
 			// Phantom pipe requires that DET_SIZE = 0 and no unbounded requests
 			context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0;
 			context->res_ctx.pipe_ctx[i].unbounded_req = false;
@@ -1702,7 +1738,7 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context,
 				context->res_ctx.pipe_ctx[i].plane_state != context->res_ctx.pipe_ctx[i].top_pipe->plane_state) &&
 				context->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) {
 			/* SS: all active surfaces stored in MALL */
-			if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type != SUBVP_PHANTOM) {
+			if (dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) != SUBVP_PHANTOM) {
 				context->bw_ctx.bw.dcn.mall_ss_size_bytes += context->res_ctx.pipe_ctx[i].surface_size_in_mall_bytes;
 
 				if (context->res_ctx.pipe_ctx[i].stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED) {
@@ -1916,7 +1952,8 @@ bool dcn32_internal_validate_bw(struct dc *dc,
 		return false;
 
 	// For each full update, remove all existing phantom pipes first
-	dc->res_pool->funcs->remove_phantom_pipes(dc, context, fast_validate);
+	dc_state_remove_phantom_streams_and_planes(dc, context);
+	dc_state_release_phantom_streams_and_planes(dc, context);
 
 	dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
 
@@ -2178,6 +2215,8 @@ bool dcn32_internal_validate_bw(struct dc *dc,
 		int i;
 
 		pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate);
+		if (!dc->config.enable_windowed_mpo_odm)
+			dcn32_update_dml_pipes_odm_policy_based_on_context(dc, context, pipes);
 
 		/* repopulate_pipes = 1 means the pipes were either split or merged. In this case
 		 * we have to re-calculate the DET allocation and run through DML once more to
@@ -2186,7 +2225,9 @@ bool dcn32_internal_validate_bw(struct dc *dc,
 		 * */
 		context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
 					dm_prefetch_support_uclk_fclk_and_stutter_if_possible;
+
 		vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
+
 		if (vlevel == context->bw_ctx.dml.soc.num_states) {
 			/* failed after DET size changes */
 			goto validate_fail;
@@ -2231,6 +2272,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
 	int i, pipe_idx, vlevel_temp = 0;
 	double dcfclk = dcn3_2_soc.clock_limits[0].dcfclk_mhz;
 	double dcfclk_from_validation = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
+	double dram_speed_from_validation = context->bw_ctx.dml.vba.DRAMSpeed;
 	double dcfclk_from_fw_based_mclk_switching = dcfclk_from_validation;
 	bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] !=
 			dm_dram_clock_change_unsupported;
@@ -2418,7 +2460,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
 	}
 
 	if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) {
-		min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;
+		min_dram_speed_mts = dram_speed_from_validation;
 		min_dram_speed_mts_margin = 160;
 
 		context->bw_ctx.dml.soc.dram_clock_change_latency_us =
@@ -3294,25 +3336,24 @@ bool dcn32_allow_subvp_with_active_margin(struct pipe_ctx *pipe)
 {
 	bool allow = false;
 	uint32_t refresh_rate = 0;
+	uint32_t min_refresh = subvp_active_margin_list.min_refresh;
+	uint32_t max_refresh = subvp_active_margin_list.max_refresh;
+	uint32_t i;
 
-	/* Allow subvp on displays that have active margin for 2560x1440@60hz displays
-	 * only for now. There must be no scaling as well.
-	 *
-	 * For now we only enable on 2560x1440@60hz displays to enable 4K60 + 1440p60 configs
-	 * for p-state switching.
-	 */
-	if (pipe->stream && pipe->plane_state) {
-		refresh_rate = (pipe->stream->timing.pix_clk_100hz * 100 +
-						pipe->stream->timing.v_total * pipe->stream->timing.h_total - 1)
-						/ (double)(pipe->stream->timing.v_total * pipe->stream->timing.h_total);
-		if (pipe->stream->timing.v_addressable == 1440 &&
-				pipe->stream->timing.h_addressable == 2560 &&
-				refresh_rate >= 55 && refresh_rate <= 65 &&
-				pipe->plane_state->src_rect.height == 1440 &&
-				pipe->plane_state->src_rect.width == 2560 &&
-				pipe->plane_state->dst_rect.height == 1440 &&
-				pipe->plane_state->dst_rect.width == 2560)
+	for (i = 0; i < SUBVP_ACTIVE_MARGIN_LIST_LEN; i++) {
+		uint32_t width = subvp_active_margin_list.res[i].width;
+		uint32_t height = subvp_active_margin_list.res[i].height;
+
+		refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 +
+			pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1);
+		refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total);
+		refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total);
+
+		if (refresh_rate >= min_refresh && refresh_rate <= max_refresh &&
+				dcn32_check_native_scaling_for_res(pipe, width, height)) {
 			allow = true;
+			break;
+		}
 	}
 	return allow;
 }
@@ -3431,7 +3472,15 @@ void dcn32_assign_fpo_vactive_candidate(struct dc *dc, const struct dc_state *co
 	for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
 		const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
-		if (!pipe->stream)
+		/* In DCN32/321, FPO uses per-pipe P-State force.
+		 * If there are no planes, HUBP is power gated and
+		 * therefore programming UCLK_PSTATE_FORCE does
+		 * nothing (P-State will always be asserted naturally
+		 * on a pipe that has HUBP power gated). Therefore we
+		 * only want to enable FPO if the FPO pipe has both
+		 * a stream and a plane.
+		 */
+		if (!pipe->stream || !pipe->plane_state)
 			continue;
 
 		if (vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] <= 0) {
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
index cbdfb762c10c..6c84b0fa40f4 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -813,6 +813,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
 					(v->DRAMSpeedPerState[mode_lib->vba.VoltageLevel] <= MEM_STROBE_FREQ_MHZ ||
 						v->DCFCLKPerState[mode_lib->vba.VoltageLevel] <= DCFCLK_FREQ_EXTRA_PREFETCH_REQ_MHZ) ?
 							mode_lib->vba.ip.min_prefetch_in_strobe_us : 0,
+					mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] > 0 || mode_lib->vba.DRAMClockChangeRequirementFinal == false,
+
 					/* Output */
 					&v->DSTXAfterScaler[k],
 					&v->DSTYAfterScaler[k],
@@ -3317,6 +3319,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 							v->SwathHeightCThisState[k], v->TWait,
 							(v->DRAMSpeedPerState[i] <= MEM_STROBE_FREQ_MHZ || v->DCFCLKState[i][j] <= DCFCLK_FREQ_EXTRA_PREFETCH_REQ_MHZ) ?
 									mode_lib->vba.ip.min_prefetch_in_strobe_us : 0,
+							mode_lib->vba.PrefetchModePerState[i][j] > 0 || mode_lib->vba.DRAMClockChangeRequirementFinal == false,
 
 							/* Output */
 							&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTXAfterScaler[k],
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
index d940dfa5ae43..80fccd4999a5 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
@@ -3423,6 +3423,7 @@ bool dml32_CalculatePrefetchSchedule(
 		unsigned int SwathHeightC,
 		double TWait,
 		double TPreReq,
+		bool ExtendPrefetchIfPossible,
 		/* Output */
 		double   *DSTXAfterScaler,
 		double   *DSTYAfterScaler,
@@ -3892,12 +3893,32 @@ bool dml32_CalculatePrefetchSchedule(
 			/* Clamp to oto for bandwidth calculation */
 			LinesForPrefetchBandwidth = dst_y_prefetch_oto;
 		} else {
-			*DestinationLinesForPrefetch = dst_y_prefetch_equ;
-			TimeForFetchingMetaPTE = Tvm_equ;
-			TimeForFetchingRowInVBlank = Tr0_equ;
-			*PrefetchBandwidth = prefetch_bw_equ;
-			/* Clamp to equ for bandwidth calculation */
-			LinesForPrefetchBandwidth = dst_y_prefetch_equ;
+			/* For mode programming we want to extend the prefetch as much as possible
+			 * (up to oto, or as long as we can for equ) if we're not already applying
+			 * the 60us prefetch requirement. This is to avoid intermittent underflow
+			 * issues during prefetch.
+			 *
+			 * The prefetch extension is applied under the following scenarios:
+			 * 1. We're in prefetch mode > 0 (i.e. we don't support MCLK switch in blank)
+			 * 2. We're using subvp or drr methods of p-state switch, in which case
+			 *    we don't care if prefetch takes up more of the blanking time
+			 *
+			 * Mode programming typically chooses the smallest prefetch time possible
+			 * (i.e. highest bandwidth during prefetch) presumably to create margin between
+			 * p-states / c-states that happen in vblank and prefetch. Therefore we only
+			 * apply this prefetch extension when p-state in vblank is not required (UCLK
+			 * p-states take up the most vblank time).
+			 */
+			if (ExtendPrefetchIfPossible && TPreReq == 0 && VStartup < MaxVStartup) {
+				MyError = true;
+			} else {
+				*DestinationLinesForPrefetch = dst_y_prefetch_equ;
+				TimeForFetchingMetaPTE = Tvm_equ;
+				TimeForFetchingRowInVBlank = Tr0_equ;
+				*PrefetchBandwidth = prefetch_bw_equ;
+				/* Clamp to equ for bandwidth calculation */
+				LinesForPrefetchBandwidth = dst_y_prefetch_equ;
+			}
 		}
 
 		*DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
index 592d174df6c6..5d34735df83d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
@@ -747,6 +747,7 @@ bool dml32_CalculatePrefetchSchedule(
 		unsigned int SwathHeightC,
 		double TWait,
 		double TPreReq,
+		bool ExtendPrefetchIfPossible,
 		/* Output */
 		double   *DSTXAfterScaler,
 		double   *DSTYAfterScaler,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
index a5fe523668e9..7ea2bd5374d5 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
@@ -124,7 +124,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
 			.phyclk_mhz = 600.0,
 			.phyclk_d18_mhz = 667.0,
 			.dscclk_mhz = 186.0,
-			.dtbclk_mhz = 625.0,
+			.dtbclk_mhz = 600.0,
 		},
 		{
 			.state = 1,
@@ -133,7 +133,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
 			.phyclk_mhz = 810.0,
 			.phyclk_d18_mhz = 667.0,
 			.dscclk_mhz = 209.0,
-			.dtbclk_mhz = 625.0,
+			.dtbclk_mhz = 600.0,
 		},
 		{
 			.state = 2,
@@ -142,7 +142,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
 			.phyclk_mhz = 810.0,
 			.phyclk_d18_mhz = 667.0,
 			.dscclk_mhz = 209.0,
-			.dtbclk_mhz = 625.0,
+			.dtbclk_mhz = 600.0,
 		},
 		{
 			.state = 3,
@@ -151,7 +151,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
 			.phyclk_mhz = 810.0,
 			.phyclk_d18_mhz = 667.0,
 			.dscclk_mhz = 371.0,
-			.dtbclk_mhz = 625.0,
+			.dtbclk_mhz = 600.0,
 		},
 		{
 			.state = 4,
@@ -160,15 +160,15 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
 			.phyclk_mhz = 810.0,
 			.phyclk_d18_mhz = 667.0,
 			.dscclk_mhz = 417.0,
-			.dtbclk_mhz = 625.0,
+			.dtbclk_mhz = 600.0,
 		},
 	},
 	.num_states = 5,
-	.sr_exit_time_us = 9.0,
-	.sr_enter_plus_exit_time_us = 11.0,
-	.sr_exit_z8_time_us = 50.0, /*changed from 442.0*/
-	.sr_enter_plus_exit_z8_time_us = 50.0,/*changed from 560.0*/
-	.fclk_change_latency_us = 20.0,
+	.sr_exit_time_us = 28.0,
+	.sr_enter_plus_exit_time_us = 30.0,
+	.sr_exit_z8_time_us = 210.0,
+	.sr_enter_plus_exit_z8_time_us = 320.0,
+	.fclk_change_latency_us = 24.0,
 	.usr_retraining_latency_us = 2,
 	.writeback_latency_us = 12.0,
 
@@ -326,9 +326,74 @@ void dcn35_update_bw_bounding_box_fpu(struct dc *dc,
 		dcn3_5_soc.dram_clock_change_latency_us =
 			dc->debug.dram_clock_change_latency_ns / 1000.0;
 	}
+
+	if (dc->bb_overrides.dram_clock_change_latency_ns > 0)
+		dcn3_5_soc.dram_clock_change_latency_us =
+			dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
+
+	if (dc->bb_overrides.sr_exit_time_ns > 0)
+		dcn3_5_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0;
+
+	if (dc->bb_overrides.sr_enter_plus_exit_time_ns > 0)
+		dcn3_5_soc.sr_enter_plus_exit_time_us =
+			dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0;
+
+	if (dc->bb_overrides.sr_exit_z8_time_ns > 0)
+		dcn3_5_soc.sr_exit_z8_time_us = dc->bb_overrides.sr_exit_z8_time_ns / 1000.0;
+
+	if (dc->bb_overrides.sr_enter_plus_exit_z8_time_ns > 0)
+		dcn3_5_soc.sr_enter_plus_exit_z8_time_us =
+			dc->bb_overrides.sr_enter_plus_exit_z8_time_ns / 1000.0;
+
 	/*temp till dml2 fully work without dml1*/
 	dml_init_instance(&dc->dml, &dcn3_5_soc, &dcn3_5_ip,
 				DML_PROJECT_DCN31);
+
+	/*copy to dml2, before dml2_create*/
+	if (clk_table->num_entries > 2) {
+
+		for (i = 0; i < clk_table->num_entries; i++) {
+			dc->dml2_options.bbox_overrides.clks_table.num_states =
+				clk_table->num_entries;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz =
+				clock_limits[i].dcfclk_mhz;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].fclk_mhz =
+				clock_limits[i].fabricclk_mhz;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dispclk_mhz =
+				clock_limits[i].dispclk_mhz;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dppclk_mhz =
+				clock_limits[i].dppclk_mhz;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].socclk_mhz =
+				clock_limits[i].socclk_mhz;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz =
+				clk_table->entries[i].memclk_mhz * clk_table->entries[i].wck_ratio;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz =
+				clock_limits[i].dtbclk_mhz;
+			dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels =
+				clk_table->num_entries;
+			dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels =
+				clk_table->num_entries;
+			dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels =
+				clk_table->num_entries;
+			dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dppclk_levels =
+				clk_table->num_entries;
+			dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_socclk_levels =
+				clk_table->num_entries;
+			dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels =
+				clk_table->num_entries;
+			dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels =
+				clk_table->num_entries;
+		}
+	}
+
+	/* Update latency values */
+	dc->dml2_options.bbox_overrides.dram_clock_change_latency_us = dcn3_5_soc.dram_clock_change_latency_us;
+
+	dc->dml2_options.bbox_overrides.sr_exit_latency_us = dcn3_5_soc.sr_exit_time_us;
+	dc->dml2_options.bbox_overrides.sr_enter_plus_exit_latency_us = dcn3_5_soc.sr_enter_plus_exit_time_us;
+
+	dc->dml2_options.bbox_overrides.sr_exit_z8_time_us = dcn3_5_soc.sr_exit_z8_time_us;
+	dc->dml2_options.bbox_overrides.sr_enter_plus_exit_z8_time_us = dcn3_5_soc.sr_enter_plus_exit_z8_time_us;
 }
 
 static bool is_dual_plane(enum surface_pixel_format format)
@@ -507,3 +572,37 @@ int dcn35_populate_dml_pipes_from_context_fpu(struct dc *dc,
 
 	return pipe_cnt;
 }
+
+void dcn35_decide_zstate_support(struct dc *dc, struct dc_state *context)
+{
+	enum dcn_zstate_support_state support = DCN_ZSTATE_SUPPORT_DISALLOW; /* result; stays DISALLOW unless a case below matches */
+	unsigned int i, plane_count = 0;
+
+	for (i = 0; i < dc->res_pool->pipe_count; i++) { /* count the pipes in this state that have a plane attached */
+		if (context->res_ctx.pipe_ctx[i].plane_state)
+			plane_count++;
+	}
+
+	if (plane_count == 0) { /* no pipe has a plane: allow without further checks */
+		support = DCN_ZSTATE_SUPPORT_ALLOW;
+	} else if (plane_count == 1 && context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) { /* exactly one stream, eDP, one pipe with a plane */
+		struct dc_link *link = context->streams[0]->sink->link; /* NOTE(review): sink is dereferenced unchecked while link is NULL-checked below - confirm sink is always set here */
+		bool is_pwrseq0 = link && link->link_index == 0;
+		bool is_psr1 = link && link->psr_settings.psr_version == DC_PSR_VERSION_1 && !link->panel_config.psr.disable_psr;
+		int minmum_z8_residency = /* debug override when > 0, otherwise 1000 */
+			dc->debug.minimum_z8_residency_time > 0 ? dc->debug.minimum_z8_residency_time : 1000;
+		bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z8_residency; /* stutter period must exceed the Z8 threshold */
+		int minmum_z10_residency = /* debug override when > 0, otherwise 5000 */
+			dc->debug.minimum_z10_residency_time > 0 ? dc->debug.minimum_z10_residency_time : 5000;
+		bool allow_z10 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z10_residency; /* stutter period must exceed the Z10 threshold */
+
+		if (is_pwrseq0 && allow_z10) /* checked first, so it wins over the PSR1 and Z8-only cases */
+			support = DCN_ZSTATE_SUPPORT_ALLOW;
+		else if (is_pwrseq0 && is_psr1)
+			support = allow_z8 ? DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY : DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY;
+		else if (allow_z8)
+			support = DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY;
+	}
+
+	context->bw_ctx.bw.dcn.clk.zstate_support = support; /* store the decision in the state's clock settings */
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h
index e8d5a170893e..067480fc3691 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h
@@ -39,4 +39,6 @@ int dcn35_populate_dml_pipes_from_context_fpu(struct dc *dc,
 					      display_e2e_pipe_params_st *pipes,
 					      bool fast_validate);
 
+void dcn35_decide_zstate_support(struct dc *dc, struct dc_state *context);
+
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
index 510be909cd75..9be5ebf3a8c0 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
@@ -5420,7 +5420,7 @@ static void CalculateOutputLink(
 					*OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
 												OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
 
-					if (OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
+					if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
 						*RequiresDSC = true;
 						LinkDSCEnable = true;
 						*OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
@@ -6229,7 +6229,7 @@ static void set_calculate_prefetch_schedule_params(struct display_mode_lib_st *m
 				CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
 				CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
 				CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
-				CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes;
+				CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
 				CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k];
 				CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
 				CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k];
@@ -6329,7 +6329,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib)
 				mode_lib->ms.NoOfDPPThisState,
 				mode_lib->ms.dpte_group_bytes,
 				s->HostVMInefficiencyFactor,
-				mode_lib->ms.soc.hostvm_min_page_size_kbytes,
+				mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
 				mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels);
 
 		s->NextMaxVStartup = s->MaxVStartupAllPlanes[j];
@@ -6542,7 +6542,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib)
 						mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
 						mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels,
 						mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
-						mode_lib->ms.soc.hostvm_min_page_size_kbytes,
+						mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
 						mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k],
 						mode_lib->ms.MetaRowBytes[j][k],
 						mode_lib->ms.DPTEBytesPerRow[j][k],
@@ -7687,7 +7687,7 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib)
 		CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
 		CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
 		CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes;
-		CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes;
+		CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
 		CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn;
 		CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode;
 		CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceededPerState;
@@ -7957,7 +7957,7 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib)
 		UseMinimumDCFCLK_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
 		UseMinimumDCFCLK_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
 		UseMinimumDCFCLK_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
-		UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes;
+		UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
 		UseMinimumDCFCLK_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
 		UseMinimumDCFCLK_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
 		UseMinimumDCFCLK_params->ImmediateFlipRequirement = s->ImmediateFlipRequiredFinal;
@@ -8699,7 +8699,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
 	CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
 	CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
 	CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes;
-	CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes;
+	CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
 	CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn;
 	CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode;
 	CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0];
@@ -8805,7 +8805,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
 			mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
 			locals->dpte_group_bytes,
 			s->HostVMInefficiencyFactor,
-			mode_lib->ms.soc.hostvm_min_page_size_kbytes,
+			mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
 			mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels);
 
 	locals->TCalc = 24.0 / locals->DCFCLKDeepSleep;
@@ -8995,7 +8995,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
 			CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
 			CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
 			CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
-			CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes;
+			CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
 			CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k];
 			CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
 			CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k];
@@ -9240,7 +9240,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
 						mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
 						mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels,
 						mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
-						mode_lib->ms.soc.hostvm_min_page_size_kbytes,
+						mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
 						locals->PDEAndMetaPTEBytesFrame[k],
 						locals->MetaRowByte[k],
 						locals->PixelPTEBytesPerRow[k],
@@ -9446,13 +9446,13 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
 		CalculateWatermarks_params->CompressedBufferSizeInkByte = locals->CompressedBufferSizeInkByte;
 
 		// Output
-		CalculateWatermarks_params->Watermark = &s->dummy_watermark; // Watermarks *Watermark
-		CalculateWatermarks_params->DRAMClockChangeSupport = &mode_lib->ms.support.DRAMClockChangeSupport[j];
-		CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0][0]; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[]
-		CalculateWatermarks_params->SubViewportLinesNeededInMALL = &mode_lib->ms.SubViewportLinesNeededInMALL[j]; // dml_uint_t SubViewportLinesNeededInMALL[]
-		CalculateWatermarks_params->FCLKChangeSupport = &mode_lib->ms.support.FCLKChangeSupport[j];
-		CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // dml_float_t *MaxActiveFCLKChangeLatencySupported
-		CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport[j];
+		CalculateWatermarks_params->Watermark = &locals->Watermark; // Watermarks *Watermark
+		CalculateWatermarks_params->DRAMClockChangeSupport = &locals->DRAMClockChangeSupport;
+		CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = locals->MaxActiveDRAMClockChangeLatencySupported; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[]
+		CalculateWatermarks_params->SubViewportLinesNeededInMALL = locals->SubViewportLinesNeededInMALL; // dml_uint_t SubViewportLinesNeededInMALL[]
+		CalculateWatermarks_params->FCLKChangeSupport = &locals->FCLKChangeSupport;
+		CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &locals->MaxActiveFCLKChangeLatencySupported; // dml_float_t *MaxActiveFCLKChangeLatencySupported
+		CalculateWatermarks_params->USRRetrainingSupport = &locals->USRRetrainingSupport;
 
 		CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
 			&mode_lib->scratch,
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c
index 1a2b24cc6b61..0baf39d64a2d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c
@@ -772,18 +772,29 @@ static unsigned int get_mpc_factor(struct dml2_context *ctx,
 		const struct dc_state *state,
 		const struct dml_display_cfg_st *disp_cfg,
 		struct dml2_dml_to_dc_pipe_mapping *mapping,
-		const struct dc_stream_status *status, unsigned int stream_id,
+		const struct dc_stream_status *status,
+		const struct dc_stream_state *stream,
 		int plane_idx)
 {
 	unsigned int plane_id;
 	unsigned int cfg_idx;
+	unsigned int mpc_factor;
 
-	get_plane_id(ctx, state, status->plane_states[plane_idx], stream_id, plane_idx, &plane_id);
+	get_plane_id(ctx, state, status->plane_states[plane_idx],
+			stream->stream_id, plane_idx, &plane_id);
 	cfg_idx = find_disp_cfg_idx_by_plane_id(mapping, plane_id);
-	if (ctx->architecture == dml2_architecture_20)
-		return (unsigned int)disp_cfg->hw.DPPPerSurface[cfg_idx];
-	ASSERT(false);
-	return 1;
+	if (ctx->architecture == dml2_architecture_20) {
+		mpc_factor = (unsigned int)disp_cfg->hw.DPPPerSurface[cfg_idx];
+	} else {
+		mpc_factor = 1;
+		ASSERT(false);
+	}
+
+	/* For stereo timings, we need to pipe split */
+	if (dml2_is_stereo_timing(stream))
+		mpc_factor = 2;
+
+	return mpc_factor;
 }
 
 static unsigned int get_odm_factor(
@@ -820,14 +831,13 @@ static void populate_mpc_factors_for_stream(
 		unsigned int mpc_factors[MAX_PIPES])
 {
 	const struct dc_stream_status *status = &state->stream_status[stream_idx];
-	unsigned int stream_id = state->streams[stream_idx]->stream_id;
 	int i;
 
 	for (i = 0; i < status->plane_count; i++)
 		if (odm_factor == 1)
 			mpc_factors[i] = get_mpc_factor(
 					ctx, state, disp_cfg, mapping, status,
-					stream_id, i);
+					state->streams[stream_idx], i);
 		else
 			mpc_factors[i] = 1;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_types.h
index e85866db80ff..7ca7f2a743c2 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_types.h
@@ -38,5 +38,6 @@
 #include "core_types.h"
 #include "dsc.h"
 #include "clk_mgr.h"
+#include "dc_state_priv.h"
 
 #endif //__DML2_DC_TYPES_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c
index 32f8a43af3d6..282d70e2b18a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c
@@ -51,7 +51,7 @@ unsigned int dml2_helper_calculate_num_ways_for_subvp(struct dml2_context *ctx,
 
 		// Find the phantom pipes
 		if (pipe->stream && pipe->plane_state && !pipe->top_pipe && !pipe->prev_odm_pipe &&
-				pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+				ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
 			bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4;
 			mblk_width = ctx->config.mall_cfg.mblk_width_pixels;
 			mblk_height = bytes_per_pixel == 4 ? mblk_width = ctx->config.mall_cfg.mblk_height_4bpe_pixels : ctx->config.mall_cfg.mblk_height_8bpe_pixels;
@@ -253,7 +253,7 @@ static bool assign_subvp_pipe(struct dml2_context *ctx, struct dc_state *context
 		 *   to combine this with SubVP can cause issues with the scheduling).
 		 */
 		if (pipe->plane_state && !pipe->top_pipe &&
-				pipe->stream->mall_stream_config.type == SUBVP_NONE && refresh_rate < 120 &&
+				ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_NONE && refresh_rate < 120 &&
 				vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] <= 0) {
 			while (pipe) {
 				num_pipes++;
@@ -317,7 +317,7 @@ static bool enough_pipes_for_subvp(struct dml2_context *ctx, struct dc_state *st
 
 		// Find the minimum pipe split count for non SubVP pipes
 		if (pipe->stream && !pipe->top_pipe &&
-		    pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+				ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_NONE) {
 			split_cnt = 0;
 			while (pipe) {
 				split_cnt++;
@@ -372,8 +372,8 @@ static bool subvp_subvp_schedulable(struct dml2_context *ctx, struct dc_state *c
 		 * and also to store the two main SubVP pipe pointers in subvp_pipes[2].
 		 */
 		if (pipe->stream && pipe->plane_state && !pipe->top_pipe &&
-		    pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
-			phantom = pipe->stream->mall_stream_config.paired_stream;
+				ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) {
+			phantom = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, pipe->stream);
 			microschedule_lines = (phantom->timing.v_total - phantom->timing.v_front_porch) +
 					phantom->timing.v_addressable;
 
@@ -435,6 +435,7 @@ bool dml2_svp_drr_schedulable(struct dml2_context *ctx, struct dc_state *context
 	struct pipe_ctx *pipe = NULL;
 	struct dc_crtc_timing *main_timing = NULL;
 	struct dc_crtc_timing *phantom_timing = NULL;
+	struct dc_stream_state *phantom_stream;
 	int16_t prefetch_us = 0;
 	int16_t mall_region_us = 0;
 	int16_t drr_frame_us = 0;	// nominal frame time
@@ -453,12 +454,13 @@ bool dml2_svp_drr_schedulable(struct dml2_context *ctx, struct dc_state *context
 			continue;
 
 		// Find the SubVP pipe
-		if (pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+		if (ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_MAIN)
 			break;
 	}
 
+	phantom_stream = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, pipe->stream);
 	main_timing = &pipe->stream->timing;
-	phantom_timing = &pipe->stream->mall_stream_config.paired_stream->timing;
+	phantom_timing = &phantom_stream->timing;
 	prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total /
 			(double)(phantom_timing->pix_clk_100hz * 100) * 1000000 +
 			ctx->config.svp_pstate.subvp_prefetch_end_to_mall_start_us;
@@ -519,6 +521,8 @@ static bool subvp_vblank_schedulable(struct dml2_context *ctx, struct dc_state *
 	struct dc_crtc_timing *main_timing = NULL;
 	struct dc_crtc_timing *phantom_timing = NULL;
 	struct dc_crtc_timing *vblank_timing = NULL;
+	struct dc_stream_state *phantom_stream;
+	enum mall_stream_type pipe_mall_type;
 
 	/* For SubVP + VBLANK/DRR cases, we assume there can only be
 	 * a single VBLANK/DRR display. If DML outputs SubVP + VBLANK
@@ -528,19 +532,20 @@ static bool subvp_vblank_schedulable(struct dml2_context *ctx, struct dc_state *
 	 */
 	for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
 		pipe = &context->res_ctx.pipe_ctx[i];
+		pipe_mall_type = ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe);
 
 		// We check for master pipe, but it shouldn't matter since we only need
 		// the pipe for timing info (stream should be same for any pipe splits)
 		if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe)
 			continue;
 
-		if (!found && pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+		if (!found && pipe_mall_type == SUBVP_NONE) {
 			// Found pipe which is not SubVP or Phantom (i.e. the VBLANK pipe).
 			vblank_index = i;
 			found = true;
 		}
 
-		if (!subvp_pipe && pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+		if (!subvp_pipe && pipe_mall_type == SUBVP_MAIN)
 			subvp_pipe = pipe;
 	}
 	// Use ignore_msa_timing_param flag to identify as DRR
@@ -548,8 +553,9 @@ static bool subvp_vblank_schedulable(struct dml2_context *ctx, struct dc_state *
 		// SUBVP + DRR case
 		schedulable = dml2_svp_drr_schedulable(ctx, context, &context->res_ctx.pipe_ctx[vblank_index].stream->timing);
 	} else if (found) {
+		phantom_stream = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, subvp_pipe->stream);
 		main_timing = &subvp_pipe->stream->timing;
-		phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing;
+		phantom_timing = &phantom_stream->timing;
 		vblank_timing = &context->res_ctx.pipe_ctx[vblank_index].stream->timing;
 		// Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe
 		// Also include the prefetch end to mallstart delay time
@@ -602,19 +608,20 @@ bool dml2_svp_validate_static_schedulability(struct dml2_context *ctx, struct dc
 
 	for (i = 0, pipe_idx = 0; i < ctx->config.dcn_pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+		enum mall_stream_type pipe_mall_type = ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe);
 
 		if (!pipe->stream)
 			continue;
 
 		if (pipe->plane_state && !pipe->top_pipe &&
-				pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+				pipe_mall_type == SUBVP_MAIN)
 			subvp_count++;
 
 		// Count how many planes that aren't SubVP/phantom are capable of VACTIVE
 		// switching (SubVP + VACTIVE unsupported). In situations where we force
 		// SubVP for a VACTIVE plane, we don't want to increment the vactive_count.
 		if (vba->ActiveDRAMClockChangeLatencyMargin[vba->pipe_plane[pipe_idx]] > 0 &&
-		    pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+		    pipe_mall_type == SUBVP_NONE) {
 			vactive_count++;
 		}
 		pipe_idx++;
@@ -708,14 +715,10 @@ static void set_phantom_stream_timing(struct dml2_context *ctx, struct dc_state
 static struct dc_stream_state *enable_phantom_stream(struct dml2_context *ctx, struct dc_state *state, unsigned int dc_pipe_idx, unsigned int svp_height, unsigned int vstartup)
 {
 	struct pipe_ctx *ref_pipe = &state->res_ctx.pipe_ctx[dc_pipe_idx];
-	struct dc_stream_state *phantom_stream = ctx->config.svp_pstate.callbacks.create_stream_for_sink(ref_pipe->stream->sink);
-
-	phantom_stream->signal = SIGNAL_TYPE_VIRTUAL;
-	phantom_stream->dpms_off = true;
-	phantom_stream->mall_stream_config.type = SUBVP_PHANTOM;
-	phantom_stream->mall_stream_config.paired_stream = ref_pipe->stream;
-	ref_pipe->stream->mall_stream_config.type = SUBVP_MAIN;
-	ref_pipe->stream->mall_stream_config.paired_stream = phantom_stream;
+	struct dc_stream_state *phantom_stream = ctx->config.svp_pstate.callbacks.create_phantom_stream(
+			ctx->config.svp_pstate.callbacks.dc,
+			state,
+			ref_pipe->stream);
 
 	/* stream has limited viewport and small timing */
 	memcpy(&phantom_stream->timing, &ref_pipe->stream->timing, sizeof(phantom_stream->timing));
@@ -723,7 +726,10 @@ static struct dc_stream_state *enable_phantom_stream(struct dml2_context *ctx, s
 	memcpy(&phantom_stream->dst, &ref_pipe->stream->dst, sizeof(phantom_stream->dst));
 	set_phantom_stream_timing(ctx, state, ref_pipe, phantom_stream, dc_pipe_idx, svp_height, vstartup);
 
-	ctx->config.svp_pstate.callbacks.add_stream_to_ctx(ctx->config.svp_pstate.callbacks.dc, state, phantom_stream);
+	ctx->config.svp_pstate.callbacks.add_phantom_stream(ctx->config.svp_pstate.callbacks.dc,
+			state,
+			phantom_stream,
+			ref_pipe->stream);
 	return phantom_stream;
 }
 
@@ -740,7 +746,10 @@ static void enable_phantom_plane(struct dml2_context *ctx,
 		if (curr_pipe->top_pipe && curr_pipe->top_pipe->plane_state == curr_pipe->plane_state) {
 			phantom_plane = prev_phantom_plane;
 		} else {
-			phantom_plane = ctx->config.svp_pstate.callbacks.create_plane(ctx->config.svp_pstate.callbacks.dc);
+			phantom_plane = ctx->config.svp_pstate.callbacks.create_phantom_plane(
+					ctx->config.svp_pstate.callbacks.dc,
+					state,
+					curr_pipe->plane_state);
 		}
 
 		memcpy(&phantom_plane->address, &curr_pipe->plane_state->address, sizeof(phantom_plane->address));
@@ -763,9 +772,7 @@ static void enable_phantom_plane(struct dml2_context *ctx,
 		phantom_plane->clip_rect.y = 0;
 		phantom_plane->clip_rect.height = phantom_stream->timing.v_addressable;
 
-		phantom_plane->is_phantom = true;
-
-		ctx->config.svp_pstate.callbacks.add_plane_to_context(ctx->config.svp_pstate.callbacks.dc, phantom_stream, phantom_plane, state);
+		ctx->config.svp_pstate.callbacks.add_phantom_plane(ctx->config.svp_pstate.callbacks.dc, phantom_stream, phantom_plane, state);
 
 		curr_pipe = curr_pipe->bottom_pipe;
 		prev_phantom_plane = phantom_plane;
@@ -790,7 +797,7 @@ static void add_phantom_pipes_for_main_pipe(struct dml2_context *ctx, struct dc_
 		// We determine which phantom pipes were added by comparing with
 		// the phantom stream.
 		if (pipe->plane_state && pipe->stream && pipe->stream == phantom_stream &&
-				pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+				ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_PHANTOM) {
 			pipe->stream->use_dynamic_meta = false;
 			pipe->plane_state->flip_immediate = false;
 			if (!ctx->config.svp_pstate.callbacks.build_scaling_params(pipe)) {
@@ -800,7 +807,7 @@ static void add_phantom_pipes_for_main_pipe(struct dml2_context *ctx, struct dc_
 	}
 }
 
-static bool remove_all_planes_for_stream(struct dml2_context *ctx, struct dc_stream_state *stream, struct dc_state *context)
+static bool remove_all_phantom_planes_for_stream(struct dml2_context *ctx, struct dc_stream_state *stream, struct dc_state *context)
 {
 	int i, old_plane_count;
 	struct dc_stream_status *stream_status = NULL;
@@ -821,9 +828,11 @@ static bool remove_all_planes_for_stream(struct dml2_context *ctx, struct dc_str
 	for (i = 0; i < old_plane_count; i++)
 		del_planes[i] = stream_status->plane_states[i];
 
-	for (i = 0; i < old_plane_count; i++)
-		if (!ctx->config.svp_pstate.callbacks.remove_plane_from_context(ctx->config.svp_pstate.callbacks.dc, stream, del_planes[i], context))
+	for (i = 0; i < old_plane_count; i++) {
+		if (!ctx->config.svp_pstate.callbacks.remove_phantom_plane(ctx->config.svp_pstate.callbacks.dc, stream, del_planes[i], context))
 			return false;
+		ctx->config.svp_pstate.callbacks.release_phantom_plane(ctx->config.svp_pstate.callbacks.dc, context, del_planes[i]);
+	}
 
 	return true;
 }
@@ -832,35 +841,21 @@ bool dml2_svp_remove_all_phantom_pipes(struct dml2_context *ctx, struct dc_state
 {
 	int i;
 	bool removed_pipe = false;
-	struct dc_plane_state *phantom_plane = NULL;
 	struct dc_stream_state *phantom_stream = NULL;
 
 	for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
 		struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i];
 		// build scaling params for phantom pipes
-		if (pipe->plane_state && pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
-			phantom_plane = pipe->plane_state;
+		if (pipe->plane_state && pipe->stream && ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_PHANTOM) {
 			phantom_stream = pipe->stream;
 
-			remove_all_planes_for_stream(ctx, pipe->stream, state);
-			ctx->config.svp_pstate.callbacks.remove_stream_from_ctx(ctx->config.svp_pstate.callbacks.dc, state, pipe->stream);
-
-			/* Ref count is incremented on allocation and also when added to the context.
-			 * Therefore we must call release for the the phantom plane and stream once
-			 * they are removed from the ctx to finally decrement the refcount to 0 to free.
-			 */
-			ctx->config.svp_pstate.callbacks.plane_state_release(phantom_plane);
-			ctx->config.svp_pstate.callbacks.stream_release(phantom_stream);
+			remove_all_phantom_planes_for_stream(ctx, phantom_stream, state);
+			ctx->config.svp_pstate.callbacks.remove_phantom_stream(ctx->config.svp_pstate.callbacks.dc, state, phantom_stream);
+			ctx->config.svp_pstate.callbacks.release_phantom_stream(ctx->config.svp_pstate.callbacks.dc, state, phantom_stream);
 
 			removed_pipe = true;
 		}
 
-		// Clear all phantom stream info
-		if (pipe->stream) {
-			pipe->stream->mall_stream_config.type = SUBVP_NONE;
-			pipe->stream->mall_stream_config.paired_stream = NULL;
-		}
-
 		if (pipe->plane_state) {
 			pipe->plane_state->is_phantom = false;
 		}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
index 75171bee6f71..1ba6933d2b36 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
@@ -341,25 +341,42 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc,
 		break;
 	}
 
-	/* Override from passed values, mainly for debugging purposes, if available */
-	if (dml2->config.bbox_overrides.sr_exit_latency_us) {
-		p->in_states->state_array[0].sr_exit_time_us = dml2->config.bbox_overrides.sr_exit_latency_us;
-	}
+	/* Override from passed values, if available */
+	for (i = 0; i < p->in_states->num_states; i++) {
+		if (dml2->config.bbox_overrides.sr_exit_latency_us) {
+			p->in_states->state_array[i].sr_exit_time_us =
+				dml2->config.bbox_overrides.sr_exit_latency_us;
+		}
 
-	if (dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us) {
-		p->in_states->state_array[0].sr_enter_plus_exit_time_us = dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us;
-	}
+		if (dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us) {
+			p->in_states->state_array[i].sr_enter_plus_exit_time_us =
+				dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us;
+		}
 
-	if (dml2->config.bbox_overrides.urgent_latency_us) {
-		p->in_states->state_array[0].urgent_latency_pixel_data_only_us = dml2->config.bbox_overrides.urgent_latency_us;
-	}
+		if (dml2->config.bbox_overrides.sr_exit_z8_time_us) {
+			p->in_states->state_array[i].sr_exit_z8_time_us =
+				dml2->config.bbox_overrides.sr_exit_z8_time_us;
+		}
 
-	if (dml2->config.bbox_overrides.dram_clock_change_latency_us) {
-		p->in_states->state_array[0].dram_clock_change_latency_us = dml2->config.bbox_overrides.dram_clock_change_latency_us;
-	}
+		if (dml2->config.bbox_overrides.sr_enter_plus_exit_z8_time_us) {
+			p->in_states->state_array[i].sr_enter_plus_exit_z8_time_us =
+				dml2->config.bbox_overrides.sr_enter_plus_exit_z8_time_us;
+		}
+
+		if (dml2->config.bbox_overrides.urgent_latency_us) {
+			p->in_states->state_array[i].urgent_latency_pixel_data_only_us =
+				dml2->config.bbox_overrides.urgent_latency_us;
+		}
+
+		if (dml2->config.bbox_overrides.dram_clock_change_latency_us) {
+			p->in_states->state_array[i].dram_clock_change_latency_us =
+				dml2->config.bbox_overrides.dram_clock_change_latency_us;
+		}
 
-	if (dml2->config.bbox_overrides.fclk_change_latency_us) {
-		p->in_states->state_array[0].fclk_change_latency_us = dml2->config.bbox_overrides.fclk_change_latency_us;
+		if (dml2->config.bbox_overrides.fclk_change_latency_us) {
+			p->in_states->state_array[i].fclk_change_latency_us =
+				dml2->config.bbox_overrides.fclk_change_latency_us;
+		}
 	}
 
 	/* DCFCLK stas values are project specific */
@@ -381,7 +398,6 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc,
 	/* Copy clocks tables entries, if available */
 	if (dml2->config.bbox_overrides.clks_table.num_states) {
 		p->in_states->num_states = dml2->config.bbox_overrides.clks_table.num_states;
-
 		for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels; i++) {
 			p->in_states->state_array[i].dcfclk_mhz = dml2->config.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz;
 		}
@@ -406,8 +422,9 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc,
 		}
 
 		for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels; i++) {
-			p->in_states->state_array[i].dtbclk_mhz =
-				dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz;
+			if (dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz > 0)
+				p->in_states->state_array[i].dtbclk_mhz =
+					dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz;
 		}
 
 		for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels; i++) {
@@ -419,6 +436,14 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc,
 	}
 
 	dml2_policy_build_synthetic_soc_states(s, p);
+	if (dml2->v20.dml_core_ctx.project == dml_project_dcn35 ||
+		dml2->v20.dml_core_ctx.project == dml_project_dcn351) {
+		// Override last out_state with data from last in_state
+		// This will ensure that out_state contains max fclk
+		memcpy(&p->out_states->state_array[p->out_states->num_states - 1],
+				&p->in_states->state_array[p->in_states->num_states - 1],
+				sizeof(struct soc_state_bounding_box_st));
+	}
 }
 
 void dml2_translate_ip_params(const struct dc *in, struct ip_params_st *out)
@@ -498,8 +523,8 @@ void dml2_translate_socbb_params(const struct dc *in, struct soc_bounding_box_st
 	out->do_urgent_latency_adjustment = in_soc_params->do_urgent_latency_adjustment;
 	out->dram_channel_width_bytes = (dml_uint_t)in_soc_params->dram_channel_width_bytes;
 	out->fabric_datapath_to_dcn_data_return_bytes = (dml_uint_t)in_soc_params->fabric_datapath_to_dcn_data_return_bytes;
-	out->gpuvm_min_page_size_kbytes = in_soc_params->gpuvm_min_page_size_bytes * 1024;
-	out->hostvm_min_page_size_kbytes = in_soc_params->hostvm_min_page_size_bytes * 1024;
+	out->gpuvm_min_page_size_kbytes = in_soc_params->gpuvm_min_page_size_bytes / 1024;
+	out->hostvm_min_page_size_kbytes = in_soc_params->hostvm_min_page_size_bytes / 1024;
 	out->mall_allocated_for_dcn_mbytes = (dml_uint_t)in_soc_params->mall_allocated_for_dcn_mbytes;
 	out->max_avg_dram_bw_use_normal_percent = in_soc_params->max_avg_dram_bw_use_normal_percent;
 	out->max_avg_fabric_bw_use_normal_percent = in_soc_params->max_avg_fabric_bw_use_normal_percent;
@@ -606,8 +631,8 @@ static void populate_dml_output_cfg_from_stream_state(struct dml_output_cfg_st *
 		if (is_dp2p0_output_encoder(pipe))
 			out->OutputEncoder[location] = dml_dp2p0;
 		break;
-		out->OutputEncoder[location] = dml_edp;
 	case SIGNAL_TYPE_EDP:
+		out->OutputEncoder[location] = dml_edp;
 		break;
 	case SIGNAL_TYPE_HDMI_TYPE_A:
 	case SIGNAL_TYPE_DVI_SINGLE_LINK:
@@ -773,35 +798,28 @@ static void populate_dml_surface_cfg_from_plane_state(enum dml_project_id dml2_p
 	}
 }
 
-/*TODO no support for mpc combine, need rework - should calculate scaling params based on plane+stream*/
-static struct scaler_data get_scaler_data_for_plane(const struct dc_plane_state *in, const struct dc_state *context)
+static struct scaler_data get_scaler_data_for_plane(const struct dc_plane_state *in, struct dc_state *context)
 {
 	int i;
-	struct scaler_data data = { 0 };
+	struct pipe_ctx *temp_pipe = &context->res_ctx.temp_pipe;
+
+	memset(temp_pipe, 0, sizeof(struct pipe_ctx));
 
 	for (i = 0; i < MAX_PIPES; i++)	{
 		const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
 		if (pipe->plane_state == in && !pipe->prev_odm_pipe) {
-			const struct pipe_ctx *next_pipe = pipe->next_odm_pipe;
-
-			data = context->res_ctx.pipe_ctx[i].plane_res.scl_data;
-			while (next_pipe) {
-				data.h_active += next_pipe->plane_res.scl_data.h_active;
-				data.recout.width += next_pipe->plane_res.scl_data.recout.width;
-				if (in->rotation == ROTATION_ANGLE_0 || in->rotation == ROTATION_ANGLE_180) {
-					data.viewport.width += next_pipe->plane_res.scl_data.viewport.width;
-				} else {
-					data.viewport.height += next_pipe->plane_res.scl_data.viewport.height;
-				}
-				next_pipe = next_pipe->next_odm_pipe;
-			}
+			temp_pipe->stream = pipe->stream;
+			temp_pipe->plane_state = pipe->plane_state;
+			temp_pipe->plane_res.scl_data.taps = pipe->plane_res.scl_data.taps;
+
+			resource_build_scaling_params(temp_pipe);
 			break;
 		}
 	}
 
 	ASSERT(i < MAX_PIPES);
-	return data;
+	return temp_pipe->plane_res.scl_data;
 }
 
 static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned int location, const struct dc_stream_state *in)
@@ -846,7 +864,7 @@ static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned
 	out->ScalerEnabled[location] = false;
 }
 
-static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out, unsigned int location, const struct dc_plane_state *in, const struct dc_state *context)
+static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out, unsigned int location, const struct dc_plane_state *in, struct dc_state *context)
 {
 	const struct scaler_data scaler_data = get_scaler_data_for_plane(in, context);
 
@@ -1029,8 +1047,10 @@ static void dml2_populate_pipe_to_plane_index_mapping(struct dml2_context *dml2,
 
 void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg)
 {
-	int i = 0, j = 0;
+	int i = 0, j = 0, k = 0;
 	int disp_cfg_stream_location, disp_cfg_plane_location;
+	enum mall_stream_type stream_mall_type;
+	struct pipe_ctx *current_pipe_context;
 
 	for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) {
 		dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[i] = false;
@@ -1040,14 +1060,27 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat
 	}
 
 	//Generally these are set by referencing our latest BB/IP params in dcn32_resource.c file
-	dml_dispcfg->plane.GPUVMEnable = true;
-	dml_dispcfg->plane.GPUVMMaxPageTableLevels = 4;
-	dml_dispcfg->plane.HostVMEnable = false;
+	dml_dispcfg->plane.GPUVMEnable = dml2->v20.dml_core_ctx.ip.gpuvm_enable;
+	dml_dispcfg->plane.GPUVMMaxPageTableLevels = dml2->v20.dml_core_ctx.ip.gpuvm_max_page_table_levels;
+	dml_dispcfg->plane.HostVMEnable = dml2->v20.dml_core_ctx.ip.hostvm_enable;
+	dml_dispcfg->plane.HostVMMaxPageTableLevels = dml2->v20.dml_core_ctx.ip.hostvm_max_page_table_levels;
+	if (dml2->v20.dml_core_ctx.ip.hostvm_enable)
+		dml2->v20.dml_core_ctx.policy.AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter;
 
 	dml2_populate_pipe_to_plane_index_mapping(dml2, context);
 
 	for (i = 0; i < context->stream_count; i++) {
+		current_pipe_context = NULL;
+		for (k = 0; k < MAX_PIPES; k++) {
+			/* find one pipe allocated to this stream for the purpose of getting
+			info about the link later */
+			if (context->streams[i] == context->res_ctx.pipe_ctx[k].stream) {
+				current_pipe_context = &context->res_ctx.pipe_ctx[k];
+				break;
+			}
+		}
 		disp_cfg_stream_location = map_stream_to_dml_display_cfg(dml2, context->streams[i], dml_dispcfg);
+		stream_mall_type = dc_state_get_stream_subvp_type(context, context->streams[i]);
 
 		if (disp_cfg_stream_location < 0)
 			disp_cfg_stream_location = dml_dispcfg->num_timings++;
@@ -1055,7 +1088,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat
 		ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__);
 
 		populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_stream_location, context->streams[i]);
-		populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], &context->res_ctx.pipe_ctx[i]);
+		populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], current_pipe_context);
 		switch (context->streams[i]->debug.force_odm_combine_segments) {
 		case 2:
 			dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_stream_location] = dml_odm_use_policy_combine_2to1;
@@ -1092,10 +1125,10 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat
 				populate_dml_surface_cfg_from_plane_state(dml2->v20.dml_core_ctx.project, &dml_dispcfg->surface, disp_cfg_plane_location, context->stream_status[i].plane_states[j]);
 				populate_dml_plane_cfg_from_plane_state(&dml_dispcfg->plane, disp_cfg_plane_location, context->stream_status[i].plane_states[j], context);
 
-				if (context->streams[i]->mall_stream_config.type == SUBVP_MAIN) {
+				if (stream_mall_type == SUBVP_MAIN) {
 					dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_sub_viewport;
 					dml_dispcfg->plane.UseMALLForStaticScreen[disp_cfg_plane_location] = dml_use_mall_static_screen_optimize;
-				} else if (context->streams[i]->mall_stream_config.type == SUBVP_PHANTOM) {
+				} else if (stream_mall_type == SUBVP_PHANTOM) {
 					dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_phantom_pipe;
 					dml_dispcfg->plane.UseMALLForStaticScreen[disp_cfg_plane_location] = dml_use_mall_static_screen_disable;
 					dml2->v20.dml_core_ctx.policy.ImmediateFlipRequirement[disp_cfg_plane_location] = dml_immediate_flip_not_required;
@@ -1112,7 +1145,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat
 
 				if (j >= 1) {
 					populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_plane_location, context->streams[i]);
-					populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_plane_location, context->streams[i], &context->res_ctx.pipe_ctx[i]);
+					populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_plane_location, context->streams[i], current_pipe_context);
 					switch (context->streams[i]->debug.force_odm_combine_segments) {
 					case 2:
 						dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_plane_location] = dml_odm_use_policy_combine_2to1;
@@ -1124,9 +1157,9 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat
 						break;
 					}
 
-					if (context->streams[i]->mall_stream_config.type == SUBVP_MAIN)
+					if (stream_mall_type == SUBVP_MAIN)
 						dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_sub_viewport;
-					else if (context->streams[i]->mall_stream_config.type == SUBVP_PHANTOM)
+					else if (stream_mall_type == SUBVP_PHANTOM)
 						dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_phantom_pipe;
 
 					dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[disp_cfg_plane_location] = context->streams[i]->stream_id;
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
index 2498b8341199..1068b962d1c1 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
@@ -155,8 +155,20 @@ unsigned int dml2_util_get_maximum_odm_combine_for_output(bool force_odm_4to1, e
 
 bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx)
 {
+	if (pipe_ctx == NULL || pipe_ctx->stream == NULL)
+		return false;
+
 	/* If this assert is hit then we have a link encoder dynamic management issue */
 	ASSERT(pipe_ctx->stream_res.hpo_dp_stream_enc ? pipe_ctx->link_res.hpo_dp_link_enc != NULL : true);
+
+	/* Count MST hubs once by treating only 1st remote sink in topology as an encoder */
+	if (pipe_ctx->stream->link && pipe_ctx->stream->link->remote_sinks[0]) {
+		return (pipe_ctx->stream_res.hpo_dp_stream_enc &&
+			pipe_ctx->link_res.hpo_dp_link_enc &&
+			dc_is_dp_signal(pipe_ctx->stream->signal) &&
+			(pipe_ctx->stream->link->remote_sinks[0]->sink_id == pipe_ctx->stream->sink->sink_id));
+	}
+
 	return (pipe_ctx->stream_res.hpo_dp_stream_enc &&
 		pipe_ctx->link_res.hpo_dp_link_enc &&
 		dc_is_dp_signal(pipe_ctx->stream->signal));
@@ -275,6 +287,7 @@ static void populate_pipe_ctx_dlg_params_from_dml(struct pipe_ctx *pipe_ctx, str
 void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *context, struct resource_context *out_new_hw_state, struct dml2_context *in_ctx, unsigned int pipe_cnt)
 {
 	unsigned int dc_pipe_ctx_index, dml_pipe_idx, plane_id;
+	enum mall_stream_type pipe_mall_type;
 	bool unbounded_req_enabled = false;
 	struct dml2_calculate_rq_and_dlg_params_scratch *s = &in_ctx->v20.scratch.calculate_rq_and_dlg_params_scratch;
 
@@ -322,7 +335,8 @@ void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *cont
 		 */
 		populate_pipe_ctx_dlg_params_from_dml(&context->res_ctx.pipe_ctx[dc_pipe_ctx_index], &context->bw_ctx.dml2->v20.dml_core_ctx, dml_pipe_idx);
 
-		if (context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->mall_stream_config.type == SUBVP_PHANTOM) {
+		pipe_mall_type = dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[dc_pipe_ctx_index]);
+		if (pipe_mall_type == SUBVP_PHANTOM) {
 			// Phantom pipe requires that DET_SIZE = 0 and no unbounded requests
 			context->res_ctx.pipe_ctx[dc_pipe_ctx_index].det_buffer_size_kb = 0;
 			context->res_ctx.pipe_ctx[dc_pipe_ctx_index].unbounded_req = false;
@@ -349,7 +363,7 @@ void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *cont
 			context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_state != context->res_ctx.pipe_ctx[dc_pipe_ctx_index].top_pipe->plane_state) &&
 			context->res_ctx.pipe_ctx[dc_pipe_ctx_index].prev_odm_pipe == NULL) {
 			/* SS: all active surfaces stored in MALL */
-			if (context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->mall_stream_config.type != SUBVP_PHANTOM) {
+			if (pipe_mall_type != SUBVP_PHANTOM) {
 				context->bw_ctx.bw.dcn.mall_ss_size_bytes += context->res_ctx.pipe_ctx[dc_pipe_ctx_index].surface_size_in_mall_bytes;
 			} else {
 				/* SUBVP: phantom surfaces only stored in MALL */
@@ -468,7 +482,7 @@ bool dml2_verify_det_buffer_configuration(struct dml2_context *in_ctx, struct dc
 	return need_recalculation;
 }
 
-bool dml2_is_stereo_timing(struct dc_stream_state *stream)
+bool dml2_is_stereo_timing(const struct dc_stream_state *stream)
 {
 	bool is_stereo = false;
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h
index 23b9028337d4..5842d6d3c4b6 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h
@@ -42,7 +42,7 @@ void dml2_copy_clocks_to_dc_state(struct dml2_dcn_clocks *out_clks, struct dc_st
 void dml2_extract_watermark_set(struct dcn_watermarks *watermark, struct display_mode_lib_st *dml_core_ctx);
 int dml2_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id);
 bool is_dtbclk_required(const struct dc *dc, struct dc_state *context);
-bool dml2_is_stereo_timing(struct dc_stream_state *stream);
+bool dml2_is_stereo_timing(const struct dc_stream_state *stream);
 
 /*
  * dml2_dc_construct_pipes - This function will determine if we need additional pipes based
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
index 8f231418870f..26307e599614 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
@@ -418,7 +418,7 @@ static int find_drr_eligible_stream(struct dc_state *display_state)
 	int i;
 
 	for (i = 0; i < display_state->stream_count; i++) {
-		if (display_state->streams[i]->mall_stream_config.type == SUBVP_NONE
+		if (dc_state_get_stream_subvp_type(display_state, display_state->streams[i]) == SUBVP_NONE
 			&& display_state->streams[i]->ignore_msa_timing_param) {
 			// Use ignore_msa_timing_param flag to identify as DRR
 			return i;
@@ -634,6 +634,8 @@ static bool dml2_validate_and_build_resource(const struct dc *in_dc, struct dc_s
 		dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.b, &dml2->v20.dml_core_ctx);
 		memcpy(&context->bw_ctx.bw.dcn.watermarks.c, &dml2->v20.g6_temp_read_watermark_set, sizeof(context->bw_ctx.bw.dcn.watermarks.c));
 		dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.d, &dml2->v20.dml_core_ctx);
+		/* Mirror DML2's StutterPeriod into the legacy vba field; it is read when deciding Z-state use */
+		context->bw_ctx.dml.vba.StutterPeriod = context->bw_ctx.dml2->v20.dml_core_ctx.mp.StutterPeriod;
 	}
 
 	return result;
@@ -691,10 +693,15 @@ bool dml2_validate(const struct dc *in_dc, struct dc_state *context, bool fast_v
 	return out;
 }
 
+static inline struct dml2_context *dml2_allocate_memory(void)
+{
+	return kzalloc(sizeof(struct dml2_context), GFP_KERNEL); /* zero-filled; NULL on failure */
+}
+
 bool dml2_create(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2)
 {
 	// Allocate Mode Lib Ctx
-	*dml2 = (struct dml2_context *) kzalloc(sizeof(struct dml2_context), GFP_KERNEL);
+	*dml2 = dml2_allocate_memory();
 
 	if (!(*dml2))
 		return false;
@@ -745,3 +752,25 @@ void dml2_extract_dram_and_fclk_change_support(struct dml2_context *dml2,
 	*fclk_change_support = (unsigned int) dml2->v20.dml_core_ctx.ms.support.FCLKChangeSupport[0];
 	*dram_clk_change_support = (unsigned int) dml2->v20.dml_core_ctx.ms.support.DRAMClockChangeSupport[0];
 }
+
+void dml2_copy(struct dml2_context *dst_dml2,
+	struct dml2_context *src_dml2)
+{
+	/* Flat memcpy of the whole Mode Lib Ctx (no NULL checks); pointer members, if any, alias src */
+	memcpy(dst_dml2, src_dml2, sizeof(struct dml2_context));
+}
+
+bool dml2_create_copy(struct dml2_context **dst_dml2,
+	struct dml2_context *src_dml2)
+{
+	/* Allocate a fresh Mode Lib Ctx; *dst_dml2 is overwritten and is NULL if this fails */
+	*dst_dml2 = dml2_allocate_memory();
+
+	if (!(*dst_dml2))
+		return false;
+
+	/* Clone src into the new context through dml2_copy(), i.e. one flat memcpy */
+	dml2_copy(*dst_dml2, src_dml2);
+
+	return true;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h
index 317f90776d97..ee0eb184eb6d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h
@@ -93,15 +93,34 @@ struct dml2_dc_callbacks {
 struct dml2_dc_svp_callbacks {
 	struct dc *dc;
 	bool (*build_scaling_params)(struct pipe_ctx *pipe_ctx);
-	struct dc_stream_state* (*create_stream_for_sink)(struct dc_sink *dc_sink_data);
-	struct dc_plane_state* (*create_plane)(struct dc *dc);
-	enum dc_status (*add_stream_to_ctx)(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream);
-	bool (*add_plane_to_context)(const struct dc *dc, struct dc_stream_state *stream, struct dc_plane_state *plane_state, struct dc_state *context);
-	bool (*remove_plane_from_context)(const struct dc *dc, struct dc_stream_state *stream, struct dc_plane_state *plane_state, struct dc_state *context);
-	enum dc_status (*remove_stream_from_ctx)(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *stream);
-	void (*plane_state_release)(struct dc_plane_state *plane_state);
-	void (*stream_release)(struct dc_stream_state *stream);
+	struct dc_stream_state* (*create_phantom_stream)(const struct dc *dc,
+			struct dc_state *state,
+			struct dc_stream_state *main_stream);
+	struct dc_plane_state* (*create_phantom_plane)(struct dc *dc,
+			struct dc_state *state,
+			struct dc_plane_state *main_plane);
+	enum dc_status (*add_phantom_stream)(struct dc *dc,
+			struct dc_state *state,
+			struct dc_stream_state *phantom_stream,
+			struct dc_stream_state *main_stream);
+	bool (*add_phantom_plane)(const struct dc *dc, struct dc_stream_state *stream, struct dc_plane_state *plane_state, struct dc_state *context);
+	bool (*remove_phantom_plane)(const struct dc *dc,
+			struct dc_stream_state *stream,
+			struct dc_plane_state *plane_state,
+			struct dc_state *context);
+	enum dc_status (*remove_phantom_stream)(struct dc *dc,
+			struct dc_state *state,
+			struct dc_stream_state *stream);
+	void (*release_phantom_plane)(const struct dc *dc,
+			struct dc_state *state,
+			struct dc_plane_state *plane);
+	void (*release_phantom_stream)(const struct dc *dc,
+			struct dc_state *state,
+			struct dc_stream_state *stream);
 	void (*release_dsc)(struct resource_context *res_ctx, const struct resource_pool *pool, struct display_stream_compressor **dsc);
+	enum mall_stream_type (*get_pipe_subvp_type)(const struct dc_state *state, const struct pipe_ctx *pipe_ctx);
+	enum mall_stream_type (*get_stream_subvp_type)(const struct dc_state *state, const struct dc_stream_state *stream);
+	struct dc_stream_state *(*get_paired_subvp_stream)(const struct dc_state *state, const struct dc_stream_state *stream);
 };
 
 struct dml2_clks_table_entry {
@@ -139,6 +158,8 @@ struct dml2_soc_bbox_overrides {
 	double urgent_latency_us;
 	double sr_exit_latency_us;
 	double sr_enter_plus_exit_latency_us;
+	double sr_exit_z8_time_us;
+	double sr_enter_plus_exit_z8_time_us;
 	double dram_clock_change_latency_us;
 	double fclk_change_latency_us;
 	unsigned int dram_num_chan;
@@ -189,6 +210,10 @@ bool dml2_create(const struct dc *in_dc,
 				 struct dml2_context **dml2);
 
 void dml2_destroy(struct dml2_context *dml2);
+void dml2_copy(struct dml2_context *dst_dml2,
+	struct dml2_context *src_dml2);
+bool dml2_create_copy(struct dml2_context **dst_dml2,
+	struct dml2_context *src_dml2);
 
 /*
  * dml2_validate - Determines if a display configuration is supported or not.
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile
index a2537229ee88..b183ba5a692e 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile
@@ -1,8 +1,34 @@
 # SPDX-License-Identifier: MIT
 #
 # Makefile for the 'dsc' sub-component of DAL.
+
+ifdef CONFIG_DRM_AMD_DC_FP
+
+###############################################################################
+# DCN20
+###############################################################################
+DSC_DCN20 = dcn20_dsc.o
+
+AMD_DISPLAY_FILES += $(addprefix $(AMDDALPATH)/dc/dsc/dcn20/,$(DSC_DCN20))
+
+
+
+
+###############################################################################
+# DCN35
+###############################################################################
+
+DSC_DCN35 = dcn35_dsc.o
+
+AMD_DISPLAY_FILES += $(addprefix $(AMDDALPATH)/dc/dsc/dcn35/,$(DSC_DCN35))
+
+
+
+endif
+
 DSC = dc_dsc.o rc_calc.o rc_calc_dpi.o
 
 AMD_DAL_DSC = $(addprefix $(AMDDALPATH)/dc/dsc/,$(DSC))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_DSC)
+
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
index e8b5f17beb96..0df6c55eb326 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
@@ -331,8 +331,9 @@ bool dc_dsc_parse_dsc_dpcd(const struct dc *dc,
 		int buff_block_size;
 		int buff_size;
 
-		if (!dsc_buff_block_size_from_dpcd(dpcd_dsc_basic_data[DP_DSC_RC_BUF_BLK_SIZE - DP_DSC_SUPPORT],
-										   &buff_block_size))
+		if (!dsc_buff_block_size_from_dpcd(
+				dpcd_dsc_basic_data[DP_DSC_RC_BUF_BLK_SIZE - DP_DSC_SUPPORT] & 0x03,
+				&buff_block_size))
 			return false;
 
 		buff_size = dpcd_dsc_basic_data[DP_DSC_RC_BUF_SIZE - DP_DSC_SUPPORT] + 1;
@@ -357,10 +358,15 @@ bool dc_dsc_parse_dsc_dpcd(const struct dc *dc,
 
 	{
 		int dpcd_throughput = dpcd_dsc_basic_data[DP_DSC_PEAK_THROUGHPUT - DP_DSC_SUPPORT];
+		int dsc_throughput_granular_delta;
+
+		dsc_throughput_granular_delta = dpcd_dsc_basic_data[DP_DSC_RC_BUF_BLK_SIZE - DP_DSC_SUPPORT] >> 3;
+		dsc_throughput_granular_delta *= 2;
 
 		if (!dsc_throughput_from_dpcd(dpcd_throughput & DP_DSC_THROUGHPUT_MODE_0_MASK,
 									  &dsc_sink_caps->throughput_mode_0_mps))
 			return false;
+		dsc_sink_caps->throughput_mode_0_mps += dsc_throughput_granular_delta;
 
 		dpcd_throughput = (dpcd_throughput & DP_DSC_THROUGHPUT_MODE_1_MASK) >> DP_DSC_THROUGHPUT_MODE_1_SHIFT;
 		if (!dsc_throughput_from_dpcd(dpcd_throughput, &dsc_sink_caps->throughput_mode_1_mps))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c
index c9ae2d8f0096..c9ae2d8f0096 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h
index ba869387c3c5..ba869387c3c5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.h
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c
index 71d2dff9986d..71d2dff9986d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dsc.c
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.h
index 133ad38842cc..133ad38842cc 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dsc.h
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.h
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dsc.h
index 4b27f29d0d80..4b27f29d0d80 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dsc.h
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/Makefile b/drivers/gpu/drm/amd/display/dc/hwss/Makefile
index bccd46bd1815..254136f8e3f9 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/hwss/Makefile
@@ -78,7 +78,7 @@ ifdef CONFIG_DRM_AMD_DC_FP
 # DCN
 ###############################################################################
 
-HWSS_DCN10 = dcn10_hwseq.o
+HWSS_DCN10 = dcn10_hwseq.o dcn10_init.o
 
 AMD_DAL_HWSS_DCN10 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn10/,$(HWSS_DCN10))
 
@@ -86,7 +86,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN10)
 
 ###############################################################################
 
-HWSS_DCN20 = dcn20_hwseq.o
+HWSS_DCN20 = dcn20_hwseq.o dcn20_init.o
 
 AMD_DAL_HWSS_DCN20 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn20/,$(HWSS_DCN20))
 
@@ -94,7 +94,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN20)
 
 ###############################################################################
 
-HWSS_DCN201 = dcn201_hwseq.o
+HWSS_DCN201 = dcn201_hwseq.o dcn201_init.o
 
 AMD_DAL_HWSS_DCN201 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn201/,$(HWSS_DCN201))
 
@@ -102,7 +102,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN201)
 
 ###############################################################################
 
-HWSS_DCN21 = dcn21_hwseq.o
+HWSS_DCN21 = dcn21_hwseq.o dcn21_init.o
 
 AMD_DAL_HWSS_DCN21 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn21/,$(HWSS_DCN21))
 
@@ -114,7 +114,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN21)
 
 ###############################################################################
 
-HWSS_DCN30 = dcn30_hwseq.o
+HWSS_DCN30 = dcn30_hwseq.o dcn30_init.o
 
 AMD_DAL_HWSS_DCN30 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn30/,$(HWSS_DCN30))
 
@@ -122,7 +122,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN30)
 
 ###############################################################################
 
-HWSS_DCN301 = dcn301_hwseq.o
+HWSS_DCN301 = dcn301_hwseq.o dcn301_init.o
 
 AMD_DAL_HWSS_DCN301 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn301/,$(HWSS_DCN301))
 
@@ -130,15 +130,17 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN301)
 
 ###############################################################################
 
-HWSS_DCN302 = dcn302_hwseq.o
+HWSS_DCN302 = dcn302_hwseq.o dcn302_init.o
 
 AMD_DAL_HWSS_DCN302 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn302/,$(HWSS_DCN302))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN302)
 
+
+
 ###############################################################################
 
-HWSS_DCN303 = dcn303_hwseq.o
+HWSS_DCN303 = dcn303_hwseq.o dcn303_init.o
 
 AMD_DAL_HWSS_DCN303 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn303/,$(HWSS_DCN303))
 
@@ -146,7 +148,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN303)
 
 ###############################################################################
 
-HWSS_DCN31 = dcn31_hwseq.o
+HWSS_DCN31 = dcn31_hwseq.o dcn31_init.o
 
 AMD_DAL_HWSS_DCN31 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn31/,$(HWSS_DCN31))
 
@@ -154,7 +156,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN31)
 
 ###############################################################################
 
-HWSS_DCN314 = dcn314_hwseq.o
+HWSS_DCN314 = dcn314_hwseq.o dcn314_init.o
 
 AMD_DAL_HWSS_DCN314 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn314/,$(HWSS_DCN314))
 
@@ -162,7 +164,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN314)
 
 ###############################################################################
 
-HWSS_DCN32 = dcn32_hwseq.o
+HWSS_DCN32 = dcn32_hwseq.o dcn32_init.o
 
 AMD_DAL_HWSS_DCN32 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn32/,$(HWSS_DCN32))
 
@@ -170,7 +172,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN32)
 
 ###############################################################################
 
-HWSS_DCN35 = dcn35_hwseq.o
+HWSS_DCN35 = dcn35_hwseq.o dcn35_init.o
 
 AMD_DAL_HWSS_DCN35 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn35/,$(HWSS_DCN35))
 
@@ -180,4 +182,4 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN35)
 
 ###############################################################################
 
-endif
-\ No newline at end of file
+endif
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h
index 44b4df6469d1..52f045cfd52a 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h
@@ -682,6 +682,7 @@ struct dce_hwseq_registers {
 	uint32_t DCHUBBUB_ARB_HOSTVM_CNTL;
 	uint32_t HPO_TOP_HW_CONTROL;
 	uint32_t DMU_CLK_CNTL;
+	uint32_t DCCG_GATE_DISABLE_CNTL4;
 	uint32_t DCCG_GATE_DISABLE_CNTL5;
 };
  /* set field name */
@@ -1199,7 +1200,19 @@ struct dce_hwseq_registers {
 	type PHYBSYMCLK_ROOT_GATE_DISABLE;\
 	type PHYCSYMCLK_ROOT_GATE_DISABLE;\
 	type PHYDSYMCLK_ROOT_GATE_DISABLE;\
-	type PHYESYMCLK_ROOT_GATE_DISABLE;
+	type PHYESYMCLK_ROOT_GATE_DISABLE;\
+	type DTBCLK_P0_GATE_DISABLE;\
+	type DTBCLK_P1_GATE_DISABLE;\
+	type DTBCLK_P2_GATE_DISABLE;\
+	type DTBCLK_P3_GATE_DISABLE;\
+	type DPSTREAMCLK0_GATE_DISABLE;\
+	type DPSTREAMCLK1_GATE_DISABLE;\
+	type DPSTREAMCLK2_GATE_DISABLE;\
+	type DPSTREAMCLK3_GATE_DISABLE;\
+	type DPIASYMCLK0_GATE_DISABLE;\
+	type DPIASYMCLK1_GATE_DISABLE;\
+	type DPIASYMCLK2_GATE_DISABLE;\
+	type DPIASYMCLK3_GATE_DISABLE;
 
 struct dce_hwseq_shift {
 	HWSEQ_REG_FIELD_LIST(uint8_t)
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
index 960a55e06375..01493c49bd7a 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
@@ -55,6 +55,7 @@
 #include "audio.h"
 #include "reg_helper.h"
 #include "panel_cntl.h"
+#include "dc_state_priv.h"
 #include "dpcd_defs.h"
 /* include DCE11 register header files */
 #include "dce/dce_11_0_d.h"
@@ -790,7 +791,7 @@ void dce110_edp_power_control(
 	struct dc_context *ctx = link->ctx;
 	struct bp_transmitter_control cntl = { 0 };
 	enum bp_result bp_result;
-	uint8_t panel_instance;
+	uint8_t pwrseq_instance;
 
 
 	if (dal_graphics_object_id_get_connector_id(link->link_enc->connector)
@@ -873,7 +874,7 @@ void dce110_edp_power_control(
 		cntl.coherent = false;
 		cntl.lanes_number = LANE_COUNT_FOUR;
 		cntl.hpd_sel = link->link_enc->hpd_source;
-		panel_instance = link->panel_cntl->inst;
+		pwrseq_instance = link->panel_cntl->pwrseq_inst;
 
 		if (ctx->dc->ctx->dmub_srv &&
 				ctx->dc->debug.dmub_command_table) {
@@ -881,11 +882,11 @@ void dce110_edp_power_control(
 			if (cntl.action == TRANSMITTER_CONTROL_POWER_ON) {
 				bp_result = ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios,
 						LVTMA_CONTROL_POWER_ON,
-						panel_instance, link->link_powered_externally);
+						pwrseq_instance, link->link_powered_externally);
 			} else {
 				bp_result = ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios,
 						LVTMA_CONTROL_POWER_OFF,
-						panel_instance, link->link_powered_externally);
+						pwrseq_instance, link->link_powered_externally);
 			}
 		}
 
@@ -956,7 +957,7 @@ void dce110_edp_backlight_control(
 {
 	struct dc_context *ctx = link->ctx;
 	struct bp_transmitter_control cntl = { 0 };
-	uint8_t panel_instance;
+	uint8_t pwrseq_instance;
 	unsigned int pre_T11_delay = OLED_PRE_T11_DELAY;
 	unsigned int post_T7_delay = OLED_POST_T7_DELAY;
 
@@ -1009,7 +1010,7 @@ void dce110_edp_backlight_control(
 	 */
 	/* dc_service_sleep_in_milliseconds(50); */
 		/*edp 1.2*/
-	panel_instance = link->panel_cntl->inst;
+	pwrseq_instance = link->panel_cntl->pwrseq_inst;
 
 	if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON) {
 		if (!link->dc->config.edp_no_power_sequencing)
@@ -1034,11 +1035,11 @@ void dce110_edp_backlight_control(
 		if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON)
 			ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios,
 					LVTMA_CONTROL_LCD_BLON,
-					panel_instance, link->link_powered_externally);
+					pwrseq_instance, link->link_powered_externally);
 		else
 			ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios,
 					LVTMA_CONTROL_LCD_BLOFF,
-					panel_instance, link->link_powered_externally);
+					pwrseq_instance, link->link_powered_externally);
 	}
 
 	link_transmitter_control(ctx->dc_bios, &cntl);
@@ -1182,9 +1183,9 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx)
 		dto_params.timing = &pipe_ctx->stream->timing;
 		dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst;
 		if (dccg) {
-			dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
 			dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst);
 			dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, dp_hpo_inst);
+			dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
 		}
 	} else if (dccg && dccg->funcs->disable_symclk_se) {
 		dccg->funcs->disable_symclk_se(dccg, stream_enc->stream_enc_inst,
@@ -1353,7 +1354,7 @@ static void build_audio_output(
 	if (state->clk_mgr &&
 		(pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT ||
 			pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)) {
-		audio_output->pll_info.dp_dto_source_clock_in_khz =
+		audio_output->pll_info.audio_dto_source_clock_in_khz =
 				state->clk_mgr->funcs->get_dp_ref_clk_frequency(
 						state->clk_mgr);
 	}
@@ -1475,7 +1476,7 @@ static enum dc_status dce110_enable_stream_timing(
 	return DC_OK;
 }
 
-static enum dc_status apply_single_controller_ctx_to_hw(
+enum dc_status dce110_apply_single_controller_ctx_to_hw(
 		struct pipe_ctx *pipe_ctx,
 		struct dc_state *context,
 		struct dc *dc)
@@ -1596,7 +1597,7 @@ static enum dc_status apply_single_controller_ctx_to_hw(
 	 * is constructed with the same sink). Make sure not to override
 	 * and link programming on the main.
 	 */
-	if (pipe_ctx->stream->mall_stream_config.type != SUBVP_PHANTOM) {
+	if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) {
 		pipe_ctx->stream->link->psr_settings.psr_feature_enabled = false;
 		pipe_ctx->stream->link->replay_settings.replay_feature_enabled = false;
 	}
@@ -1684,7 +1685,7 @@ static void disable_vga_and_power_gate_all_controllers(
 				true);
 
 		dc->current_state->res_ctx.pipe_ctx[i].pipe_idx = i;
-		dc->hwss.disable_plane(dc,
+		dc->hwss.disable_plane(dc, dc->current_state,
 			&dc->current_state->res_ctx.pipe_ctx[i]);
 	}
 }
@@ -2124,7 +2125,8 @@ static void dce110_reset_hw_ctx_wrap(
 				BREAK_TO_DEBUGGER();
 			}
 			pipe_ctx_old->stream_res.tg->funcs->disable_crtc(pipe_ctx_old->stream_res.tg);
-			pipe_ctx_old->stream->link->phy_state.symclk_ref_cnts.otg = 0;
+			if (dc_is_hdmi_tmds_signal(pipe_ctx_old->stream->signal))
+				pipe_ctx_old->stream->link->phy_state.symclk_ref_cnts.otg = 0;
 			pipe_ctx_old->plane_res.mi->funcs->free_mem_input(
 					pipe_ctx_old->plane_res.mi, dc->current_state->stream_count);
 
@@ -2133,7 +2135,7 @@ static void dce110_reset_hw_ctx_wrap(
 										old_clk))
 				old_clk->funcs->cs_power_down(old_clk);
 
-			dc->hwss.disable_plane(dc, pipe_ctx_old);
+			dc->hwss.disable_plane(dc, dc->current_state, pipe_ctx_old);
 
 			pipe_ctx_old->stream = NULL;
 		}
@@ -2300,7 +2302,7 @@ enum dc_status dce110_apply_ctx_to_hw(
 		if (pipe_ctx->top_pipe || pipe_ctx->prev_odm_pipe)
 			continue;
 
-		status = apply_single_controller_ctx_to_hw(
+		status = dce110_apply_single_controller_ctx_to_hw(
 				pipe_ctx,
 				context,
 				dc);
@@ -2497,6 +2499,7 @@ static bool wait_for_reset_trigger_to_occur(
 /* Enable timing synchronization for a group of Timing Generators. */
 static void dce110_enable_timing_synchronization(
 		struct dc *dc,
+		struct dc_state *state,
 		int group_index,
 		int group_size,
 		struct pipe_ctx *grouped_pipes[])
@@ -2590,6 +2593,7 @@ static void init_hw(struct dc *dc)
 	struct dmcu *dmcu;
 	struct dce_hwseq *hws = dc->hwseq;
 	uint32_t backlight = MAX_BACKLIGHT_LEVEL;
+	uint32_t user_level = MAX_BACKLIGHT_LEVEL;
 
 	bp = dc->ctx->dc_bios;
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
@@ -2639,13 +2643,15 @@ static void init_hw(struct dc *dc)
 	for (i = 0; i < dc->link_count; i++) {
 		struct dc_link *link = dc->links[i];
 
-		if (link->panel_cntl)
+		if (link->panel_cntl) {
 			backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
+			user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL;
+		}
 	}
 
 	abm = dc->res_pool->abm;
 	if (abm != NULL)
-		abm->funcs->abm_init(abm, backlight);
+		abm->funcs->abm_init(abm, backlight, user_level);
 
 	dmcu = dc->res_pool->dmcu;
 	if (dmcu != NULL && abm != NULL)
@@ -2842,7 +2848,7 @@ static void dce110_post_unlock_program_front_end(
 {
 }
 
-static void dce110_power_down_fe(struct dc *dc, struct pipe_ctx *pipe_ctx)
+static void dce110_power_down_fe(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx)
 {
 	struct dce_hwseq *hws = dc->hwseq;
 	int fe_idx = pipe_ctx->plane_res.mi ?
@@ -3115,7 +3121,8 @@ void dce110_disable_link_output(struct dc_link *link,
 	struct dmcu *dmcu = dc->res_pool->dmcu;
 
 	if (signal == SIGNAL_TYPE_EDP &&
-			link->dc->hwss.edp_backlight_control)
+			link->dc->hwss.edp_backlight_control &&
+			!link->skip_implict_edp_power_control)
 		link->dc->hwss.edp_backlight_control(link, false);
 	else if (dmcu != NULL && dmcu->funcs->lock_phy)
 		dmcu->funcs->lock_phy(dmcu);
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h
index 08028a1779ae..ed3cc3648e8e 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h
@@ -39,6 +39,10 @@ enum dc_status dce110_apply_ctx_to_hw(
 		struct dc *dc,
 		struct dc_state *context);
 
+enum dc_status dce110_apply_single_controller_ctx_to_hw(
+		struct pipe_ctx *pipe_ctx,
+		struct dc_state *context,
+		struct dc *dc);
 
 void dce110_enable_stream(struct pipe_ctx *pipe_ctx);
 
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
index 2b8b8366538e..6dd479e8a348 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
@@ -56,6 +56,7 @@
 #include "dc_trace.h"
 #include "dce/dmub_outbox.h"
 #include "link.h"
+#include "dc_state_priv.h"
 
 #define DC_LOGGER \
 	dc_logger
@@ -115,7 +116,7 @@ void dcn10_lock_all_pipes(struct dc *dc,
 		    !pipe_ctx->stream ||
 		    (!pipe_ctx->plane_state && !old_pipe_ctx->plane_state) ||
 		    !tg->funcs->is_tg_enabled(tg) ||
-			pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM)
+			dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM)
 			continue;
 
 		if (lock)
@@ -1057,7 +1058,8 @@ static void dcn10_reset_back_end_for_pipe(
 		if (pipe_ctx->stream_res.tg->funcs->set_drr)
 			pipe_ctx->stream_res.tg->funcs->set_drr(
 					pipe_ctx->stream_res.tg, NULL);
-		pipe_ctx->stream->link->phy_state.symclk_ref_cnts.otg = 0;
+		if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal))
+			pipe_ctx->stream->link->phy_state.symclk_ref_cnts.otg = 0;
 	}
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++)
@@ -1180,7 +1182,9 @@ void dcn10_verify_allow_pstate_change_high(struct dc *dc)
 }
 
 /* trigger HW to start disconnect plane from stream on the next vsync */
-void dcn10_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn10_plane_atomic_disconnect(struct dc *dc,
+		struct dc_state *state,
+		struct pipe_ctx *pipe_ctx)
 {
 	struct dce_hwseq *hws = dc->hwseq;
 	struct hubp *hubp = pipe_ctx->plane_res.hubp;
@@ -1200,7 +1204,7 @@ void dcn10_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx)
 	mpc->funcs->remove_mpcc(mpc, mpc_tree_params, mpcc_to_remove);
 	// Phantom pipes have OTG disabled by default, so MPCC_STATUS will never assert idle,
 	// so don't wait for MPCC_IDLE in the programming sequence
-	if (opp != NULL && !pipe_ctx->plane_state->is_phantom)
+	if (opp != NULL && dc_state_get_pipe_subvp_type(state, pipe_ctx) != SUBVP_PHANTOM)
 		opp->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
 
 	dc->optimized_required = true;
@@ -1290,7 +1294,7 @@ void dcn10_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx)
 	pipe_ctx->plane_state = NULL;
 }
 
-void dcn10_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn10_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx)
 {
 	struct dce_hwseq *hws = dc->hwseq;
 	DC_LOGGER_INIT(dc->ctx->logger);
@@ -1416,12 +1420,12 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context)
 		dc->res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
 		pipe_ctx->stream_res.opp = dc->res_pool->opps[i];
 
-		hws->funcs.plane_atomic_disconnect(dc, pipe_ctx);
+		hws->funcs.plane_atomic_disconnect(dc, context, pipe_ctx);
 
 		if (tg->funcs->is_tg_enabled(tg))
 			tg->funcs->unlock(tg);
 
-		dc->hwss.disable_plane(dc, pipe_ctx);
+		dc->hwss.disable_plane(dc, context, pipe_ctx);
 
 		pipe_ctx->stream_res.tg = NULL;
 		pipe_ctx->plane_res.hubp = NULL;
@@ -1486,6 +1490,7 @@ void dcn10_init_hw(struct dc *dc)
 	struct dc_bios *dcb = dc->ctx->dc_bios;
 	struct resource_pool *res_pool = dc->res_pool;
 	uint32_t backlight = MAX_BACKLIGHT_LEVEL;
+	uint32_t user_level = MAX_BACKLIGHT_LEVEL;
 	bool   is_optimized_init_done = false;
 
 	if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks)
@@ -1583,12 +1588,14 @@ void dcn10_init_hw(struct dc *dc)
 		for (i = 0; i < dc->link_count; i++) {
 			struct dc_link *link = dc->links[i];
 
-			if (link->panel_cntl)
+			if (link->panel_cntl) {
 				backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
+				user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL;
+			}
 		}
 
 		if (abm != NULL)
-			abm->funcs->abm_init(abm, backlight);
+			abm->funcs->abm_init(abm, backlight, user_level);
 
 		if (dmcu != NULL && !dmcu->auto_load_dmcu)
 			dmcu->funcs->dmcu_init(dmcu);
@@ -2262,6 +2269,7 @@ void dcn10_enable_vblanks_synchronization(
 
 void dcn10_enable_timing_synchronization(
 	struct dc *dc,
+	struct dc_state *state,
 	int group_index,
 	int group_size,
 	struct pipe_ctx *grouped_pipes[])
@@ -2276,7 +2284,7 @@ void dcn10_enable_timing_synchronization(
 	DC_SYNC_INFO("Setting up OTG reset trigger\n");
 
 	for (i = 1; i < group_size; i++) {
-		if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM)
+		if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM)
 			continue;
 
 		opp = grouped_pipes[i]->stream_res.opp;
@@ -2296,14 +2304,14 @@ void dcn10_enable_timing_synchronization(
 		if (grouped_pipes[i]->stream == NULL)
 			continue;
 
-		if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM)
+		if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM)
 			continue;
 
 		grouped_pipes[i]->stream->vblank_synchronized = false;
 	}
 
 	for (i = 1; i < group_size; i++) {
-		if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM)
+		if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM)
 			continue;
 
 		grouped_pipes[i]->stream_res.tg->funcs->enable_reset_trigger(
@@ -2317,11 +2325,11 @@ void dcn10_enable_timing_synchronization(
 	 * synchronized. Look at last pipe programmed to reset.
 	 */
 
-	if (grouped_pipes[1]->stream && grouped_pipes[1]->stream->mall_stream_config.type != SUBVP_PHANTOM)
+	if (grouped_pipes[1]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[1]) != SUBVP_PHANTOM)
 		wait_for_reset_trigger_to_occur(dc_ctx, grouped_pipes[1]->stream_res.tg);
 
 	for (i = 1; i < group_size; i++) {
-		if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM)
+		if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM)
 			continue;
 
 		grouped_pipes[i]->stream_res.tg->funcs->disable_reset_trigger(
@@ -2329,7 +2337,7 @@ void dcn10_enable_timing_synchronization(
 	}
 
 	for (i = 1; i < group_size; i++) {
-		if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM)
+		if (dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM)
 			continue;
 
 		opp = grouped_pipes[i]->stream_res.opp;
@@ -3021,7 +3029,7 @@ void dcn10_post_unlock_program_front_end(
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++)
 		if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable)
-			dc->hwss.disable_plane(dc, &dc->current_state->res_ctx.pipe_ctx[i]);
+			dc->hwss.disable_plane(dc, dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]);
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++)
 		if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable) {
@@ -3417,7 +3425,8 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx)
 		.h_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.horz,
 		.v_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.vert,
 		.rotation = pipe_ctx->plane_state->rotation,
-		.mirror = pipe_ctx->plane_state->horizontal_mirror
+		.mirror = pipe_ctx->plane_state->horizontal_mirror,
+		.stream = pipe_ctx->stream,
 	};
 	bool pipe_split_on = false;
 	bool odm_combine_on = (pipe_ctx->next_odm_pipe != NULL) ||
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h
index ef6d56da417c..bc5dd68a2408 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h
@@ -75,7 +75,7 @@ void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx);
 void dcn10_reset_hw_ctx_wrap(
 		struct dc *dc,
 		struct dc_state *context);
-void dcn10_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn10_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx);
 void dcn10_lock_all_pipes(
 		struct dc *dc,
 		struct dc_state *context,
@@ -108,13 +108,16 @@ void dcn10_power_down_on_boot(struct dc *dc);
 enum dc_status dce110_apply_ctx_to_hw(
 		struct dc *dc,
 		struct dc_state *context);
-void dcn10_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn10_plane_atomic_disconnect(struct dc *dc,
+		struct dc_state *state,
+		struct pipe_ctx *pipe_ctx);
 void dcn10_update_dchub(struct dce_hwseq *hws, struct dchub_init_data *dh_data);
 void dcn10_update_pending_status(struct pipe_ctx *pipe_ctx);
 void dce110_power_down(struct dc *dc);
 void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context);
 void dcn10_enable_timing_synchronization(
 		struct dc *dc,
+		struct dc_state *state,
 		int group_index,
 		int group_size,
 		struct pipe_ctx *grouped_pipes[]);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c
index a5bdac79a744..a5bdac79a744 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.h
index 8c6fd7b844a4..8c6fd7b844a4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
index 608221b0dd5d..931ac8ed7069 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
@@ -55,6 +55,7 @@
 #include "inc/link_enc_cfg.h"
 #include "link_hwss.h"
 #include "link.h"
+#include "dc_state_priv.h"
 
 #define DC_LOGGER \
 	dc_logger
@@ -623,9 +624,9 @@ void dcn20_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx)
 }
 
 
-void dcn20_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn20_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx)
 {
-	bool is_phantom = pipe_ctx->plane_state && pipe_ctx->plane_state->is_phantom;
+	bool is_phantom = dc_state_get_pipe_subvp_type(state, pipe_ctx) == SUBVP_PHANTOM;
 	struct timing_generator *tg = is_phantom ? pipe_ctx->stream_res.tg : NULL;
 
 	DC_LOGGER_INIT(dc->ctx->logger);
@@ -847,7 +848,7 @@ enum dc_status dcn20_enable_stream_timing(
 	/* TODO enable stream if timing changed */
 	/* TODO unblank stream if DP */
 
-	if (pipe_ctx->stream && pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+	if (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) {
 		if (pipe_ctx->stream_res.tg && pipe_ctx->stream_res.tg->funcs->phantom_crtc_post_enable)
 			pipe_ctx->stream_res.tg->funcs->phantom_crtc_post_enable(pipe_ctx->stream_res.tg);
 	}
@@ -1368,8 +1369,14 @@ void dcn20_pipe_control_lock(
 	}
 }
 
-static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx *new_pipe)
+static void dcn20_detect_pipe_changes(struct dc_state *old_state,
+		struct dc_state *new_state,
+		struct pipe_ctx *old_pipe,
+		struct pipe_ctx *new_pipe)
 {
+	bool old_is_phantom = dc_state_get_pipe_subvp_type(old_state, old_pipe) == SUBVP_PHANTOM;
+	bool new_is_phantom = dc_state_get_pipe_subvp_type(new_state, new_pipe) == SUBVP_PHANTOM;
+
 	new_pipe->update_flags.raw = 0;
 
 	/* If non-phantom pipe is being transitioned to a phantom pipe,
@@ -1379,8 +1386,8 @@ static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx
 	 * be different). The post_unlock sequence will set the correct
 	 * update flags to enable the phantom pipe.
 	 */
-	if (old_pipe->plane_state && !old_pipe->plane_state->is_phantom &&
-			new_pipe->plane_state && new_pipe->plane_state->is_phantom) {
+	if (old_pipe->plane_state && !old_is_phantom &&
+			new_pipe->plane_state && new_is_phantom) {
 		new_pipe->update_flags.bits.disable = 1;
 		return;
 	}
@@ -1400,6 +1407,10 @@ static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx
 		new_pipe->update_flags.bits.scaler = 1;
 		new_pipe->update_flags.bits.viewport = 1;
 		new_pipe->update_flags.bits.det_size = 1;
+		if (new_pipe->stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE &&
+				new_pipe->stream_res.test_pattern_params.width != 0 &&
+				new_pipe->stream_res.test_pattern_params.height != 0)
+			new_pipe->update_flags.bits.test_pattern_changed = 1;
 		if (!new_pipe->top_pipe && !new_pipe->prev_odm_pipe) {
 			new_pipe->update_flags.bits.odm = 1;
 			new_pipe->update_flags.bits.global_sync = 1;
@@ -1412,14 +1423,14 @@ static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx
 	 * The remove-add sequence of the phantom pipe always results in the pipe
 	 * being blanked in enable_stream_timing (DPG).
 	 */
-	if (new_pipe->stream && new_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM)
+	if (new_pipe->stream && dc_state_get_pipe_subvp_type(new_state, new_pipe) == SUBVP_PHANTOM)
 		new_pipe->update_flags.bits.enable = 1;
 
 	/* Phantom pipes are effectively disabled, if the pipe was previously phantom
 	 * we have to enable
 	 */
-	if (old_pipe->plane_state && old_pipe->plane_state->is_phantom &&
-			new_pipe->plane_state && !new_pipe->plane_state->is_phantom)
+	if (old_pipe->plane_state && old_is_phantom &&
+			new_pipe->plane_state && !new_is_phantom)
 		new_pipe->update_flags.bits.enable = 1;
 
 	if (old_pipe->plane_state && !new_pipe->plane_state) {
@@ -1556,6 +1567,7 @@ static void dcn20_update_dchubp_dpp(
 	struct dc_plane_state *plane_state = pipe_ctx->plane_state;
 	struct dccg *dccg = dc->res_pool->dccg;
 	bool viewport_changed = false;
+	enum mall_stream_type pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe_ctx);
 
 	if (pipe_ctx->update_flags.bits.dppclk)
 		dpp->funcs->dpp_dppclk_control(dpp, false, true);
@@ -1701,7 +1713,7 @@ static void dcn20_update_dchubp_dpp(
 		pipe_ctx->update_flags.bits.plane_changed ||
 		plane_state->update_flags.bits.addr_update) {
 		if (resource_is_pipe_type(pipe_ctx, OTG_MASTER) &&
-				pipe_ctx->stream->mall_stream_config.type == SUBVP_MAIN) {
+				pipe_mall_type == SUBVP_MAIN) {
 			union block_sequence_params params;
 
 			params.subvp_save_surf_addr.dc_dmub_srv = dc->ctx->dmub_srv;
@@ -1715,7 +1727,7 @@ static void dcn20_update_dchubp_dpp(
 	if (pipe_ctx->update_flags.bits.enable)
 		hubp->funcs->set_blank(hubp, false);
 	/* If the stream paired with this plane is phantom, the plane is also phantom */
-	if (pipe_ctx->stream && pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM
+	if (pipe_ctx->stream && pipe_mall_type == SUBVP_PHANTOM
 			&& hubp->funcs->phantom_hubp_post_enable)
 		hubp->funcs->phantom_hubp_post_enable(hubp);
 }
@@ -1773,7 +1785,7 @@ static void dcn20_program_pipe(
 				pipe_ctx->pipe_dlg_param.vupdate_offset,
 				pipe_ctx->pipe_dlg_param.vupdate_width);
 
-		if (pipe_ctx->stream->mall_stream_config.type != SUBVP_PHANTOM)
+		if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM)
 			pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE);
 
 		pipe_ctx->stream_res.tg->funcs->set_vtg_params(
@@ -1877,6 +1889,8 @@ void dcn20_program_front_end_for_ctx(
 	int i;
 	struct dce_hwseq *hws = dc->hwseq;
 	DC_LOGGER_INIT(dc->ctx->logger);
+	unsigned int prev_hubp_count = 0;
+	unsigned int hubp_count = 0;
 
 	if (resource_is_pipe_topology_changed(dc->current_state, context))
 		resource_log_pipe_topology_update(dc, context);
@@ -1894,9 +1908,23 @@ void dcn20_program_front_end_for_ctx(
 		}
 	}
 
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		if (dc->current_state->res_ctx.pipe_ctx[i].plane_state)
+			prev_hubp_count++;
+		if (context->res_ctx.pipe_ctx[i].plane_state)
+			hubp_count++;
+	}
+
+	if (prev_hubp_count == 0 && hubp_count > 0) {
+		if (dc->res_pool->hubbub->funcs->force_pstate_change_control)
+			dc->res_pool->hubbub->funcs->force_pstate_change_control(
+					dc->res_pool->hubbub, true, false);
+		udelay(500);
+	}
+
 	/* Set pipe update flags and lock pipes */
 	for (i = 0; i < dc->res_pool->pipe_count; i++)
-		dcn20_detect_pipe_changes(&dc->current_state->res_ctx.pipe_ctx[i],
+		dcn20_detect_pipe_changes(dc->current_state, context, &dc->current_state->res_ctx.pipe_ctx[i],
 				&context->res_ctx.pipe_ctx[i]);
 
 	/* When disabling phantom pipes, turn on phantom OTG first (so we can get double
@@ -1906,15 +1934,16 @@ void dcn20_program_front_end_for_ctx(
 		struct dc_stream_state *stream = dc->current_state->res_ctx.pipe_ctx[i].stream;
 
 		if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable && stream &&
-			dc->current_state->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) {
+				dc_state_get_pipe_subvp_type(dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM) {
 			struct timing_generator *tg = dc->current_state->res_ctx.pipe_ctx[i].stream_res.tg;
 
 			if (tg->funcs->enable_crtc) {
 				if (dc->hwss.blank_phantom) {
 					int main_pipe_width, main_pipe_height;
+					struct dc_stream_state *phantom_stream = dc_state_get_paired_subvp_stream(dc->current_state, dc->current_state->res_ctx.pipe_ctx[i].stream);
 
-					main_pipe_width = dc->current_state->res_ctx.pipe_ctx[i].stream->mall_stream_config.paired_stream->dst.width;
-					main_pipe_height = dc->current_state->res_ctx.pipe_ctx[i].stream->mall_stream_config.paired_stream->dst.height;
+					main_pipe_width = phantom_stream->dst.width;
+					main_pipe_height = phantom_stream->dst.height;
 					dc->hwss.blank_phantom(dc, tg, main_pipe_width, main_pipe_height);
 				}
 				tg->funcs->enable_crtc(tg);
@@ -1943,9 +1972,9 @@ void dcn20_program_front_end_for_ctx(
 			 * DET allocation.
 			 */
 			if (hubbub->funcs->program_det_size && (context->res_ctx.pipe_ctx[i].update_flags.bits.disable ||
-					(context->res_ctx.pipe_ctx[i].plane_state && context->res_ctx.pipe_ctx[i].plane_state->is_phantom)))
+					(context->res_ctx.pipe_ctx[i].plane_state && dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM)))
 				hubbub->funcs->program_det_size(hubbub, dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0);
-			hws->funcs.plane_atomic_disconnect(dc, &dc->current_state->res_ctx.pipe_ctx[i]);
+			hws->funcs.plane_atomic_disconnect(dc, dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]);
 			DC_LOG_DC("Reset mpcc for pipe %d\n", dc->current_state->res_ctx.pipe_ctx[i].pipe_idx);
 		}
 
@@ -1968,7 +1997,7 @@ void dcn20_program_front_end_for_ctx(
 					 * but the MPO still exists until the double buffered update of the main pipe so we
 					 * will get a frame of underflow if the phantom pipe is programmed here.
 					 */
-					if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_PHANTOM)
+					if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM)
 						dcn20_program_pipe(dc, pipe, context);
 				}
 
@@ -2018,7 +2047,7 @@ void dcn20_post_unlock_program_front_end(
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++)
 		if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable)
-			dc->hwss.disable_plane(dc, &dc->current_state->res_ctx.pipe_ctx[i]);
+			dc->hwss.disable_plane(dc, dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]);
 
 	/*
 	 * If we are enabling a pipe, we need to wait for pending clear as this is a critical
@@ -2030,7 +2059,7 @@ void dcn20_post_unlock_program_front_end(
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 		// Don't check flip pending on phantom pipes
 		if (pipe->plane_state && !pipe->top_pipe && pipe->update_flags.bits.enable &&
-				pipe->stream->mall_stream_config.type != SUBVP_PHANTOM) {
+				dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) {
 			struct hubp *hubp = pipe->plane_res.hubp;
 			int j = 0;
 			for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_US / polling_interval_us
@@ -2039,6 +2068,10 @@ void dcn20_post_unlock_program_front_end(
 		}
 	}
 
+	if (dc->res_pool->hubbub->funcs->force_pstate_change_control)
+		dc->res_pool->hubbub->funcs->force_pstate_change_control(
+				dc->res_pool->hubbub, false, false);
+
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
@@ -2049,7 +2082,7 @@ void dcn20_post_unlock_program_front_end(
 			 * programming sequence).
 			 */
 			while (pipe) {
-				if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+				if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
 					/* When turning on the phantom pipe we want to run through the
 					 * entire enable sequence, so apply all the "enable" flags.
 					 */
@@ -2119,7 +2152,7 @@ void dcn20_prepare_bandwidth(
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
 		// At optimize don't restore the original watermark value
-		if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE) {
+		if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE) {
 			context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 4U * 1000U * 1000U * 1000U;
 			break;
 		}
@@ -2163,7 +2196,7 @@ void dcn20_optimize_bandwidth(
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
 		// At optimize don't need  to restore the original watermark value
-		if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE) {
+		if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE) {
 			context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 4U * 1000U * 1000U * 1000U;
 			break;
 		}
@@ -2197,7 +2230,8 @@ void dcn20_optimize_bandwidth(
 			dc->clk_mgr,
 			context,
 			true);
-	if (context->bw_ctx.bw.dcn.clk.zstate_support == DCN_ZSTATE_SUPPORT_ALLOW) {
+	if (context->bw_ctx.bw.dcn.clk.zstate_support == DCN_ZSTATE_SUPPORT_ALLOW &&
+		!dc->debug.disable_extblankadj) {
 		for (i = 0; i < dc->res_pool->pipe_count; ++i) {
 			struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
 
@@ -2527,7 +2561,7 @@ void dcn20_setup_vupdate_interrupt(struct dc *dc, struct pipe_ctx *pipe_ctx)
 		tg->funcs->setup_vertical_interrupt2(tg, start_line);
 }
 
-static void dcn20_reset_back_end_for_pipe(
+void dcn20_reset_back_end_for_pipe(
 		struct dc *dc,
 		struct pipe_ctx *pipe_ctx,
 		struct dc_state *context)
@@ -2590,7 +2624,8 @@ static void dcn20_reset_back_end_for_pipe(
 		 * the case where the same symclk is shared across multiple otg
 		 * instances
 		 */
-		link->phy_state.symclk_ref_cnts.otg = 0;
+		if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal))
+			link->phy_state.symclk_ref_cnts.otg = 0;
 		if (link->phy_state.symclk_state == SYMCLK_ON_TX_OFF) {
 			link_hwss->disable_link_output(link,
 					&pipe_ctx->link_res, pipe_ctx->stream->signal);
@@ -2755,18 +2790,17 @@ void dcn20_enable_stream(struct pipe_ctx *pipe_ctx)
 	}
 
 	if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) {
-		dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst;
-		dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, dp_hpo_inst);
-
-		phyd32clk = get_phyd32clk_src(link);
-		dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk);
-
 		dto_params.otg_inst = tg->inst;
 		dto_params.pixclk_khz = pipe_ctx->stream->timing.pix_clk_100hz / 10;
 		dto_params.num_odm_segments = get_odm_segment_count(pipe_ctx);
 		dto_params.timing = &pipe_ctx->stream->timing;
 		dto_params.ref_dtbclk_khz = dc->clk_mgr->funcs->get_dtb_ref_clk_frequency(dc->clk_mgr);
 		dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
+		dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst;
+		dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, dp_hpo_inst);
+
+		phyd32clk = get_phyd32clk_src(link);
+		dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk);
 	} else {
 		if (dccg->funcs->enable_symclk_se)
 			dccg->funcs->enable_symclk_se(dccg, stream_enc->stream_enc_inst,
@@ -2923,7 +2957,7 @@ void dcn20_fpga_init_hw(struct dc *dc)
 		dc->res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
 		pipe_ctx->stream_res.opp = dc->res_pool->opps[i];
 		/*to do*/
-		hws->funcs.plane_atomic_disconnect(dc, pipe_ctx);
+		hws->funcs.plane_atomic_disconnect(dc, context, pipe_ctx);
 	}
 
 	/* initialize DWB pointer to MCIF_WB */
@@ -2940,7 +2974,7 @@ void dcn20_fpga_init_hw(struct dc *dc)
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
 
-		dc->hwss.disable_plane(dc, pipe_ctx);
+		dc->hwss.disable_plane(dc, context, pipe_ctx);
 
 		pipe_ctx->stream_res.tg = NULL;
 		pipe_ctx->plane_res.hubp = NULL;
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h
index ab02e4e9c8c2..d950b3e54ec2 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h
@@ -52,7 +52,7 @@ void dcn20_program_output_csc(struct dc *dc,
 void dcn20_enable_stream(struct pipe_ctx *pipe_ctx);
 void dcn20_unblank_stream(struct pipe_ctx *pipe_ctx,
 		struct dc_link_settings *link_settings);
-void dcn20_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn20_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx);
 void dcn20_disable_pixel_data(
 		struct dc *dc,
 		struct pipe_ctx *pipe_ctx,
@@ -84,6 +84,10 @@ enum dc_status dcn20_enable_stream_timing(
 void dcn20_disable_stream_gating(struct dc *dc, struct pipe_ctx *pipe_ctx);
 void dcn20_enable_stream_gating(struct dc *dc, struct pipe_ctx *pipe_ctx);
 void dcn20_setup_vupdate_interrupt(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn20_reset_back_end_for_pipe(
+		struct dc *dc,
+		struct pipe_ctx *pipe_ctx,
+		struct dc_state *context);
 void dcn20_init_blank(
 		struct dc *dc,
 		struct timing_generator *tg);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c
index 884e3e323338..884e3e323338 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.h
index 12277797cd71..12277797cd71 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c
index d3fe6092f50e..d5769f38874f 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c
@@ -320,7 +320,7 @@ void dcn201_init_hw(struct dc *dc)
 		res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
 		pipe_ctx->stream_res.opp = res_pool->opps[i];
 		/*To do: number of MPCC != number of opp*/
-		hws->funcs.plane_atomic_disconnect(dc, pipe_ctx);
+		hws->funcs.plane_atomic_disconnect(dc, context, pipe_ctx);
 	}
 
 	/* initialize DWB pointer to MCIF_WB */
@@ -337,7 +337,7 @@ void dcn201_init_hw(struct dc *dc)
 	for (i = 0; i < res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
 
-		dc->hwss.disable_plane(dc, pipe_ctx);
+		dc->hwss.disable_plane(dc, context, pipe_ctx);
 
 		pipe_ctx->stream_res.tg = NULL;
 		pipe_ctx->plane_res.hubp = NULL;
@@ -369,7 +369,9 @@ void dcn201_init_hw(struct dc *dc)
 }
 
 /* trigger HW to start disconnect plane from stream on the next vsync */
-void dcn201_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn201_plane_atomic_disconnect(struct dc *dc,
+		struct dc_state *state,
+		struct pipe_ctx *pipe_ctx)
 {
 	struct dce_hwseq *hws = dc->hwseq;
 	struct hubp *hubp = pipe_ctx->plane_res.hubp;
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.h
index 26cd62be6418..6a50a9894be6 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.h
@@ -33,7 +33,7 @@ void dcn201_init_hw(struct dc *dc);
 void dcn201_unblank_stream(struct pipe_ctx *pipe_ctx,
 		struct dc_link_settings *link_settings);
 void dcn201_update_plane_addr(const struct dc *dc, struct pipe_ctx *pipe_ctx);
-void dcn201_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn201_plane_atomic_disconnect(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx);
 void dcn201_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx);
 void dcn201_set_cursor_attribute(struct pipe_ctx *pipe_ctx);
 void dcn201_pipe_control_lock(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c
index a13bf6c9386e..a13bf6c9386e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.h
index 1168887b033d..1168887b033d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c
index 467812cf3368..7252f5f781f0 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c
@@ -137,7 +137,8 @@ void dcn21_PLAT_58856_wa(struct dc_state *context, struct pipe_ctx *pipe_ctx)
 	pipe_ctx->stream->dpms_off = true;
 }
 
-static bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst)
+static bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst,
+		uint32_t option, uint32_t panel_inst, uint32_t pwrseq_inst)
 {
 	union dmub_rb_cmd cmd;
 	struct dc_context *dc = abm->ctx;
@@ -147,12 +148,13 @@ static bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t optio
 	cmd.abm_set_pipe.header.type = DMUB_CMD__ABM;
 	cmd.abm_set_pipe.header.sub_type = DMUB_CMD__ABM_SET_PIPE;
 	cmd.abm_set_pipe.abm_set_pipe_data.otg_inst = otg_inst;
+	cmd.abm_set_pipe.abm_set_pipe_data.pwrseq_inst = pwrseq_inst;
 	cmd.abm_set_pipe.abm_set_pipe_data.set_pipe_option = option;
 	cmd.abm_set_pipe.abm_set_pipe_data.panel_inst = panel_inst;
 	cmd.abm_set_pipe.abm_set_pipe_data.ramping_boundary = ramping_boundary;
 	cmd.abm_set_pipe.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pipe_data);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
@@ -171,7 +173,7 @@ static void dmub_abm_set_backlight(struct dc_context *dc, uint32_t backlight_pwm
 	cmd.abm_set_backlight.abm_set_backlight_data.panel_mask = (0x01 << panel_inst);
 	cmd.abm_set_backlight.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_backlight_data);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx)
@@ -179,7 +181,6 @@ void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx)
 	struct abm *abm = pipe_ctx->stream_res.abm;
 	uint32_t otg_inst = pipe_ctx->stream_res.tg->inst;
 	struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl;
-
 	struct dmcu *dmcu = pipe_ctx->stream->ctx->dc->res_pool->dmcu;
 
 	if (dmcu) {
@@ -190,9 +191,13 @@ void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx)
 	if (abm && panel_cntl) {
 		if (abm->funcs && abm->funcs->set_pipe_ex) {
 			abm->funcs->set_pipe_ex(abm, otg_inst, SET_ABM_PIPE_IMMEDIATELY_DISABLE,
-			panel_cntl->inst);
+					panel_cntl->inst, panel_cntl->pwrseq_inst);
 		} else {
-			dmub_abm_set_pipe(abm, otg_inst, SET_ABM_PIPE_IMMEDIATELY_DISABLE, panel_cntl->inst);
+				dmub_abm_set_pipe(abm,
+						otg_inst,
+						SET_ABM_PIPE_IMMEDIATELY_DISABLE,
+						panel_cntl->inst,
+						panel_cntl->pwrseq_inst);
 		}
 		panel_cntl->funcs->store_backlight_level(panel_cntl);
 	}
@@ -201,21 +206,32 @@ void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx)
 void dcn21_set_pipe(struct pipe_ctx *pipe_ctx)
 {
 	struct abm *abm = pipe_ctx->stream_res.abm;
-	uint32_t otg_inst = pipe_ctx->stream_res.tg->inst;
+	struct timing_generator *tg = pipe_ctx->stream_res.tg;
 	struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl;
 	struct dmcu *dmcu = pipe_ctx->stream->ctx->dc->res_pool->dmcu;
+	uint32_t otg_inst;
+
+	if (!abm || !tg || !panel_cntl)
+		return;
+
+	otg_inst = tg->inst;
 
 	if (dmcu) {
 		dce110_set_pipe(pipe_ctx);
 		return;
 	}
 
-	if (abm && panel_cntl) {
-		if (abm->funcs && abm->funcs->set_pipe_ex) {
-			abm->funcs->set_pipe_ex(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst);
-		} else {
-			dmub_abm_set_pipe(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst);
-		}
+	if (abm->funcs && abm->funcs->set_pipe_ex) {
+		abm->funcs->set_pipe_ex(abm,
+					otg_inst,
+					SET_ABM_PIPE_NORMAL,
+					panel_cntl->inst,
+					panel_cntl->pwrseq_inst);
+	} else {
+		dmub_abm_set_pipe(abm, otg_inst,
+				  SET_ABM_PIPE_NORMAL,
+				  panel_cntl->inst,
+				  panel_cntl->pwrseq_inst);
 	}
 }
 
@@ -225,26 +241,35 @@ bool dcn21_set_backlight_level(struct pipe_ctx *pipe_ctx,
 {
 	struct dc_context *dc = pipe_ctx->stream->ctx;
 	struct abm *abm = pipe_ctx->stream_res.abm;
+	struct timing_generator *tg = pipe_ctx->stream_res.tg;
 	struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl;
+	uint32_t otg_inst;
+
+	if (!abm || !tg || !panel_cntl)
+		return false;
+
+	otg_inst = tg->inst;
 
 	if (dc->dc->res_pool->dmcu) {
 		dce110_set_backlight_level(pipe_ctx, backlight_pwm_u16_16, frame_ramp);
 		return true;
 	}
 
-	if (abm != NULL) {
-		uint32_t otg_inst = pipe_ctx->stream_res.tg->inst;
-
-		if (abm && panel_cntl) {
-			if (abm->funcs && abm->funcs->set_pipe_ex) {
-				abm->funcs->set_pipe_ex(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst);
-			} else {
-				dmub_abm_set_pipe(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst);
-			}
-		}
+	if (abm->funcs && abm->funcs->set_pipe_ex) {
+		abm->funcs->set_pipe_ex(abm,
+					otg_inst,
+					SET_ABM_PIPE_NORMAL,
+					panel_cntl->inst,
+					panel_cntl->pwrseq_inst);
+	} else {
+		dmub_abm_set_pipe(abm,
+				  otg_inst,
+				  SET_ABM_PIPE_NORMAL,
+				  panel_cntl->inst,
+				  panel_cntl->pwrseq_inst);
 	}
 
-	if (abm && abm->funcs && abm->funcs->set_backlight_level_pwm)
+	if (abm->funcs && abm->funcs->set_backlight_level_pwm)
 		abm->funcs->set_backlight_level_pwm(abm, backlight_pwm_u16_16,
 			frame_ramp, 0, panel_cntl->inst);
 	else
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c
index 18249c6b6d81..18249c6b6d81 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.h
index 3ed24292648a..3ed24292648a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
index d71faf2ecd41..c34c13e1e0a4 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
@@ -51,7 +51,7 @@
 #include "dcn20/dcn20_hwseq.h"
 #include "dcn30/dcn30_resource.h"
 #include "link.h"
-
+#include "dc_state_priv.h"
 
 
 
@@ -367,6 +367,10 @@ void dcn30_enable_writeback(
 	DC_LOG_DWB("%s dwb_pipe_inst = %d, mpcc_inst = %d",\
 		__func__, wb_info->dwb_pipe_inst,\
 		wb_info->mpcc_inst);
+
+	/* Warmup interface */
+	dcn30_mmhubbub_warmup(dc, 1, wb_info);
+
 	/* Update writeback pipe */
 	dcn30_set_writeback(dc, wb_info, context);
 
@@ -472,6 +476,7 @@ void dcn30_init_hw(struct dc *dc)
 	int i;
 	int edp_num;
 	uint32_t backlight = MAX_BACKLIGHT_LEVEL;
+	uint32_t user_level = MAX_BACKLIGHT_LEVEL;
 
 	if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks)
 		dc->clk_mgr->funcs->init_clocks(dc->clk_mgr);
@@ -608,13 +613,15 @@ void dcn30_init_hw(struct dc *dc)
 	for (i = 0; i < dc->link_count; i++) {
 		struct dc_link *link = dc->links[i];
 
-		if (link->panel_cntl)
+		if (link->panel_cntl) {
 			backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
+			user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL;
+		}
 	}
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		if (abms[i] != NULL)
-			abms[i]->funcs->abm_init(abms[i], backlight);
+			abms[i]->funcs->abm_init(abms[i], backlight, user_level);
 	}
 
 	/* power AFMT HDMI memory TODO: may move to dis/en output save power*/
@@ -750,7 +757,7 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
 				cmd.mall.header.sub_type = DMUB_CMD__MALL_ACTION_NO_DF_REQ;
 				cmd.mall.header.payload_bytes = sizeof(cmd.mall) - sizeof(cmd.mall.header);
 
-				dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+				dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
 
 				return true;
 			}
@@ -872,7 +879,7 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
 					cmd.mall.cursor_height = cursor_attr.height;
 					cmd.mall.cursor_pitch = cursor_attr.pitch;
 
-					dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+					dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 					/* Use copied cursor, and it's okay to not switch back */
 					cursor_attr.address.quad_part = cmd.mall.cursor_copy_dst.quad_part;
@@ -888,7 +895,7 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
 				cmd.mall.tmr_scale = tmr_scale;
 				cmd.mall.debug_bits = dc->debug.mall_error_as_fatal;
 
-				dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+				dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
 
 				return true;
 			}
@@ -905,7 +912,7 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
 	cmd.mall.header.payload_bytes =
 		sizeof(cmd.mall) - sizeof(cmd.mall.header);
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
@@ -962,7 +969,7 @@ void dcn30_hardware_release(struct dc *dc)
 		if (!pipe->stream)
 			continue;
 
-		if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+		if (dc_state_get_pipe_subvp_type(dc->current_state, pipe) == SUBVP_MAIN) {
 			subvp_in_use = true;
 			break;
 		}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c
index 9894caedffed..9894caedffed 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.h
index c280ff90bfa3..c280ff90bfa3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c
index 6477009ce065..6477009ce065 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.h
index 0bca48ccbfa2..0bca48ccbfa2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.c
index 637f9514d37b..637f9514d37b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.h
index 899587b93aa1..899587b93aa1 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.c
index edb4d68b8187..edb4d68b8187 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.h
index 4949981126d7..4949981126d7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c
index 97798cee876e..7423880fabb6 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c
@@ -96,7 +96,8 @@ static void enable_memory_low_power(struct dc *dc)
 	if (dc->debug.enable_mem_low_power.bits.vpg && dc->res_pool->stream_enc[0]->vpg->funcs->vpg_powerdown) {
 		// Power down VPGs
 		for (i = 0; i < dc->res_pool->stream_enc_count; i++)
-			dc->res_pool->stream_enc[i]->vpg->funcs->vpg_powerdown(dc->res_pool->stream_enc[i]->vpg);
+			if (dc->res_pool->stream_enc[i]->vpg)
+				dc->res_pool->stream_enc[i]->vpg->funcs->vpg_powerdown(dc->res_pool->stream_enc[i]->vpg);
 #if defined(CONFIG_DRM_AMD_DC_FP)
 		for (i = 0; i < dc->res_pool->hpo_dp_stream_enc_count; i++)
 			dc->res_pool->hpo_dp_stream_enc[i]->vpg->funcs->vpg_powerdown(dc->res_pool->hpo_dp_stream_enc[i]->vpg);
@@ -112,6 +113,7 @@ void dcn31_init_hw(struct dc *dc)
 	struct dc_bios *dcb = dc->ctx->dc_bios;
 	struct resource_pool *res_pool = dc->res_pool;
 	uint32_t backlight = MAX_BACKLIGHT_LEVEL;
+	uint32_t user_level = MAX_BACKLIGHT_LEVEL;
 	int i;
 
 	if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks)
@@ -223,13 +225,15 @@ void dcn31_init_hw(struct dc *dc)
 	for (i = 0; i < dc->link_count; i++) {
 		struct dc_link *link = dc->links[i];
 
-		if (link->panel_cntl)
+		if (link->panel_cntl) {
 			backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
+			user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL;
+		}
 	}
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		if (abms[i] != NULL)
-			abms[i]->funcs->abm_init(abms[i], backlight);
+			abms[i]->funcs->abm_init(abms[i], backlight, user_level);
 	}
 
 	/* power AFMT HDMI memory TODO: may move to dis/en output save power*/
@@ -415,7 +419,7 @@ void dcn31_z10_save_init(struct dc *dc)
 	cmd.dcn_restore.header.type = DMUB_CMD__IDLE_OPT;
 	cmd.dcn_restore.header.sub_type = DMUB_CMD__IDLE_OPT_DCN_SAVE_INIT;
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 void dcn31_z10_restore(const struct dc *dc)
@@ -433,7 +437,7 @@ void dcn31_z10_restore(const struct dc *dc)
 	cmd.dcn_restore.header.type = DMUB_CMD__IDLE_OPT;
 	cmd.dcn_restore.header.sub_type = DMUB_CMD__IDLE_OPT_DCN_RESTORE;
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 void dcn31_hubp_pg_control(struct dce_hwseq *hws, unsigned int hubp_inst, bool power_on)
@@ -523,7 +527,8 @@ static void dcn31_reset_back_end_for_pipe(
 	if (pipe_ctx->stream_res.tg->funcs->set_odm_bypass)
 		pipe_ctx->stream_res.tg->funcs->set_odm_bypass(
 				pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
-	pipe_ctx->stream->link->phy_state.symclk_ref_cnts.otg = 0;
+	if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal))
+		pipe_ctx->stream->link->phy_state.symclk_ref_cnts.otg = 0;
 
 	if (pipe_ctx->stream_res.tg->funcs->set_drr)
 		pipe_ctx->stream_res.tg->funcs->set_drr(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c
index 669f524bd064..669f524bd064 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.h
index a3db08c8bd35..a3db08c8bd35 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c
index ccb7e317e86a..ccb7e317e86a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.h
index 8f92e66577cf..8f92e66577cf 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
index 6a65af8c36b9..aa36d7a56ca8 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
@@ -51,6 +51,7 @@
 #include "dcn32/dcn32_resource.h"
 #include "link.h"
 #include "../dcn20/dcn20_hwseq.h"
+#include "dc_state_priv.h"
 
 #define DC_LOGGER_INIT(logger)
 
@@ -277,7 +278,7 @@ bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable)
 				cmd.cab.header.sub_type = DMUB_CMD__CAB_NO_DCN_REQ;
 				cmd.cab.header.payload_bytes = sizeof(cmd.cab) - sizeof(cmd.cab.header);
 
-				dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+				dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
 
 				return true;
 			}
@@ -311,7 +312,7 @@ bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable)
 				cmd.cab.header.payload_bytes = sizeof(cmd.cab) - sizeof(cmd.cab.header);
 				cmd.cab.cab_alloc_ways = (uint8_t)ways;
 
-				dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+				dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
 
 				return true;
 			}
@@ -327,7 +328,7 @@ bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable)
 	cmd.cab.header.payload_bytes =
 			sizeof(cmd.cab) - sizeof(cmd.cab.header);
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
@@ -348,8 +349,7 @@ void dcn32_commit_subvp_config(struct dc *dc, struct dc_state *context)
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
 
-		if (pipe_ctx->stream && pipe_ctx->stream->mall_stream_config.paired_stream &&
-				pipe_ctx->stream->mall_stream_config.type == SUBVP_MAIN) {
+		if (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_MAIN) {
 			// There is at least 1 SubVP pipe, so enable SubVP
 			enable_subvp = true;
 			break;
@@ -375,18 +375,20 @@ void dcn32_subvp_pipe_control_lock(struct dc *dc,
 	bool subvp_immediate_flip = false;
 	bool subvp_in_use = false;
 	struct pipe_ctx *pipe;
+	enum mall_stream_type pipe_mall_type = SUBVP_NONE;
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		pipe = &context->res_ctx.pipe_ctx[i];
+		pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe);
 
-		if (pipe->stream && pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+		if (pipe->stream && pipe->plane_state && pipe_mall_type == SUBVP_MAIN) {
 			subvp_in_use = true;
 			break;
 		}
 	}
 
 	if (top_pipe_to_program && top_pipe_to_program->stream && top_pipe_to_program->plane_state) {
-		if (top_pipe_to_program->stream->mall_stream_config.type == SUBVP_MAIN &&
+		if (dc_state_get_pipe_subvp_type(context, top_pipe_to_program) == SUBVP_MAIN &&
 				top_pipe_to_program->plane_state->flip_immediate)
 			subvp_immediate_flip = true;
 	}
@@ -398,7 +400,8 @@ void dcn32_subvp_pipe_control_lock(struct dc *dc,
 		if (!lock) {
 			for (i = 0; i < dc->res_pool->pipe_count; i++) {
 				pipe = &context->res_ctx.pipe_ctx[i];
-				if (pipe->stream && pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_MAIN &&
+				/* Query the type per pipe: pipe_mall_type is only set by the detection loop above. */
+				if (pipe->stream && pipe->plane_state && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN &&
 						should_lock_all_pipes)
 					pipe->stream_res.tg->funcs->wait_for_state(pipe->stream_res.tg, CRTC_STATE_VBLANK);
 			}
@@ -416,14 +418,7 @@ void dcn32_subvp_pipe_control_lock_fast(union block_sequence_params *params)
 {
 	struct dc *dc = params->subvp_pipe_control_lock_fast_params.dc;
 	bool lock = params->subvp_pipe_control_lock_fast_params.lock;
-	struct pipe_ctx *pipe_ctx = params->subvp_pipe_control_lock_fast_params.pipe_ctx;
-	bool subvp_immediate_flip = false;
-
-	if (pipe_ctx && pipe_ctx->stream && pipe_ctx->plane_state) {
-		if (pipe_ctx->stream->mall_stream_config.type == SUBVP_MAIN &&
-				pipe_ctx->plane_state->flip_immediate)
-			subvp_immediate_flip = true;
-	}
+	bool subvp_immediate_flip = params->subvp_pipe_control_lock_fast_params.subvp_immediate_flip;
 
 	// Don't need to lock for DRR VSYNC flips -- FW will wait for DRR pending update cleared.
 	if (subvp_immediate_flip) {
@@ -487,8 +482,7 @@ bool dcn32_set_mcm_luts(
 		if (plane_state->blend_tf->type == TF_TYPE_HWPWL)
 			lut_params = &plane_state->blend_tf->pwl;
 		else if (plane_state->blend_tf->type == TF_TYPE_DISTRIBUTED_POINTS) {
-			cm_helper_translate_curve_to_hw_format(plane_state->ctx,
-					plane_state->blend_tf,
+			cm3_helper_translate_curve_to_hw_format(plane_state->blend_tf,
 					&dpp_base->regamma_params, false);
 			lut_params = &dpp_base->regamma_params;
 		}
@@ -503,8 +497,7 @@ bool dcn32_set_mcm_luts(
 		else if (plane_state->in_shaper_func->type == TF_TYPE_DISTRIBUTED_POINTS) {
 			// TODO: dpp_base replace
 			ASSERT(false);
-			cm_helper_translate_curve_to_hw_format(plane_state->ctx,
-					plane_state->in_shaper_func,
+			cm3_helper_translate_curve_to_hw_format(plane_state->in_shaper_func,
 					&dpp_base->shaper_params, true);
 			lut_params = &dpp_base->shaper_params;
 		}
@@ -611,7 +604,7 @@ void dcn32_update_force_pstate(struct dc *dc, struct dc_state *context)
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 		struct hubp *hubp = pipe->plane_res.hubp;
 
-		if (!pipe->stream || !(pipe->stream->mall_stream_config.type == SUBVP_MAIN ||
+		if (!pipe->stream || !(dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN ||
 		    pipe->stream->fpo_in_use)) {
 			if (hubp && hubp->funcs->hubp_update_force_pstate_disallow)
 				hubp->funcs->hubp_update_force_pstate_disallow(hubp, false);
@@ -626,7 +619,7 @@ void dcn32_update_force_pstate(struct dc *dc, struct dc_state *context)
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 		struct hubp *hubp = pipe->plane_res.hubp;
 
-		if (pipe->stream && (pipe->stream->mall_stream_config.type == SUBVP_MAIN ||
+		if (pipe->stream && (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN ||
 				pipe->stream->fpo_in_use)) {
 			if (hubp && hubp->funcs->hubp_update_force_pstate_disallow)
 				hubp->funcs->hubp_update_force_pstate_disallow(hubp, true);
@@ -673,8 +666,8 @@ void dcn32_update_mall_sel(struct dc *dc, struct dc_state *context)
 			if (cursor_size > 16384)
 				cache_cursor = true;
 
-			if (pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
-					hubp->funcs->hubp_update_mall_sel(hubp, 1, false);
+			if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
+				hubp->funcs->hubp_update_mall_sel(hubp, 1, false);
 			} else {
 				// MALL not supported with Stereo3D
 				hubp->funcs->hubp_update_mall_sel(hubp,
@@ -716,9 +709,8 @@ void dcn32_program_mall_pipe_config(struct dc *dc, struct dc_state *context)
 			 *        see if CURSOR_REQ_MODE will be back to 1 for SubVP
 			 *        when it should be 0 for MPO
 			 */
-			if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+			if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN)
 				hubp->funcs->hubp_prepare_subvp_buffering(hubp, true);
-			}
 		}
 	}
 }
@@ -761,6 +753,7 @@ void dcn32_init_hw(struct dc *dc)
 	int i;
 	int edp_num;
 	uint32_t backlight = MAX_BACKLIGHT_LEVEL;
+	uint32_t user_level = MAX_BACKLIGHT_LEVEL;
 
 	if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks)
 		dc->clk_mgr->funcs->init_clocks(dc->clk_mgr);
@@ -915,13 +908,15 @@ void dcn32_init_hw(struct dc *dc)
 	for (i = 0; i < dc->link_count; i++) {
 		struct dc_link *link = dc->links[i];
 
-		if (link->panel_cntl)
+		if (link->panel_cntl) {
 			backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
+			user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL;
+		}
 	}
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		if (abms[i] != NULL && abms[i]->funcs != NULL)
-			abms[i]->funcs->abm_init(abms[i], backlight);
+			abms[i]->funcs->abm_init(abms[i], backlight, user_level);
 	}
 
 	/* power AFMT HDMI memory TODO: may move to dis/en output save power*/
@@ -962,6 +957,12 @@ void dcn32_init_hw(struct dc *dc)
 		dc->caps.dmub_caps.subvp_psr = dc->ctx->dmub_srv->dmub->feature_caps.subvp_psr_support;
 		dc->caps.dmub_caps.gecc_enable = dc->ctx->dmub_srv->dmub->feature_caps.gecc_enable;
 		dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch;
+
+		if (dc->ctx->dmub_srv->dmub->fw_version <
+		    DMUB_FW_VERSION(7, 0, 35)) {
+			dc->debug.force_disable_subvp = true;
+			dc->debug.disable_fpo_optimizations = true;
+		}
 	}
 }
 
@@ -991,9 +992,22 @@ static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream,
 static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
 {
 	struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
+	struct dc *dc = pipe_ctx->stream->ctx->dc;
 	struct dc_stream_state *stream = pipe_ctx->stream;
 	struct pipe_ctx *odm_pipe;
 	int opp_cnt = 1;
+	struct dccg *dccg = dc->res_pool->dccg;
+	/* It has been found that when DSCCLK is lower than 16MHz, DCN register
+	 * accesses hang. When DSCCLK is based on refclk, DSCCLK is always a
+	 * fixed value higher than 16MHz so the issue doesn't occur. When DSCCLK is
+	 * generated by DTO, DSCCLK would be based on 1/3 dispclk. For small timings
+	 * with DSC such as 480p60Hz, the dispclk could be low enough to trigger
+	 * this problem. We are implementing a workaround here to keep using DSCCLK
+	 * based on fixed value refclk when timing is smaller than 3x16MHz (i.e.
+	 * 48MHz) pixel clock to avoid hitting this problem.
+	 */
+	bool should_use_dto_dscclk = (dccg->funcs->set_dto_dscclk != NULL) &&
+			stream->timing.pix_clk_100hz > 480000;
 
 	ASSERT(dsc);
 	for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
@@ -1016,12 +1030,16 @@ static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
 
 		dsc->funcs->dsc_set_config(dsc, &dsc_cfg, &dsc_optc_cfg);
 		dsc->funcs->dsc_enable(dsc, pipe_ctx->stream_res.opp->inst);
+		if (should_use_dto_dscclk)
+			dccg->funcs->set_dto_dscclk(dccg, dsc->inst);
 		for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
 			struct display_stream_compressor *odm_dsc = odm_pipe->stream_res.dsc;
 
 			ASSERT(odm_dsc);
 			odm_dsc->funcs->dsc_set_config(odm_dsc, &dsc_cfg, &dsc_optc_cfg);
 			odm_dsc->funcs->dsc_enable(odm_dsc, odm_pipe->stream_res.opp->inst);
+			if (should_use_dto_dscclk)
+				dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst);
 		}
 		dsc_cfg.dc_dsc_cfg.num_slices_h *= opp_cnt;
 		dsc_cfg.pic_width *= opp_cnt;
@@ -1041,9 +1059,13 @@ static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
 				OPTC_DSC_DISABLED, 0, 0);
 
 		/* disable DSC block */
+		if (dccg->funcs->set_ref_dscclk)
+			dccg->funcs->set_ref_dscclk(dccg, pipe_ctx->stream_res.dsc->inst);
 		dsc->funcs->dsc_disable(pipe_ctx->stream_res.dsc);
 		for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
 			ASSERT(odm_pipe->stream_res.dsc);
+			if (dccg->funcs->set_ref_dscclk)
+				dccg->funcs->set_ref_dscclk(dccg, odm_pipe->stream_res.dsc->inst);
 			odm_pipe->stream_res.dsc->funcs->dsc_disable(odm_pipe->stream_res.dsc);
 		}
 	}
@@ -1126,6 +1148,10 @@ void dcn32_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *
 		if (!pipe_ctx->next_odm_pipe && current_pipe_ctx->next_odm_pipe &&
 				current_pipe_ctx->next_odm_pipe->stream_res.dsc) {
 			struct display_stream_compressor *dsc = current_pipe_ctx->next_odm_pipe->stream_res.dsc;
+			struct dccg *dccg = dc->res_pool->dccg;
+
+			if (dccg->funcs->set_ref_dscclk)
+				dccg->funcs->set_ref_dscclk(dccg, dsc->inst);
 			/* disconnect DSC block from stream */
 			dsc->funcs->dsc_disconnect(dsc);
 		}
@@ -1199,7 +1225,7 @@ void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_
 			continue;
 
 		if ((pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))
-			&& pipe->stream->mall_stream_config.type != SUBVP_PHANTOM) {
+			&& dc_state_get_pipe_subvp_type(dc->current_state, pipe) != SUBVP_PHANTOM) {
 			pipe->stream_res.tg->funcs->disable_crtc(pipe->stream_res.tg);
 			reset_sync_context_for_pipe(dc, context, i);
 			otg_disabled[i] = true;
@@ -1350,8 +1376,8 @@ void dcn32_update_phantom_vp_position(struct dc *dc,
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
-		if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_MAIN &&
-				pipe->stream->mall_stream_config.paired_stream == phantom_pipe->stream) {
+		if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN &&
+				dc_state_get_paired_subvp_stream(context, pipe->stream) == phantom_pipe->stream) {
 			if (pipe->plane_state && pipe->plane_state->update_flags.bits.position_change) {
 
 				phantom_plane->src_rect.x = pipe->plane_state->src_rect.x;
@@ -1376,21 +1402,19 @@ void dcn32_update_phantom_vp_position(struct dc *dc,
 void dcn32_apply_update_flags_for_phantom(struct pipe_ctx *phantom_pipe)
 {
 	phantom_pipe->update_flags.raw = 0;
-	if (phantom_pipe->stream && phantom_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
-		if (resource_is_pipe_type(phantom_pipe, DPP_PIPE)) {
-			phantom_pipe->update_flags.bits.enable = 1;
-			phantom_pipe->update_flags.bits.mpcc = 1;
-			phantom_pipe->update_flags.bits.dppclk = 1;
-			phantom_pipe->update_flags.bits.hubp_interdependent = 1;
-			phantom_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1;
-			phantom_pipe->update_flags.bits.gamut_remap = 1;
-			phantom_pipe->update_flags.bits.scaler = 1;
-			phantom_pipe->update_flags.bits.viewport = 1;
-			phantom_pipe->update_flags.bits.det_size = 1;
-			if (resource_is_pipe_type(phantom_pipe, OTG_MASTER)) {
-				phantom_pipe->update_flags.bits.odm = 1;
-				phantom_pipe->update_flags.bits.global_sync = 1;
-			}
+	if (resource_is_pipe_type(phantom_pipe, DPP_PIPE)) {
+		phantom_pipe->update_flags.bits.enable = 1;
+		phantom_pipe->update_flags.bits.mpcc = 1;
+		phantom_pipe->update_flags.bits.dppclk = 1;
+		phantom_pipe->update_flags.bits.hubp_interdependent = 1;
+		phantom_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1;
+		phantom_pipe->update_flags.bits.gamut_remap = 1;
+		phantom_pipe->update_flags.bits.scaler = 1;
+		phantom_pipe->update_flags.bits.viewport = 1;
+		phantom_pipe->update_flags.bits.det_size = 1;
+		if (resource_is_pipe_type(phantom_pipe, OTG_MASTER)) {
+			phantom_pipe->update_flags.bits.odm = 1;
+			phantom_pipe->update_flags.bits.global_sync = 1;
 		}
 	}
 }
@@ -1450,9 +1474,44 @@ void dcn32_update_dsc_pg(struct dc *dc,
 	}
 }
 
+/* Reset current-state phantom pipes that @context drops, reprograms or no longer marks phantom. */
+void dcn32_disable_phantom_streams(struct dc *dc, struct dc_state *context)
+{
+	struct dce_hwseq *hws = dc->hwseq;
+	int idx = dc->res_pool->pipe_count;
+
+	while (--idx >= 0) {
+		struct pipe_ctx *cur = &dc->current_state->res_ctx.pipe_ctx[idx];
+		struct pipe_ctx *next = &context->res_ctx.pipe_ctx[idx];
+		struct clock_source *old_clk;
+
+		if (!cur->stream ||
+		    dc_state_get_pipe_subvp_type(dc->current_state, cur) != SUBVP_PHANTOM)
+			continue;
+
+		if (cur->top_pipe || cur->prev_odm_pipe)
+			continue;
+
+		/* Nothing to do when the new state keeps this phantom pipe as it is. */
+		if (next->stream && !pipe_need_reprogram(cur, next) &&
+		    dc_state_get_pipe_subvp_type(context, next) == SUBVP_PHANTOM)
+			continue;
+
+		/* Read the clock source before the back end reset runs. */
+		old_clk = cur->clock_source;
+		if (hws->funcs.reset_back_end_for_pipe)
+			hws->funcs.reset_back_end_for_pipe(dc, cur, dc->current_state);
+		if (hws->funcs.enable_stream_gating)
+			hws->funcs.enable_stream_gating(dc, cur);
+		if (old_clk)
+			old_clk->funcs->cs_power_down(old_clk);
+	}
+}
+
 void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context)
 {
 	unsigned int i;
+	enum dc_status status = DC_OK;
+	struct dce_hwseq *hws = dc->hwseq;
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
@@ -1462,8 +1521,8 @@ void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context)
 		 * pipe, wait for the double buffer update to complete first before we do
 		 * ANY phantom pipe programming.
 		 */
-		if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM &&
-				old_pipe->stream && old_pipe->stream->mall_stream_config.type != SUBVP_PHANTOM) {
+		if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM &&
+				old_pipe->stream && dc_state_get_pipe_subvp_type(dc->current_state, old_pipe) != SUBVP_PHANTOM) {
 			old_pipe->stream_res.tg->funcs->wait_for_state(
 					old_pipe->stream_res.tg,
 					CRTC_STATE_VBLANK);
@@ -1473,16 +1532,39 @@ void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context)
 		}
 	}
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i];
-
-		if (new_pipe->stream && new_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
-			// If old context or new context has phantom pipes, apply
-			// the phantom timings now. We can't change the phantom
-			// pipe configuration safely without driver acquiring
-			// the DMCUB lock first.
-			dc->hwss.apply_ctx_to_hw(dc, context);
-			break;
+		struct pipe_ctx *pipe_ctx_old =
+					&dc->current_state->res_ctx.pipe_ctx[i];
+		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+		if (pipe_ctx->stream == NULL)
+			continue;
+
+		if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM)
+			continue;
+
+		if (pipe_ctx->stream == pipe_ctx_old->stream &&
+			pipe_ctx->stream->link->link_state_valid) {
+			continue;
 		}
+
+		if (pipe_ctx_old->stream && !pipe_need_reprogram(pipe_ctx_old, pipe_ctx))
+			continue;
+
+		if (pipe_ctx->top_pipe || pipe_ctx->prev_odm_pipe)
+			continue;
+
+		if (hws->funcs.apply_single_controller_ctx_to_hw)
+			status = hws->funcs.apply_single_controller_ctx_to_hw(
+					pipe_ctx,
+					context,
+					dc);
+
+		ASSERT(status == DC_OK);
+
+#ifdef CONFIG_DRM_AMD_DC_FP
+		if (hws->funcs.resync_fifo_dccg_dio)
+			hws->funcs.resync_fifo_dccg_dio(hws, dc, context);
+#endif
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h
index cecf7f0f5671..069e20bc87c0 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h
@@ -111,6 +111,8 @@ void dcn32_update_dsc_pg(struct dc *dc,
 
 void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context);
 
+void dcn32_disable_phantom_streams(struct dc *dc, struct dc_state *context);
+
 void dcn32_init_blank(
 		struct dc *dc,
 		struct timing_generator *tg);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c
index 427cfc8c24a4..e8ac94a005b8 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c
@@ -109,6 +109,7 @@ static const struct hw_sequencer_funcs dcn32_funcs = {
 	.get_dcc_en_bits = dcn10_get_dcc_en_bits,
 	.commit_subvp_config = dcn32_commit_subvp_config,
 	.enable_phantom_streams = dcn32_enable_phantom_streams,
+	.disable_phantom_streams = dcn32_disable_phantom_streams,
 	.subvp_pipe_control_lock = dcn32_subvp_pipe_control_lock,
 	.update_visual_confirm_color = dcn10_update_visual_confirm_color,
 	.subvp_pipe_control_lock_fast = dcn32_subvp_pipe_control_lock_fast,
@@ -159,6 +160,8 @@ static const struct hwseq_private_funcs dcn32_private_funcs = {
 	.set_pixels_per_cycle = dcn32_set_pixels_per_cycle,
 	.resync_fifo_dccg_dio = dcn32_resync_fifo_dccg_dio,
 	.is_dp_dig_pixel_rate_div_policy = dcn32_is_dp_dig_pixel_rate_div_policy,
+	.apply_single_controller_ctx_to_hw = dce110_apply_single_controller_ctx_to_hw,
+	.reset_back_end_for_pipe = dcn20_reset_back_end_for_pipe,
 };
 
 void dcn32_hw_sequencer_init_functions(struct dc *dc)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.h
index 89a591eb2c23..89a591eb2c23 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index 5a8258287438..8b6c49622f3b 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -56,6 +56,7 @@
 #include "dcn30/dcn30_cm_common.h"
 #include "dcn31/dcn31_hwseq.h"
 #include "dcn20/dcn20_hwseq.h"
+#include "dc_state_priv.h"
 
 #define DC_LOGGER_INIT(logger) \
 	struct dal_logger *dc_logger = logger
@@ -133,6 +134,7 @@ void dcn35_init_hw(struct dc *dc)
 	struct dc_bios *dcb = dc->ctx->dc_bios;
 	struct resource_pool *res_pool = dc->res_pool;
 	uint32_t backlight = MAX_BACKLIGHT_LEVEL;
+	uint32_t user_level = MAX_BACKLIGHT_LEVEL;
 	int i;
 
 	if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks)
@@ -145,17 +147,36 @@ void dcn35_init_hw(struct dc *dc)
 		hws->funcs.bios_golden_init(dc);
 	}
 
-	REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0);
-	REG_WRITE(DCCG_GATE_DISABLE_CNTL2,  0);
-
-	/* Disable gating for PHYASYMCLK. This will be enabled in dccg if needed */
-	REG_UPDATE_5(DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, 1,
-			PHYBSYMCLK_ROOT_GATE_DISABLE, 1,
-			PHYCSYMCLK_ROOT_GATE_DISABLE, 1,
-			PHYDSYMCLK_ROOT_GATE_DISABLE, 1,
-			PHYESYMCLK_ROOT_GATE_DISABLE, 1);
+	if (!dc->debug.disable_clock_gate) {
+		REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0);
+		REG_WRITE(DCCG_GATE_DISABLE_CNTL2,  0);
+
+		/* Disable gating for PHYASYMCLK. This will be enabled in dccg if needed */
+		REG_UPDATE_5(DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, 1,
+				PHYBSYMCLK_ROOT_GATE_DISABLE, 1,
+				PHYCSYMCLK_ROOT_GATE_DISABLE, 1,
+				PHYDSYMCLK_ROOT_GATE_DISABLE, 1,
+				PHYESYMCLK_ROOT_GATE_DISABLE, 1);
+
+		REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL4,
+				DPIASYMCLK0_GATE_DISABLE, 0,
+				DPIASYMCLK1_GATE_DISABLE, 0,
+				DPIASYMCLK2_GATE_DISABLE, 0,
+				DPIASYMCLK3_GATE_DISABLE, 0);
+
+		REG_WRITE(DCCG_GATE_DISABLE_CNTL5, 0xFFFFFFFF);
+		REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL5,
+				DTBCLK_P0_GATE_DISABLE, 0,
+				DTBCLK_P1_GATE_DISABLE, 0,
+				DTBCLK_P2_GATE_DISABLE, 0,
+				DTBCLK_P3_GATE_DISABLE, 0);
+		REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL5,
+				DPSTREAMCLK0_GATE_DISABLE, 0,
+				DPSTREAMCLK1_GATE_DISABLE, 0,
+				DPSTREAMCLK2_GATE_DISABLE, 0,
+				DPSTREAMCLK3_GATE_DISABLE, 0);
 
-	REG_WRITE(DCCG_GATE_DISABLE_CNTL5, 0x1f7c3fcf);
+	}
 
 	// Initialize the dccg
 	if (res_pool->dccg->funcs->dccg_init)
@@ -260,13 +281,15 @@ void dcn35_init_hw(struct dc *dc)
 	for (i = 0; i < dc->link_count; i++) {
 		struct dc_link *link = dc->links[i];
 
-		if (link->panel_cntl)
+		if (link->panel_cntl) {
 			backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
+			user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL;
+		}
 	}
 	if (dc->ctx->dmub_srv) {
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		if (abms[i] != NULL && abms[i]->funcs != NULL)
-			abms[i]->funcs->abm_init(abms[i], backlight);
+			abms[i]->funcs->abm_init(abms[i], backlight, user_level);
 		}
 	}
 
@@ -332,9 +355,6 @@ void dcn35_init_hw(struct dc *dc)
 	if (dc->res_pool->pg_cntl) {
 		if (dc->res_pool->pg_cntl->funcs->init_pg_status)
 			dc->res_pool->pg_cntl->funcs->init_pg_status(dc->res_pool->pg_cntl);
-
-		if (dc->res_pool->pg_cntl->funcs->set_force_poweron_domain22)
-			dc->res_pool->pg_cntl->funcs->set_force_poweron_domain22(dc->res_pool->pg_cntl, false);
 	}
 }
 
@@ -660,7 +680,7 @@ void dcn35_power_down_on_boot(struct dc *dc)
 bool dcn35_apply_idle_power_optimizations(struct dc *dc, bool enable)
 {
 	struct dc_link *edp_links[MAX_NUM_EDP];
-	int edp_num;
+	int i, edp_num;
 	if (dc->debug.dmcub_emulation)
 		return true;
 
@@ -668,14 +688,17 @@ bool dcn35_apply_idle_power_optimizations(struct dc *dc, bool enable)
 		dc_get_edp_links(dc, edp_links, &edp_num);
 		if (edp_num == 0 || edp_num > 1)
 			return false;
+
+		for (i = 0; i < dc->current_state->stream_count; ++i) {
+			struct dc_stream_state *stream = dc->current_state->streams[i];
+
+			if (!stream->dpms_off && !dc_is_embedded_signal(stream->signal))
+				return false;
+		}
 	}
 
 	// TODO: review other cases when idle optimization is allowed
-
-	if (!enable)
-		dc_dmub_srv_exit_low_power_state(dc);
-	else
-		dc_dmub_srv_notify_idle(dc, enable);
+	dc_dmub_srv_apply_idle_power_optimizations(dc, enable);
 
 	return true;
 }
@@ -685,7 +708,7 @@ void dcn35_z10_restore(const struct dc *dc)
 	if (dc->debug.disable_z10)
 		return;
 
-	dc_dmub_srv_exit_low_power_state(dc);
+	dc_dmub_srv_apply_idle_power_optimizations(dc, false);
 
 	dcn31_z10_restore(dc);
 }
@@ -801,12 +824,12 @@ void dcn35_init_pipes(struct dc *dc, struct dc_state *context)
 		dc->res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
 		pipe_ctx->stream_res.opp = dc->res_pool->opps[i];
 
-		hws->funcs.plane_atomic_disconnect(dc, pipe_ctx);
+		hws->funcs.plane_atomic_disconnect(dc, context, pipe_ctx);
 
 		if (tg->funcs->is_tg_enabled(tg))
 			tg->funcs->unlock(tg);
 
-		dc->hwss.disable_plane(dc, pipe_ctx);
+		dc->hwss.disable_plane(dc, context, pipe_ctx);
 
 		pipe_ctx->stream_res.tg = NULL;
 		pipe_ctx->plane_res.hubp = NULL;
@@ -933,10 +956,10 @@ void dcn35_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx)
 	pipe_ctx->plane_state = NULL;
 }
 
-void dcn35_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn35_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx)
 {
 	struct dce_hwseq *hws = dc->hwseq;
-	bool is_phantom = pipe_ctx->plane_state && pipe_ctx->plane_state->is_phantom;
+	bool is_phantom = dc_state_get_pipe_subvp_type(state, pipe_ctx) == SUBVP_PHANTOM;
 	struct timing_generator *tg = is_phantom ? pipe_ctx->stream_res.tg : NULL;
 
 	DC_LOGGER_INIT(dc->ctx->logger);
@@ -963,6 +986,8 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context,
 	bool hpo_frl_stream_enc_acquired = false;
 	bool hpo_dp_stream_enc_acquired = false;
 	int i = 0, j = 0;
+	int edp_num = 0;
+	struct dc_link *edp_links[MAX_NUM_EDP] = { NULL };
 
 	memset(update_state, 0, sizeof(struct pg_block_update));
 
@@ -1003,10 +1028,24 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context,
 
 		if (pipe_ctx->stream_res.opp)
 			update_state->pg_pipe_res_update[PG_OPP][pipe_ctx->stream_res.opp->inst] = false;
+	}
+	/* Domain24 controls all of the OTG, MPC and OPP; as long as one OTG is still up, avoid enabling OTG PG */
+	for (i = 0; i < dc->res_pool->timing_generator_count; i++) {
+		struct timing_generator *tg = dc->res_pool->timing_generators[i];
+		if (tg && tg->funcs->is_tg_enabled(tg)) {
+			update_state->pg_pipe_res_update[PG_OPTC][i] = false;
+			break;
+		}
+	}
 
-		if (pipe_ctx->stream_res.tg)
-			update_state->pg_pipe_res_update[PG_OPTC][pipe_ctx->stream_res.tg->inst] = false;
+	dc_get_edp_links(dc, edp_links, &edp_num);
+	if (edp_num == 0 ||
+		((!edp_links[0] || !edp_links[0]->edp_sink_present) &&
+			(!edp_links[1] || !edp_links[1]->edp_sink_present))) {
+		/* No eDP exists in this config: keep Domain24 powered on; for S0i3 this will be handled in dmubfw */
+		update_state->pg_pipe_res_update[PG_OPTC][0] = false;
 	}
+
 }
 
 void dcn35_calc_blocks_to_ungate(struct dc *dc, struct dc_state *context,
@@ -1092,8 +1131,29 @@ void dcn35_calc_blocks_to_ungate(struct dc *dc, struct dc_state *context,
 
 }
 
-void dcn35_block_power_control(struct dc *dc,
-	struct pg_block_update *update_state, bool power_on)
+/**
+ * dcn35_hw_block_power_down() - power down sequence
+ *
+ * The following sequence describes the ON-OFF (ONO) for power down:
+ *
+ *	ONO Region 3, DCPG 25: hpo - SKIPPED
+ *	ONO Region 4, DCPG 0: dchubp0, dpp0
+ *	ONO Region 6, DCPG 1: dchubp1, dpp1
+ *	ONO Region 8, DCPG 2: dchubp2, dpp2
+ *	ONO Region 10, DCPG 3: dchubp3, dpp3
+ *	ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW will power down at IPS2 entry
+ *	ONO Region 5, DCPG 16: dsc0
+ *	ONO Region 7, DCPG 17: dsc1
+ *	ONO Region 9, DCPG 18: dsc2
+ *	ONO Region 11, DCPG 19: dsc3
+ *	ONO Region 2, DCPG 24: mpc opp optc dwb
+ *	ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED. Will be powered down after lono timer is armed
+ *
+ * @dc: Current DC state
+ * @update_state: update PG sequence states for HW block
+ */
+void dcn35_hw_block_power_down(struct dc *dc,
+	struct pg_block_update *update_state)
 {
 	int i = 0;
 	struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl;
@@ -1102,64 +1162,106 @@ void dcn35_block_power_control(struct dc *dc,
 		return;
 	if (dc->debug.ignore_pg)
 		return;
+
 	if (update_state->pg_res_update[PG_HPO]) {
 		if (pg_cntl->funcs->hpo_pg_control)
-			pg_cntl->funcs->hpo_pg_control(pg_cntl, power_on);
+			pg_cntl->funcs->hpo_pg_control(pg_cntl, false);
 	}
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		if (update_state->pg_pipe_res_update[PG_HUBP][i] &&
 			update_state->pg_pipe_res_update[PG_DPP][i]) {
 			if (pg_cntl->funcs->hubp_dpp_pg_control)
-				pg_cntl->funcs->hubp_dpp_pg_control(pg_cntl, i, power_on);
+				pg_cntl->funcs->hubp_dpp_pg_control(pg_cntl, i, false);
 		}
-
+	}
+	for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++)
 		if (update_state->pg_pipe_res_update[PG_DSC][i]) {
 			if (pg_cntl->funcs->dsc_pg_control)
-				pg_cntl->funcs->dsc_pg_control(pg_cntl, i, power_on);
+				pg_cntl->funcs->dsc_pg_control(pg_cntl, i, false);
 		}
 
-		if (update_state->pg_pipe_res_update[PG_MPCC][i]) {
-			if (pg_cntl->funcs->mpcc_pg_control)
-				pg_cntl->funcs->mpcc_pg_control(pg_cntl, i, power_on);
-		}
 
-		if (update_state->pg_pipe_res_update[PG_OPP][i]) {
-			if (pg_cntl->funcs->opp_pg_control)
-				pg_cntl->funcs->opp_pg_control(pg_cntl, i, power_on);
-		}
-
-		if (update_state->pg_pipe_res_update[PG_OPTC][i]) {
-			if (pg_cntl->funcs->optc_pg_control)
-				pg_cntl->funcs->optc_pg_control(pg_cntl, i, power_on);
-		}
-	}
+	/* this will need all the clients to unregister optc interrupts; let dmubfw handle this */
+	if (pg_cntl->funcs->plane_otg_pg_control)
+		pg_cntl->funcs->plane_otg_pg_control(pg_cntl, false);
 
-	if (update_state->pg_res_update[PG_DWB]) {
-		if (pg_cntl->funcs->dwb_pg_control)
-			pg_cntl->funcs->dwb_pg_control(pg_cntl, power_on);
-	}
+	//domain22, 23, 25 currently always on.
 
-	if (pg_cntl->funcs->plane_otg_pg_control)
-		pg_cntl->funcs->plane_otg_pg_control(pg_cntl, power_on);
 }
 
-void dcn35_root_clock_control(struct dc *dc,
-	struct pg_block_update *update_state, bool power_on)
+/**
+ * dcn35_hw_block_power_up() - power up sequence
+ *
+ * The following sequence describes the ON-OFF (ONO) for power up:
+ *
+ *	ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED
+ *	ONO Region 2, DCPG 24: mpc opp optc dwb
+ *	ONO Region 5, DCPG 16: dsc0
+ *	ONO Region 7, DCPG 17: dsc1
+ *	ONO Region 9, DCPG 18: dsc2
+ *	ONO Region 11, DCPG 19: dsc3
+ *	ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW will power up at IPS2 exit
+ *	ONO Region 4, DCPG 0: dchubp0, dpp0
+ *	ONO Region 6, DCPG 1: dchubp1, dpp1
+ *	ONO Region 8, DCPG 2: dchubp2, dpp2
+ *	ONO Region 10, DCPG 3: dchubp3, dpp3
+ *	ONO Region 3, DCPG 25: hpo - SKIPPED
+ *
+ * @dc: Current DC state
+ * @update_state: update PG sequence states for HW block
+ */
+void dcn35_hw_block_power_up(struct dc *dc,
+	struct pg_block_update *update_state)
 {
 	int i = 0;
 	struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl;
 
 	if (!pg_cntl)
 		return;
+	if (dc->debug.ignore_pg)
+		return;
+	//domain22, 23, 25 currently always on.
+	/* this will need all the clients to unregister optc interrupts; let dmubfw handle this */
+	if (pg_cntl->funcs->plane_otg_pg_control)
+		pg_cntl->funcs->plane_otg_pg_control(pg_cntl, true);
+
+	for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++)
+		if (update_state->pg_pipe_res_update[PG_DSC][i]) {
+			if (pg_cntl->funcs->dsc_pg_control)
+				pg_cntl->funcs->dsc_pg_control(pg_cntl, i, true);
+		}
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		if (update_state->pg_pipe_res_update[PG_HUBP][i] &&
 			update_state->pg_pipe_res_update[PG_DPP][i]) {
-			if (dc->hwseq->funcs.dpp_root_clock_control)
-				dc->hwseq->funcs.dpp_root_clock_control(dc->hwseq, i, power_on);
+			if (pg_cntl->funcs->hubp_dpp_pg_control)
+				pg_cntl->funcs->hubp_dpp_pg_control(pg_cntl, i, true);
 		}
+	}
+	if (update_state->pg_res_update[PG_HPO]) {
+		if (pg_cntl->funcs->hpo_pg_control)
+			pg_cntl->funcs->hpo_pg_control(pg_cntl, true);
+	}
+}
+void dcn35_root_clock_control(struct dc *dc,
+	struct pg_block_update *update_state, bool power_on)
+{
+	int i = 0;
+	struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl;
 
+	if (!pg_cntl)
+		return;
+	/*enable root clock first when power up*/
+	if (power_on)
+		for (i = 0; i < dc->res_pool->pipe_count; i++) {
+			if (update_state->pg_pipe_res_update[PG_HUBP][i] &&
+				update_state->pg_pipe_res_update[PG_DPP][i]) {
+				if (dc->hwseq->funcs.dpp_root_clock_control)
+					dc->hwseq->funcs.dpp_root_clock_control(dc->hwseq, i, power_on);
+			}
+		}
+	for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++) {
 		if (update_state->pg_pipe_res_update[PG_DSC][i]) {
 			if (power_on) {
 				if (dc->res_pool->dccg->funcs->enable_dsc)
@@ -1170,6 +1272,15 @@ void dcn35_root_clock_control(struct dc *dc,
 			}
 		}
 	}
+	/*disable root clock first when power down*/
+	if (!power_on)
+		for (i = 0; i < dc->res_pool->pipe_count; i++) {
+			if (update_state->pg_pipe_res_update[PG_HUBP][i] &&
+				update_state->pg_pipe_res_update[PG_DPP][i]) {
+				if (dc->hwseq->funcs.dpp_root_clock_control)
+					dc->hwseq->funcs.dpp_root_clock_control(dc->hwseq, i, power_on);
+			}
+		}
 }
 
 void dcn35_prepare_bandwidth(
@@ -1183,9 +1294,9 @@ void dcn35_prepare_bandwidth(
 
 		if (dc->hwss.root_clock_control)
 			dc->hwss.root_clock_control(dc, &pg_update_state, true);
-
-		if (dc->hwss.block_power_control)
-			dc->hwss.block_power_control(dc, &pg_update_state, true);
+		/*power up required HW block*/
+		if (dc->hwss.hw_block_power_up)
+			dc->hwss.hw_block_power_up(dc, &pg_update_state);
 	}
 
 	dcn20_prepare_bandwidth(dc, context);
@@ -1201,9 +1312,9 @@ void dcn35_optimize_bandwidth(
 
 	if (dc->hwss.calc_blocks_to_gate) {
 		dc->hwss.calc_blocks_to_gate(dc, context, &pg_update_state);
-
-		if (dc->hwss.block_power_control)
-			dc->hwss.block_power_control(dc, &pg_update_state, false);
+		/*try to power down unused block*/
+		if (dc->hwss.hw_block_power_down)
+			dc->hwss.hw_block_power_down(dc, &pg_update_state);
 
 		if (dc->hwss.root_clock_control)
 			dc->hwss.root_clock_control(dc, &pg_update_state, false);
@@ -1225,3 +1336,44 @@ uint32_t dcn35_get_idle_state(const struct dc *dc)
 
 	return 0;
 }
+
+void dcn35_set_drr(struct pipe_ctx **pipe_ctx,
+		int num_pipes, struct dc_crtc_timing_adjust adjust)
+{
+	int i = 0;
+	struct drr_params params = {0};
+	// DRR set trigger event mapped to OTG_TRIG_A (bit 11) for manual control flow
+	unsigned int event_triggers = 0x800;
+	// Note DRR trigger events are generated regardless of whether num frames met.
+	unsigned int num_frames = 2;
+
+	params.vertical_total_max = adjust.v_total_max;
+	params.vertical_total_min = adjust.v_total_min;
+	params.vertical_total_mid = adjust.v_total_mid;
+	params.vertical_total_mid_frame_num = adjust.v_total_mid_frame_num;
+
+	for (i = 0; i < num_pipes; i++) {
+		if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) {
+			struct dc_crtc_timing *timing = &pipe_ctx[i]->stream->timing;
+			struct dc *dc = pipe_ctx[i]->stream->ctx->dc;
+
+			if (dc->debug.static_screen_wait_frames) {
+				unsigned int frame_rate = timing->pix_clk_100hz / (timing->h_total * timing->v_total);
+
+				if (frame_rate >= 120 && dc->caps.ips_support &&
+					dc->config.disable_ips != DMUB_IPS_DISABLE_ALL) {
+					/*ips enable case*/
+					num_frames = 2 * (frame_rate % 60);
+				}
+			}
+			if (pipe_ctx[i]->stream_res.tg->funcs->set_drr)
+				pipe_ctx[i]->stream_res.tg->funcs->set_drr(
+					pipe_ctx[i]->stream_res.tg, &params);
+			if (adjust.v_total_max != 0 && adjust.v_total_min != 0)
+				if (pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control)
+					pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control(
+						pipe_ctx[i]->stream_res.tg,
+						event_triggers, num_frames);
+		}
+	}
+}
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h
index 0dff10d179b8..fd66316e33de 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h
@@ -57,14 +57,16 @@ void dcn35_init_pipes(struct dc *dc, struct dc_state *context);
 void dcn35_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx);
 void dcn35_enable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx,
 			       struct dc_state *context);
-void dcn35_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn35_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx);
 
 void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context,
 	struct pg_block_update *update_state);
 void dcn35_calc_blocks_to_ungate(struct dc *dc, struct dc_state *context,
 	struct pg_block_update *update_state);
-void dcn35_block_power_control(struct dc *dc,
-	struct pg_block_update *update_state, bool power_on);
+void dcn35_hw_block_power_up(struct dc *dc,
+	struct pg_block_update *update_state);
+void dcn35_hw_block_power_down(struct dc *dc,
+	struct pg_block_update *update_state);
 void dcn35_root_clock_control(struct dc *dc,
 	struct pg_block_update *update_state, bool power_on);
 
@@ -84,4 +86,8 @@ void dcn35_dsc_pg_control(
 
 void dcn35_set_idle_state(const struct dc *dc, bool allow_idle);
 uint32_t dcn35_get_idle_state(const struct dc *dc);
+
+void dcn35_set_drr(struct pipe_ctx **pipe_ctx,
+		int num_pipes, struct dc_crtc_timing_adjust adjust);
+
 #endif /* __DC_HWSS_DCN35_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c
index 296bf3a38cb9..a630aa77dcec 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c
@@ -68,7 +68,7 @@ static const struct hw_sequencer_funcs dcn35_funcs = {
 	.prepare_bandwidth = dcn35_prepare_bandwidth,
 	.optimize_bandwidth = dcn35_optimize_bandwidth,
 	.update_bandwidth = dcn20_update_bandwidth,
-	.set_drr = dcn10_set_drr,
+	.set_drr = dcn35_set_drr,
 	.get_position = dcn10_get_position,
 	.set_static_screen_control = dcn30_set_static_screen_control,
 	.setup_stereo = dcn10_setup_stereo,
@@ -118,7 +118,8 @@ static const struct hw_sequencer_funcs dcn35_funcs = {
 	.update_dsc_pg = dcn32_update_dsc_pg,
 	.calc_blocks_to_gate = dcn35_calc_blocks_to_gate,
 	.calc_blocks_to_ungate = dcn35_calc_blocks_to_ungate,
-	.block_power_control = dcn35_block_power_control,
+	.hw_block_power_up = dcn35_hw_block_power_up,
+	.hw_block_power_down = dcn35_hw_block_power_down,
 	.root_clock_control = dcn35_root_clock_control,
 	.set_idle_state = dcn35_set_idle_state,
 	.get_idle_state = dcn35_get_idle_state
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.h
index b67015032c35..b67015032c35 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/CMakeLists.txt b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/CMakeLists.txt
new file mode 100644
index 000000000000..951ca2da4486
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/CMakeLists.txt
@@ -0,0 +1,4 @@
+dal3_subdirectory_sources(
+  dcn351_init.c
+  dcn351_init.h
+)
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/Makefile b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/Makefile
new file mode 100644
index 000000000000..b24ad27fe6ef
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/Makefile
@@ -0,0 +1,17 @@
+#
+# (c) Copyright 2022 Advanced Micro Devices, Inc. All the rights reserved
+#
+#  All rights reserved.  This notice is intended as a precaution against
+#  inadvertent publication and does not imply publication or any waiver
+#  of confidentiality.  The year included in the foregoing notice is the
+#  year of creation of the work.
+#
+#  Authors: AMD
+#
+# Makefile for DCN351.
+
+DCN351 = dcn351_init.o
+
+AMD_DAL_DCN351 = $(addprefix $(AMDDALPATH)/dc/dcn351/,$(DCN351))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DCN351)
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c
new file mode 100644
index 000000000000..143d3fc0221c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dce110/dce110_hwseq.h"
+#include "dcn10/dcn10_hwseq.h"
+#include "dcn20/dcn20_hwseq.h"
+#include "dcn21/dcn21_hwseq.h"
+#include "dcn30/dcn30_hwseq.h"
+#include "dcn301/dcn301_hwseq.h"
+#include "dcn31/dcn31_hwseq.h"
+#include "dcn32/dcn32_hwseq.h"
+#include "dcn35/dcn35_hwseq.h"
+
+#include "dcn351_init.h"
+
+static const struct hw_sequencer_funcs dcn351_funcs = {
+	.program_gamut_remap = dcn30_program_gamut_remap,
+	.init_hw = dcn35_init_hw,
+	.power_down_on_boot = dcn35_power_down_on_boot,
+	.apply_ctx_to_hw = dce110_apply_ctx_to_hw,
+	.apply_ctx_for_surface = NULL,
+	.program_front_end_for_ctx = dcn20_program_front_end_for_ctx,
+	.wait_for_pending_cleared = dcn10_wait_for_pending_cleared,
+	.post_unlock_program_front_end = dcn20_post_unlock_program_front_end,
+	.update_plane_addr = dcn20_update_plane_addr,
+	.update_dchub = dcn10_update_dchub,
+	.update_pending_status = dcn10_update_pending_status,
+	.program_output_csc = dcn20_program_output_csc,
+	.enable_accelerated_mode = dce110_enable_accelerated_mode,
+	.enable_timing_synchronization = dcn10_enable_timing_synchronization,
+	.enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset,
+	.update_info_frame = dcn31_update_info_frame,
+	.send_immediate_sdp_message = dcn10_send_immediate_sdp_message,
+	.enable_stream = dcn20_enable_stream,
+	.disable_stream = dce110_disable_stream,
+	.unblank_stream = dcn32_unblank_stream,
+	.blank_stream = dce110_blank_stream,
+	.enable_audio_stream = dce110_enable_audio_stream,
+	.disable_audio_stream = dce110_disable_audio_stream,
+	.disable_plane = dcn35_disable_plane,
+	.disable_pixel_data = dcn20_disable_pixel_data,
+	.pipe_control_lock = dcn20_pipe_control_lock,
+	.interdependent_update_lock = dcn10_lock_all_pipes,
+	.cursor_lock = dcn10_cursor_lock,
+	.prepare_bandwidth = dcn35_prepare_bandwidth,
+	.optimize_bandwidth = dcn35_optimize_bandwidth,
+	.update_bandwidth = dcn20_update_bandwidth,
+	.set_drr = dcn10_set_drr,
+	.get_position = dcn10_get_position,
+	.set_static_screen_control = dcn30_set_static_screen_control,
+	.setup_stereo = dcn10_setup_stereo,
+	.set_avmute = dcn30_set_avmute,
+	.log_hw_state = dcn10_log_hw_state,
+	.get_hw_state = dcn10_get_hw_state,
+	.clear_status_bits = dcn10_clear_status_bits,
+	.wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect,
+	.edp_backlight_control = dce110_edp_backlight_control,
+	.edp_power_control = dce110_edp_power_control,
+	.edp_wait_for_T12 = dce110_edp_wait_for_T12,
+	.edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready,
+	.set_cursor_position = dcn10_set_cursor_position,
+	.set_cursor_attribute = dcn10_set_cursor_attribute,
+	.set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level,
+	.setup_periodic_interrupt = dcn10_setup_periodic_interrupt,
+	.set_clock = dcn10_set_clock,
+	.get_clock = dcn10_get_clock,
+	.program_triplebuffer = dcn20_program_triple_buffer,
+	.enable_writeback = dcn30_enable_writeback,
+	.disable_writeback = dcn30_disable_writeback,
+	.update_writeback = dcn30_update_writeback,
+	.mmhubbub_warmup = dcn30_mmhubbub_warmup,
+	.dmdata_status_done = dcn20_dmdata_status_done,
+	.program_dmdata_engine = dcn30_program_dmdata_engine,
+	.set_dmdata_attributes = dcn20_set_dmdata_attributes,
+	.init_sys_ctx = dcn31_init_sys_ctx,
+	.init_vm_ctx = dcn20_init_vm_ctx,
+	.set_flip_control_gsl = dcn20_set_flip_control_gsl,
+	.get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
+	.calc_vupdate_position = dcn10_calc_vupdate_position,
+	.power_down = dce110_power_down,
+	.set_backlight_level = dcn21_set_backlight_level,
+	.set_abm_immediate_disable = dcn21_set_abm_immediate_disable,
+	.set_pipe = dcn21_set_pipe,
+	.enable_lvds_link_output = dce110_enable_lvds_link_output,
+	.enable_tmds_link_output = dce110_enable_tmds_link_output,
+	.enable_dp_link_output = dce110_enable_dp_link_output,
+	.disable_link_output = dcn32_disable_link_output,
+	.z10_restore = dcn35_z10_restore,
+	.z10_save_init = dcn31_z10_save_init,
+	.set_disp_pattern_generator = dcn30_set_disp_pattern_generator,
+	.optimize_pwr_state = dcn21_optimize_pwr_state,
+	.exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state,
+	.update_visual_confirm_color = dcn10_update_visual_confirm_color,
+	.apply_idle_power_optimizations = dcn35_apply_idle_power_optimizations,
+	.update_dsc_pg = dcn32_update_dsc_pg,
+	.calc_blocks_to_gate = dcn35_calc_blocks_to_gate,
+	.calc_blocks_to_ungate = dcn35_calc_blocks_to_ungate,
+	.hw_block_power_up = dcn35_hw_block_power_up,
+	.hw_block_power_down = dcn35_hw_block_power_down,
+	.root_clock_control = dcn35_root_clock_control,
+	.set_idle_state = dcn35_set_idle_state,
+	.get_idle_state = dcn35_get_idle_state
+};
+
+static const struct hwseq_private_funcs dcn351_private_funcs = {
+	.init_pipes = dcn35_init_pipes,
+	.update_plane_addr = dcn20_update_plane_addr,
+	.plane_atomic_disconnect = dcn10_plane_atomic_disconnect,
+	.update_mpcc = dcn20_update_mpcc,
+	.set_input_transfer_func = dcn32_set_input_transfer_func,
+	.set_output_transfer_func = dcn32_set_output_transfer_func,
+	.power_down = dce110_power_down,
+	.enable_display_power_gating = dcn10_dummy_display_power_gating,
+	.blank_pixel_data = dcn20_blank_pixel_data,
+	.reset_hw_ctx_wrap = dcn31_reset_hw_ctx_wrap,
+	.enable_stream_timing = dcn20_enable_stream_timing,
+	.edp_backlight_control = dce110_edp_backlight_control,
+	.setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt,
+	.did_underflow_occur = dcn10_did_underflow_occur,
+	.init_blank = dcn20_init_blank,
+	.disable_vga = NULL,
+	.bios_golden_init = dcn10_bios_golden_init,
+	.plane_atomic_disable = dcn35_plane_atomic_disable,
+	//.plane_atomic_disable = dcn20_plane_atomic_disable,/*todo*/
+	//.hubp_pg_control = dcn35_hubp_pg_control,
+	.enable_power_gating_plane = dcn35_enable_power_gating_plane,
+	.dpp_root_clock_control = dcn35_dpp_root_clock_control,
+	.program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree,
+	.update_odm = dcn35_update_odm,
+	.set_hdr_multiplier = dcn10_set_hdr_multiplier,
+	.verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high,
+	.wait_for_blank_complete = dcn20_wait_for_blank_complete,
+	.dccg_init = dcn20_dccg_init,
+	.set_mcm_luts = dcn32_set_mcm_luts,
+	.setup_hpo_hw_control = dcn35_setup_hpo_hw_control,
+	.calculate_dccg_k1_k2_values = dcn32_calculate_dccg_k1_k2_values,
+	.set_pixels_per_cycle = dcn32_set_pixels_per_cycle,
+	.is_dp_dig_pixel_rate_div_policy = dcn32_is_dp_dig_pixel_rate_div_policy,
+	.dsc_pg_control = dcn35_dsc_pg_control,
+	.dsc_pg_status = dcn32_dsc_pg_status,
+	.enable_plane = dcn35_enable_plane,
+};
+
+void dcn351_hw_sequencer_construct(struct dc *dc)
+{
+	dc->hwss = dcn351_funcs;
+	dc->hwseq->funcs = dcn351_private_funcs;
+
+}
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.h
new file mode 100644
index 000000000000..970b01008b23
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_DCN351_INIT_H__
+#define __DC_DCN351_INIT_H__
+
+struct dc;
+
+void dcn351_hw_sequencer_construct(struct dc *dc);
+
+#endif /* __DC_DCN351_INIT_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
index 452680fe9aab..64ca7c66509b 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
@@ -50,7 +50,7 @@ struct pg_block_update;
 struct subvp_pipe_control_lock_fast_params {
 	struct dc *dc;
 	bool lock;
-	struct pipe_ctx *pipe_ctx;
+	bool subvp_immediate_flip;
 };
 
 struct pipe_control_lock_params {
@@ -200,7 +200,7 @@ struct hw_sequencer_funcs {
 			struct dc_state *context);
 	enum dc_status (*apply_ctx_to_hw)(struct dc *dc,
 			struct dc_state *context);
-	void (*disable_plane)(struct dc *dc, struct pipe_ctx *pipe_ctx);
+	void (*disable_plane)(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx);
 	void (*disable_pixel_data)(struct dc *dc, struct pipe_ctx *pipe_ctx, bool blank);
 	void (*apply_ctx_for_surface)(struct dc *dc,
 			const struct dc_stream_state *stream,
@@ -248,6 +248,7 @@ struct hw_sequencer_funcs {
 	void (*enable_per_frame_crtc_position_reset)(struct dc *dc,
 			int group_size, struct pipe_ctx *grouped_pipes[]);
 	void (*enable_timing_synchronization)(struct dc *dc,
+			struct dc_state *state,
 			int group_index, int group_size,
 			struct pipe_ctx *grouped_pipes[]);
 	void (*enable_vblanks_synchronization)(struct dc *dc,
@@ -378,6 +379,7 @@ struct hw_sequencer_funcs {
 			struct dc_cursor_attributes *cursor_attr);
 	void (*commit_subvp_config)(struct dc *dc, struct dc_state *context);
 	void (*enable_phantom_streams)(struct dc *dc, struct dc_state *context);
+	void (*disable_phantom_streams)(struct dc *dc, struct dc_state *context);
 	void (*subvp_pipe_control_lock)(struct dc *dc,
 			struct dc_state *context,
 			bool lock,
@@ -414,8 +416,10 @@ struct hw_sequencer_funcs {
 		struct pg_block_update *update_state);
 	void (*calc_blocks_to_ungate)(struct dc *dc, struct dc_state *context,
 		struct pg_block_update *update_state);
-	void (*block_power_control)(struct dc *dc,
-		struct pg_block_update *update_state, bool power_on);
+	void (*hw_block_power_up)(struct dc *dc,
+		struct pg_block_update *update_state);
+	void (*hw_block_power_down)(struct dc *dc,
+		struct pg_block_update *update_state);
 	void (*root_clock_control)(struct dc *dc,
 		struct pg_block_update *update_state, bool power_on);
 	void (*set_idle_state)(const struct dc *dc, bool allow_idle);
@@ -452,17 +456,18 @@ void get_mpctree_visual_confirm_color(
 		struct tg_color *color);
 
 void get_subvp_visual_confirm_color(
-	struct dc *dc,
-	struct dc_state *context,
 	struct pipe_ctx *pipe_ctx,
 	struct tg_color *color);
 
 void get_mclk_switch_visual_confirm_color(
-		struct dc *dc,
-		struct dc_state *context,
 		struct pipe_ctx *pipe_ctx,
 		struct tg_color *color);
 
+void set_p_state_switch_method(
+		struct dc *dc,
+		struct dc_state *context,
+		struct pipe_ctx *pipe_ctx);
+
 void hwss_execute_sequence(struct dc *dc,
 		struct block_sequence block_sequence[],
 		int num_steps);
@@ -472,7 +477,8 @@ void hwss_build_fast_sequence(struct dc *dc,
 		unsigned int dmub_cmd_count,
 		struct block_sequence block_sequence[],
 		int *num_steps,
-		struct pipe_ctx *pipe_ctx);
+		struct pipe_ctx *pipe_ctx,
+		struct dc_stream_status *stream_status);
 
 void hwss_send_dmcub_cmd(union block_sequence_params *params);
 
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h
index 82c592166875..b3c62a82cb1c 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h
@@ -79,6 +79,7 @@ struct hwseq_private_funcs {
 	void (*update_plane_addr)(const struct dc *dc,
 			struct pipe_ctx *pipe_ctx);
 	void (*plane_atomic_disconnect)(struct dc *dc,
+			struct dc_state *state,
 			struct pipe_ctx *pipe_ctx);
 	void (*update_mpcc)(struct dc *dc, struct pipe_ctx *pipe_ctx);
 	bool (*set_input_transfer_func)(struct dc *dc,
@@ -164,8 +165,15 @@ struct hwseq_private_funcs {
 	void (*set_pixels_per_cycle)(struct pipe_ctx *pipe_ctx);
 	void (*resync_fifo_dccg_dio)(struct dce_hwseq *hws, struct dc *dc,
 			struct dc_state *context);
+	enum dc_status (*apply_single_controller_ctx_to_hw)(
+			struct pipe_ctx *pipe_ctx,
+			struct dc_state *context,
+			struct dc *dc);
 	bool (*is_dp_dig_pixel_rate_div_policy)(struct pipe_ctx *pipe_ctx);
 #endif
+	void (*reset_back_end_for_pipe)(struct dc *dc,
+			struct pipe_ctx *pipe_ctx,
+			struct dc_state *context);
 };
 
 struct dce_hwseq {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index bac1420b1de8..3a6bf77a6873 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -200,11 +200,8 @@ struct resource_funcs {
 			unsigned int pipe_cnt,
             unsigned int index);
 
-	bool (*remove_phantom_pipes)(struct dc *dc, struct dc_state *context, bool fast_update);
-	void (*retain_phantom_pipes)(struct dc *dc, struct dc_state *context);
 	void (*get_panel_config_defaults)(struct dc_panel_config *panel_config);
-	void (*save_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config);
-	void (*restore_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config);
+	void (*build_pipe_pix_clk_params)(struct pipe_ctx *pipe_ctx);
 };
 
 struct audio_support{
@@ -384,6 +381,16 @@ union pipe_update_flags {
 	uint32_t raw;
 };
 
+enum p_state_switch_method {
+	P_STATE_UNKNOWN						= 0,
+	P_STATE_V_BLANK						= 1,
+	P_STATE_FPO,
+	P_STATE_V_ACTIVE,
+	P_STATE_SUB_VP,
+	P_STATE_DRR_SUB_VP,
+	P_STATE_V_BLANK_SUB_VP
+};
+
 struct pipe_ctx {
 	struct dc_plane_state *plane_state;
 	struct dc_stream_state *stream;
@@ -432,6 +439,7 @@ struct pipe_ctx {
 	struct dwbc *dwbc;
 	struct mcif_wb *mcif_wb;
 	union pipe_update_flags update_flags;
+	enum p_state_switch_method p_state_type;
 	struct tg_color visual_confirm_color;
 	bool has_vactive_margin;
 	/* subvp_index: only valid if the pipe is a SUBVP_MAIN*/
@@ -461,6 +469,8 @@ struct resource_context {
 	unsigned int hpo_dp_link_enc_to_link_idx[MAX_HPO_DP2_LINK_ENCODERS];
 	int hpo_dp_link_enc_ref_cnts[MAX_HPO_DP2_LINK_ENCODERS];
 	bool is_mpc_3dlut_acquired[MAX_PIPES];
+	/* solely used for build scalar data in dml2 */
+	struct pipe_ctx temp_pipe;
 };
 
 struct dce_bw_output {
@@ -525,6 +535,14 @@ struct dc_state {
 	 * @stream_status: Planes status on a given stream
 	 */
 	struct dc_stream_status stream_status[MAX_PIPES];
+	/**
+	 * @phantom_streams: Stream state properties for phantoms
+	 */
+	struct dc_stream_state *phantom_streams[MAX_PHANTOM_PIPES];
+	/**
+	 * @phantom_planes: Planes state properties for phantoms
+	 */
+	struct dc_plane_state *phantom_planes[MAX_PHANTOM_PIPES];
 
 	/**
 	 * @stream_count: Total of streams in use
@@ -533,6 +551,14 @@ struct dc_state {
 	uint8_t stream_mask;
 
 	/**
+	 * @phantom_stream_count: Total phantom streams in use
+	 */
+	uint8_t phantom_stream_count;
+	/**
+	 * @phantom_plane_count: Total phantom planes in use
+	 */
+	uint8_t phantom_plane_count;
+	/**
 	 * @res_ctx: Persistent state of resources
 	 */
 	struct resource_context res_ctx;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h
index 33db15d69f23..3f0161d64675 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h
@@ -36,7 +36,7 @@ struct abm {
 };
 
 struct abm_funcs {
-	void (*abm_init)(struct abm *abm, uint32_t back_light);
+	void (*abm_init)(struct abm *abm, uint32_t back_light, uint32_t user_level);
 	bool (*set_abm_level)(struct abm *abm, unsigned int abm_level);
 	bool (*set_abm_immediate_disable)(struct abm *abm, unsigned int panel_inst);
 	bool (*set_pipe)(struct abm *abm, unsigned int controller_id, unsigned int panel_inst);
@@ -64,7 +64,8 @@ struct abm_funcs {
 	bool (*set_pipe_ex)(struct abm *abm,
 			unsigned int otg_inst,
 			unsigned int option,
-			unsigned int panel_inst);
+			unsigned int panel_inst,
+			unsigned int pwrseq_inst);
 };
 
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
index fa9614bcb160..17e014d3bdc8 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
@@ -62,6 +62,25 @@ struct dcn3_clk_internal {
 	uint32_t CLK4_CLK0_CURRENT_CNT; //fclk
 };
 
+struct dcn35_clk_internal {
+	int dummy;
+	uint32_t CLK1_CLK0_CURRENT_CNT; //dispclk
+	uint32_t CLK1_CLK1_CURRENT_CNT; //dppclk
+	uint32_t CLK1_CLK2_CURRENT_CNT; //dprefclk
+	uint32_t CLK1_CLK3_CURRENT_CNT; //dcfclk
+	uint32_t CLK1_CLK4_CURRENT_CNT; //dtbclk
+	//uint32_t CLK1_CLK5_CURRENT_CNT; //dpiaclk
+	//uint32_t CLK1_CLK6_CURRENT_CNT; //srdbgclk
+	uint32_t CLK1_CLK3_DS_CNTL;	    //dcf_deep_sleep_divider
+	uint32_t CLK1_CLK3_ALLOW_DS;	//dcf_deep_sleep_allow
+
+	uint32_t CLK1_CLK0_BYPASS_CNTL; //dispclk bypass
+	uint32_t CLK1_CLK1_BYPASS_CNTL; //dppclk bypass
+	uint32_t CLK1_CLK2_BYPASS_CNTL; //dprefclk bypass
+	uint32_t CLK1_CLK3_BYPASS_CNTL; //dcfclk bypass
+	uint32_t CLK1_CLK4_BYPASS_CNTL; //dtbclk bypass
+};
+
 struct dcn301_clk_internal {
 	int dummy;
 	uint32_t CLK1_CLK0_CURRENT_CNT; //dispclk
@@ -314,6 +333,7 @@ struct clk_mgr {
 	bool force_smu_not_present;
 	bool dc_mode_softmax_enabled;
 	int dprefclk_khz; // Used by program pixel clock in clock source funcs, need to figureout where this goes
+	int dp_dto_source_clock_in_khz; // Used to program DP DTO with ss adjustment on DCN314
 	int dentist_vco_freq_khz;
 	struct clk_state_registers_and_bypass boot_snapshot;
 	struct clk_bw_params *bw_params;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h
index ce2f0c0e82bd..b9a06bf84cc9 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h
@@ -59,8 +59,8 @@ enum dentist_dispclk_change_mode {
 struct dp_dto_params {
 	int otg_inst;
 	enum signal_type signal;
-	long long pixclk_hz;
-	long long refclk_hz;
+	uint64_t pixclk_hz;
+	uint64_t refclk_hz;
 };
 
 enum pixel_rate_div {
@@ -201,6 +201,10 @@ struct dccg_funcs {
 			struct dccg *dccg,
 			enum streamclk_source src,
 			uint32_t otg_inst);
+	void (*set_dto_dscclk)(
+			struct dccg *dccg,
+			uint32_t dsc_inst);
+	void (*set_ref_dscclk)(struct dccg *dccg, uint32_t dsc_inst);
 };
 
 #endif //__DAL_DCCG_H__
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h
index 86b711dcc785..729ca0064e94 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h
@@ -188,6 +188,10 @@ struct dwbc_funcs {
 	bool (*is_enabled)(
 		struct dwbc *dwbc);
 
+	void (*set_fc_enable)(
+		struct dwbc *dwbc,
+		enum dwb_frame_capture_enable enable);
+
 	void (*set_stereo)(
 		struct dwbc *dwbc,
 		struct dwb_stereo_params *stereo_params);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
index b95ae9596c3b..dcae23faeee3 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
@@ -43,6 +43,7 @@
  * to be used inside loops and for determining array sizes.
  */
 #define MAX_PIPES 6
+#define MAX_PHANTOM_PIPES (MAX_PIPES / 2)
 #define MAX_DIG_LINK_ENCODERS 7
 #define MAX_DWB_PIPES	1
 #define MAX_HPO_DP2_ENCODERS	4
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h
index 24af9d80b937..e97d964a1791 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h
@@ -40,6 +40,7 @@ struct panel_cntl_backlight_registers {
 	unsigned int BL_PWM_PERIOD_CNTL;
 	unsigned int LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV;
 	unsigned int PANEL_PWRSEQ_REF_DIV2;
+	unsigned int USER_LEVEL;
 };
 
 struct panel_cntl_funcs {
@@ -56,12 +57,14 @@ struct panel_cntl_funcs {
 struct panel_cntl_init_data {
 	struct dc_context *ctx;
 	uint32_t inst;
+	uint32_t eng_id;
 };
 
 struct panel_cntl {
 	const struct panel_cntl_funcs *funcs;
 	struct dc_context *ctx;
 	uint32_t inst;
+	uint32_t pwrseq_inst;
 	/* registers setting needs to be saved and restored at InitBacklight */
 	struct panel_cntl_backlight_registers stored_backlight_registers;
 };
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h
index b9812afb886b..00ea3864dd4d 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h
@@ -47,8 +47,6 @@ struct pg_cntl_funcs {
 	void (*optc_pg_control)(struct pg_cntl *pg_cntl, unsigned int optc_inst, bool power_on);
 	void (*dwb_pg_control)(struct pg_cntl *pg_cntl, bool power_on);
 	void (*init_pg_status)(struct pg_cntl *pg_cntl);
-
-	void (*set_force_poweron_domain22)(struct pg_cntl *pg_cntl, bool power_on);
 };
 
 #endif //__DC_PG_CNTL_H__
diff --git a/drivers/gpu/drm/amd/display/dc/inc/link.h b/drivers/gpu/drm/amd/display/dc/inc/link.h
index d7685368140a..26fe81f213da 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/link.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/link.h
@@ -281,11 +281,16 @@ struct link_service {
 			const unsigned int *power_opts);
 	bool (*edp_setup_replay)(struct dc_link *link,
 			const struct dc_stream_state *stream);
+	bool (*edp_send_replay_cmd)(struct dc_link *link,
+			enum replay_FW_Message_type msg,
+			union dmub_replay_cmd_set *cmd_data);
 	bool (*edp_set_coasting_vtotal)(
 			struct dc_link *link, uint16_t coasting_vtotal);
 	bool (*edp_replay_residency)(const struct dc_link *link,
 			unsigned int *residency, const bool is_start,
 			const bool is_alpm);
+	bool (*edp_set_replay_power_opt_and_coasting_vtotal)(struct dc_link *link,
+			const unsigned int *power_opts, uint16_t coasting_vtotal);
 
 	bool (*edp_wait_for_t12)(struct dc_link *link);
 	bool (*edp_is_ilr_optimization_required)(struct dc_link *link,
diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h
index 06ca8bfb91e7..77a60aa9f27b 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/resource.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h
@@ -427,22 +427,18 @@ struct pipe_ctx *resource_get_primary_dpp_pipe(const struct pipe_ctx *dpp_pipe);
 int resource_get_mpc_slice_index(const struct pipe_ctx *dpp_pipe);
 
 /*
- * Get number of MPC "cuts" of the plane associated with the pipe. MPC slice
- * count is equal to MPC splits + 1. For example if a plane is cut 3 times, it
- * will have 4 pieces of slice.
- * return - 0 if pipe is not used for a plane with MPCC combine. otherwise
- * the number of MPC "cuts" for the plane.
+ * Get the number of MPC slices associated with the pipe.
+ * The function returns 0 if the pipe is not associated with an MPC combine
+ * pipe topology.
  */
-int resource_get_mpc_slice_count(const struct pipe_ctx *opp_head);
+int resource_get_mpc_slice_count(const struct pipe_ctx *pipe);
 
 /*
- * Get number of ODM "cuts" of the timing associated with the pipe. ODM slice
- * count is equal to ODM splits + 1. For example if a timing is cut 3 times, it
- * will have 4 pieces of slice.
- * return - 0 if pipe is not used for ODM combine. otherwise
- * the number of ODM "cuts" for the timing.
+ * Get the number of ODM slices associated with the pipe.
+ * The function returns 0 if the pipe is not associated with an ODM combine
+ * pipe topology.
  */
-int resource_get_odm_slice_count(const struct pipe_ctx *otg_master);
+int resource_get_odm_slice_count(const struct pipe_ctx *pipe);
 
 /* Get the ODM slice index counting from 0 from left most slice */
 int resource_get_odm_slice_index(const struct pipe_ctx *opp_head);
@@ -501,6 +497,18 @@ int recource_find_free_pipe_not_used_in_cur_res_ctx(
 		const struct resource_pool *pool);
 
 /*
+ * Look for a free pipe in new resource context that is used in current resource
+ * context as an OTG master pipe.
+ *
+ * return - FREE_PIPE_INDEX_NOT_FOUND if free pipe is not found, otherwise
+ * pipe idx of the free pipe
+ */
+int recource_find_free_pipe_used_as_otg_master_in_cur_res_ctx(
+		const struct resource_context *cur_res_ctx,
+		struct resource_context *new_res_ctx,
+		const struct resource_pool *pool);
+
+/*
  * Look for a free pipe in new resource context that is used as a secondary DPP
  * pipe in any MPCC combine in current resource context.
  * return - FREE_PIPE_INDEX_NOT_FOUND if free pipe is not found, otherwise
@@ -561,9 +569,6 @@ void update_audio_usage(
 
 unsigned int resource_pixel_format_to_bpp(enum surface_pixel_format format);
 
-void get_audio_check(struct audio_info *aud_modes,
-	struct audio_check *aud_chk);
-
 bool get_temp_dp_link_res(struct dc_link *link,
 		struct link_resource *link_res,
 		struct dc_link_settings *link_settings);
@@ -610,5 +615,4 @@ enum dc_status update_dp_encoder_resources_for_test_harness(const struct dc *dc,
 		struct pipe_ctx *pipe_ctx);
 
 bool check_subvp_sw_cursor_fallback_req(const struct dc *dc, struct dc_stream_state *stream);
-
 #endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_RESOURCE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c
index f2fe523f914f..24153b0df503 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c
@@ -879,7 +879,7 @@ static bool detect_link_and_local_sink(struct dc_link *link,
 			(link->dpcd_sink_ext_caps.bits.oled == 1)) {
 			dpcd_set_source_specific_data(link);
 			msleep(post_oui_delay);
-			set_cached_brightness_aux(link);
+			set_default_brightness_aux(link);
 		}
 
 		return true;
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
index 34a4a8c0e18c..3cbfbf8d107e 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
@@ -776,10 +776,26 @@ static bool dp_set_dsc_on_rx(struct pipe_ctx *pipe_ctx, bool enable)
  */
 void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
 {
+	/* TODO: Move this to HWSS as this is hardware programming sequence not a
+	 * link layer sequence
+	 */
 	struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
+	struct dc *dc = pipe_ctx->stream->ctx->dc;
 	struct dc_stream_state *stream = pipe_ctx->stream;
 	struct pipe_ctx *odm_pipe;
 	int opp_cnt = 1;
+	struct dccg *dccg = dc->res_pool->dccg;
+	/* It has been found that when DSCCLK is lower than 16MHz, DCN register
+	 * accesses hang. When DSCCLK is based on refclk, DSCCLK is always a
+	 * fixed value higher than 16MHz so the issue doesn't occur. When DSCCLK is
+	 * generated by DTO, DSCCLK would be based on 1/3 dispclk. For small timings
+	 * with DSC such as 480p60Hz, the dispclk could be low enough to trigger
+	 * this problem. We are implementing a workaround here to keep using DSCCLK
+	 * based on fixed value refclk when timing is smaller than 3x16MHz (i.e.
+	 * 48MHz) pixel clock to avoid hitting this problem.
+	 */
+	bool should_use_dto_dscclk = (dccg->funcs->set_dto_dscclk != NULL) &&
+			stream->timing.pix_clk_100hz > 480000;
 	DC_LOGGER_INIT(dsc->ctx->logger);
 
 	for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
@@ -802,11 +818,15 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
 
 		dsc->funcs->dsc_set_config(dsc, &dsc_cfg, &dsc_optc_cfg);
 		dsc->funcs->dsc_enable(dsc, pipe_ctx->stream_res.opp->inst);
+		if (should_use_dto_dscclk)
+			dccg->funcs->set_dto_dscclk(dccg, dsc->inst);
 		for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
 			struct display_stream_compressor *odm_dsc = odm_pipe->stream_res.dsc;
 
 			odm_dsc->funcs->dsc_set_config(odm_dsc, &dsc_cfg, &dsc_optc_cfg);
 			odm_dsc->funcs->dsc_enable(odm_dsc, odm_pipe->stream_res.opp->inst);
+			if (should_use_dto_dscclk)
+				dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst);
 		}
 		dsc_cfg.dc_dsc_cfg.num_slices_h *= opp_cnt;
 		dsc_cfg.pic_width *= opp_cnt;
@@ -856,9 +876,14 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
 		}
 
 		/* disable DSC block */
+		if (dccg->funcs->set_ref_dscclk)
+			dccg->funcs->set_ref_dscclk(dccg, pipe_ctx->stream_res.dsc->inst);
 		pipe_ctx->stream_res.dsc->funcs->dsc_disable(pipe_ctx->stream_res.dsc);
-		for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
+		for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
+			if (dccg->funcs->set_ref_dscclk)
+				dccg->funcs->set_ref_dscclk(dccg, odm_pipe->stream_res.dsc->inst);
 			odm_pipe->stream_res.dsc->funcs->dsc_disable(odm_pipe->stream_res.dsc);
+		}
 	}
 }
 
@@ -875,11 +900,15 @@ bool link_set_dsc_pps_packet(struct pipe_ctx *pipe_ctx, bool enable, bool immedi
 {
 	struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
 	struct dc_stream_state *stream = pipe_ctx->stream;
-	DC_LOGGER_INIT(dsc->ctx->logger);
 
-	if (!pipe_ctx->stream->timing.flags.DSC || !dsc)
+	if (!pipe_ctx->stream->timing.flags.DSC)
+		return false;
+
+	if (!dsc)
 		return false;
 
+	DC_LOGGER_INIT(dsc->ctx->logger);
+
 	if (enable) {
 		struct dsc_config dsc_cfg;
 		uint8_t dsc_packed_pps[128];
@@ -1057,18 +1086,21 @@ static struct fixed31_32 get_pbn_from_bw_in_kbps(uint64_t kbps)
 	uint32_t denominator = 1;
 
 	/*
-	 * margin 5300ppm + 300ppm ~ 0.6% as per spec, factor is 1.006
+	 * The 1.006 factor (margin 5300ppm + 300ppm ~ 0.6% as per spec) is not
+	 * required when determining PBN/time slot utilization on the link between
+	 * us and the branch, since that overhead is already accounted for in
+	 * the get_pbn_per_slot function.
+	 *
 	 * The unit of 54/64Mbytes/sec is an arbitrary unit chosen based on
 	 * common multiplier to render an integer PBN for all link rate/lane
 	 * counts combinations
 	 * calculate
-	 * peak_kbps *= (1006/1000)
 	 * peak_kbps *= (64/54)
-	 * peak_kbps *= 8    convert to bytes
+	 * peak_kbps /= (8 * 1000) convert to bytes
 	 */
 
-	numerator = 64 * PEAK_FACTOR_X1000;
-	denominator = 54 * 8 * 1000 * 1000;
+	numerator = 64;
+	denominator = 54 * 8 * 1000;
 	kbps *= numerator;
 	peak_kbps = dc_fixpt_from_fraction(kbps, denominator);
 
@@ -1247,86 +1279,6 @@ static void remove_stream_from_alloc_table(
 	}
 }
 
-static enum dc_status deallocate_mst_payload_with_temp_drm_wa(
-		struct pipe_ctx *pipe_ctx)
-{
-	struct dc_stream_state *stream = pipe_ctx->stream;
-	struct dc_link *link = stream->link;
-	struct dc_dp_mst_stream_allocation_table proposed_table = {0};
-	struct fixed31_32 avg_time_slots_per_mtp = dc_fixpt_from_int(0);
-	int i;
-	bool mst_mode = (link->type == dc_connection_mst_branch);
-	/* adjust for drm changes*/
-	const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res);
-	const struct dc_link_settings empty_link_settings = {0};
-	DC_LOGGER_INIT(link->ctx->logger);
-
-	if (link_hwss->ext.set_throttled_vcp_size)
-		link_hwss->ext.set_throttled_vcp_size(pipe_ctx, avg_time_slots_per_mtp);
-	if (link_hwss->ext.set_hblank_min_symbol_width)
-		link_hwss->ext.set_hblank_min_symbol_width(pipe_ctx,
-				&empty_link_settings,
-				avg_time_slots_per_mtp);
-
-	if (dm_helpers_dp_mst_write_payload_allocation_table(
-			stream->ctx,
-			stream,
-			&proposed_table,
-			false))
-		update_mst_stream_alloc_table(
-				link,
-				pipe_ctx->stream_res.stream_enc,
-				pipe_ctx->stream_res.hpo_dp_stream_enc,
-				&proposed_table);
-	else
-		DC_LOG_WARNING("Failed to update"
-				"MST allocation table for"
-				"pipe idx:%d\n",
-				pipe_ctx->pipe_idx);
-
-	DC_LOG_MST("%s"
-			"stream_count: %d: ",
-			__func__,
-			link->mst_stream_alloc_table.stream_count);
-
-	for (i = 0; i < MAX_CONTROLLER_NUM; i++) {
-		DC_LOG_MST("stream_enc[%d]: %p      "
-		"stream[%d].hpo_dp_stream_enc: %p      "
-		"stream[%d].vcp_id: %d      "
-		"stream[%d].slot_count: %d\n",
-		i,
-		(void *) link->mst_stream_alloc_table.stream_allocations[i].stream_enc,
-		i,
-		(void *) link->mst_stream_alloc_table.stream_allocations[i].hpo_dp_stream_enc,
-		i,
-		link->mst_stream_alloc_table.stream_allocations[i].vcp_id,
-		i,
-		link->mst_stream_alloc_table.stream_allocations[i].slot_count);
-	}
-
-	if (link_hwss->ext.update_stream_allocation_table == NULL ||
-			link_dp_get_encoding_format(&link->cur_link_settings) == DP_UNKNOWN_ENCODING) {
-		DC_LOG_DEBUG("Unknown encoding format\n");
-		return DC_ERROR_UNEXPECTED;
-	}
-
-	link_hwss->ext.update_stream_allocation_table(link, &pipe_ctx->link_res,
-			&link->mst_stream_alloc_table);
-
-	if (mst_mode) {
-		dm_helpers_dp_mst_poll_for_allocation_change_trigger(
-			stream->ctx,
-			stream);
-	}
-
-	dm_helpers_dp_mst_send_payload_allocation(
-			stream->ctx,
-			stream,
-			false);
-
-	return DC_OK;
-}
-
 static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx)
 {
 	struct dc_stream_state *stream = pipe_ctx->stream;
@@ -1339,9 +1291,6 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx)
 	const struct dc_link_settings empty_link_settings = {0};
 	DC_LOGGER_INIT(link->ctx->logger);
 
-	if (link->dc->debug.temp_mst_deallocation_sequence)
-		return deallocate_mst_payload_with_temp_drm_wa(pipe_ctx);
-
 	/* deallocate_mst_payload is called before disable link. When mode or
 	 * disable/enable monitor, new stream is created which is not in link
 	 * stream[] yet. For this, payload is not allocated yet, so de-alloc
@@ -1414,16 +1363,14 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx)
 	link_hwss->ext.update_stream_allocation_table(link, &pipe_ctx->link_res,
 			&link->mst_stream_alloc_table);
 
-	if (mst_mode) {
+	if (mst_mode)
 		dm_helpers_dp_mst_poll_for_allocation_change_trigger(
 			stream->ctx,
 			stream);
 
-		dm_helpers_dp_mst_send_payload_allocation(
-				stream->ctx,
-				stream,
-				false);
-	}
+	dm_helpers_dp_mst_update_mst_mgr_for_deallocation(
+			stream->ctx,
+			stream);
 
 	return DC_OK;
 }
@@ -1504,12 +1451,10 @@ static enum dc_status allocate_mst_payload(struct pipe_ctx *pipe_ctx)
 			stream->ctx,
 			stream);
 
-	if (ret != ACT_LINK_LOST) {
+	if (ret != ACT_LINK_LOST)
 		dm_helpers_dp_mst_send_payload_allocation(
 				stream->ctx,
-				stream,
-				true);
-	}
+				stream);
 
 	/* slot X.Y for only current stream */
 	pbn_per_slot = get_pbn_per_slot(stream);
@@ -1769,8 +1714,7 @@ enum dc_status link_reduce_mst_payload(struct pipe_ctx *pipe_ctx, uint32_t bw_in
 	/* send ALLOCATE_PAYLOAD sideband message with updated pbn */
 	dm_helpers_dp_mst_send_payload_allocation(
 			stream->ctx,
-			stream,
-			true);
+			stream);
 
 	/* notify immediate branch device table update */
 	if (dm_helpers_dp_mst_write_payload_allocation_table(
@@ -1899,8 +1843,7 @@ enum dc_status link_increase_mst_payload(struct pipe_ctx *pipe_ctx, uint32_t bw_
 		/* send ALLOCATE_PAYLOAD sideband message with updated pbn */
 		dm_helpers_dp_mst_send_payload_allocation(
 				stream->ctx,
-				stream,
-				true);
+				stream);
 	}
 
 	/* increase throttled vcp size */
@@ -2066,17 +2009,11 @@ static enum dc_status enable_link_dp(struct dc_state *state,
 		}
 	}
 
-	/*
-	 * If the link is DP-over-USB4 do the following:
-	 * - Train with fallback when enabling DPIA link. Conventional links are
+	/* Train with fallback when enabling DPIA link. Conventional links are
 	 * trained with fallback during sink detection.
-	 * - Allocate only what the stream needs for bw in Gbps. Inform the CM
-	 * in case stream needs more or less bw from what has been allocated
-	 * earlier at plug time.
 	 */
-	if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
+	if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
 		do_fallback = true;
-	}
 
 	/*
 	 * Temporary w/a to get DP2.0 link rates to work with SST.
@@ -2142,8 +2079,7 @@ static enum dc_status enable_link_dp(struct dc_state *state,
 	if (link->dpcd_sink_ext_caps.bits.oled == 1 ||
 		link->dpcd_sink_ext_caps.bits.sdr_aux_backlight_control == 1 ||
 		link->dpcd_sink_ext_caps.bits.hdr_aux_backlight_control == 1) {
-		set_cached_brightness_aux(link);
-
+		set_default_brightness_aux(link);
 		if (link->dpcd_sink_ext_caps.bits.oled == 1)
 			msleep(bl_oled_enable_delay);
 		edp_backlight_enable_aux(link, true);
@@ -2259,6 +2195,32 @@ static enum dc_status enable_link(
 	return status;
 }
 
+static bool allocate_usb4_bandwidth_for_stream(struct dc_stream_state *stream, int bw)
+{
+	return true;
+}
+
+static bool allocate_usb4_bandwidth(struct dc_stream_state *stream)
+{
+	bool ret;
+
+	int bw = dc_bandwidth_in_kbps_from_timing(&stream->timing,
+			dc_link_get_highest_encoding_format(stream->sink->link));
+
+	ret = allocate_usb4_bandwidth_for_stream(stream, bw);
+
+	return ret;
+}
+
+static bool deallocate_usb4_bandwidth(struct dc_stream_state *stream)
+{
+	bool ret;
+
+	ret = allocate_usb4_bandwidth_for_stream(stream, 0);
+
+	return ret;
+}
+
 void link_set_dpms_off(struct pipe_ctx *pipe_ctx)
 {
 	struct dc  *dc = pipe_ctx->stream->ctx->dc;
@@ -2294,6 +2256,9 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx)
 	update_psp_stream_config(pipe_ctx, true);
 	dc->hwss.blank_stream(pipe_ctx);
 
+	if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
+		deallocate_usb4_bandwidth(pipe_ctx->stream);
+
 	if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
 		deallocate_mst_payload(pipe_ctx);
 	else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT &&
@@ -2536,6 +2501,9 @@ void link_set_dpms_on(
 		}
 	}
 
+	if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
+		allocate_usb4_bandwidth(pipe_ctx->stream);
+
 	if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
 		allocate_mst_payload(pipe_ctx);
 	else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT &&
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c
index 7abfc67d10a6..cf22b8f28ba6 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c
@@ -213,8 +213,10 @@ static void construct_link_service_edp_panel_control(struct link_service *link_s
 	link_srv->edp_get_replay_state = edp_get_replay_state;
 	link_srv->edp_set_replay_allow_active = edp_set_replay_allow_active;
 	link_srv->edp_setup_replay = edp_setup_replay;
+	link_srv->edp_send_replay_cmd = edp_send_replay_cmd;
 	link_srv->edp_set_coasting_vtotal = edp_set_coasting_vtotal;
 	link_srv->edp_replay_residency = edp_replay_residency;
+	link_srv->edp_set_replay_power_opt_and_coasting_vtotal = edp_set_replay_power_opt_and_coasting_vtotal;
 
 	link_srv->edp_wait_for_t12 = edp_wait_for_t12;
 	link_srv->edp_is_ilr_optimization_required =
@@ -595,24 +597,6 @@ static bool construct_phy(struct dc_link *link,
 	link->ddc_hw_inst =
 		dal_ddc_get_line(get_ddc_pin(link->ddc));
 
-
-	if (link->dc->res_pool->funcs->panel_cntl_create &&
-		(link->link_id.id == CONNECTOR_ID_EDP ||
-			link->link_id.id == CONNECTOR_ID_LVDS)) {
-		panel_cntl_init_data.ctx = dc_ctx;
-		panel_cntl_init_data.inst =
-			panel_cntl_init_data.ctx->dc_edp_id_count;
-		link->panel_cntl =
-			link->dc->res_pool->funcs->panel_cntl_create(
-								&panel_cntl_init_data);
-		panel_cntl_init_data.ctx->dc_edp_id_count++;
-
-		if (link->panel_cntl == NULL) {
-			DC_ERROR("Failed to create link panel_cntl!\n");
-			goto panel_cntl_create_fail;
-		}
-	}
-
 	enc_init_data.ctx = dc_ctx;
 	bp_funcs->get_src_obj(dc_ctx->dc_bios, link->link_id, 0,
 			      &enc_init_data.encoder);
@@ -643,6 +627,23 @@ static bool construct_phy(struct dc_link *link,
 	link->dc->res_pool->dig_link_enc_count++;
 
 	link->link_enc_hw_inst = link->link_enc->transmitter;
+
+	if (link->dc->res_pool->funcs->panel_cntl_create &&
+		(link->link_id.id == CONNECTOR_ID_EDP ||
+			link->link_id.id == CONNECTOR_ID_LVDS)) {
+		panel_cntl_init_data.ctx = dc_ctx;
+		panel_cntl_init_data.inst = panel_cntl_init_data.ctx->dc_edp_id_count;
+		panel_cntl_init_data.eng_id = link->eng_id;
+		link->panel_cntl =
+			link->dc->res_pool->funcs->panel_cntl_create(
+								&panel_cntl_init_data);
+		panel_cntl_init_data.ctx->dc_edp_id_count++;
+
+		if (link->panel_cntl == NULL) {
+			DC_ERROR("Failed to create link panel_cntl!\n");
+			goto panel_cntl_create_fail;
+		}
+	}
 	for (i = 0; i < 4; i++) {
 		if (bp_funcs->get_device_tag(dc_ctx->dc_bios,
 					     link->link_id, i,
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.c b/drivers/gpu/drm/amd/display/dc/link/link_validation.c
index b45fda96eaf6..5b0bc7f6a188 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_validation.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.c
@@ -346,23 +346,61 @@ enum dc_status link_validate_mode_timing(
 	return DC_OK;
 }
 
+/*
+ * This function calculates the bandwidth required for each stream timing
+ * and aggregates the stream bandwidth for the respective dpia link
+ *
+ * @stream: pointer to the array of dc_stream_state instances to validate
+ * @num_streams: number of streams to be validated
+ *
+ * return: true if validation succeeded
+ */
 bool link_validate_dpia_bandwidth(const struct dc_stream_state *stream, const unsigned int num_streams)
 {
-	bool ret = true;
-	int bw_needed[MAX_DPIA_NUM];
-	struct dc_link *link[MAX_DPIA_NUM];
+	int bw_needed[MAX_DPIA_NUM] = {0};
+	struct dc_link *dpia_link[MAX_DPIA_NUM] = {0};
+	int num_dpias = 0;
+
+	for (unsigned int i = 0; i < num_streams; ++i) {
+		if (stream[i].signal == SIGNAL_TYPE_DISPLAY_PORT) {
+			/* new dpia sst stream, check whether it exceeds max dpia */
+			if (num_dpias >= MAX_DPIA_NUM)
+				return false;
 
-	if (!num_streams || num_streams > MAX_DPIA_NUM)
-		return ret;
+			dpia_link[num_dpias] = stream[i].link;
+			bw_needed[num_dpias] = dc_bandwidth_in_kbps_from_timing(&stream[i].timing,
+					dc_link_get_highest_encoding_format(dpia_link[num_dpias]));
+			num_dpias++;
+		} else if (stream[i].signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
+			uint8_t j = 0;
+			/* check whether it's a known dpia link */
+			for (; j < num_dpias; ++j) {
+				if (dpia_link[j] == stream[i].link)
+					break;
+			}
+
+			if (j == num_dpias) {
+				/* new dpia mst stream, check whether it exceeds max dpia */
+				if (num_dpias >= MAX_DPIA_NUM)
+					return false;
+				else {
+					dpia_link[j] = stream[i].link;
+					num_dpias++;
+				}
+			}
+
+			bw_needed[j] += dc_bandwidth_in_kbps_from_timing(&stream[i].timing,
+				dc_link_get_highest_encoding_format(dpia_link[j]));
+		}
+	}
 
-	for (uint8_t i = 0; i < num_streams; ++i) {
+	/* Include dp overheads */
+	for (uint8_t i = 0; i < num_dpias; ++i) {
+		int dp_overhead = 0;
 
-		link[i] = stream[i].link;
-		bw_needed[i] = dc_bandwidth_in_kbps_from_timing(&stream[i].timing,
-				dc_link_get_highest_encoding_format(link[i]));
+		dp_overhead = link_dp_dpia_get_dp_overhead_in_dp_tunneling(dpia_link[i]);
+		bw_needed[i] += dp_overhead;
 	}
 
-	ret = dpia_validate_usb4_bw(link, bw_needed, num_streams);
-
-	return ret;
+	return dpia_validate_usb4_bw(dpia_link, bw_needed, num_dpias);
 }
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.h b/drivers/gpu/drm/amd/display/dc/link/link_validation.h
index 4a954317d0da..595fb05946e9 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_validation.h
+++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.h
@@ -25,6 +25,7 @@
 #ifndef __LINK_VALIDATION_H__
 #define __LINK_VALIDATION_H__
 #include "link.h"
+
 enum dc_status link_validate_mode_timing(
 		const struct dc_stream_state *stream,
 		struct dc_link *link,
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
index db87aa7b5c90..289f5d133342 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
@@ -412,12 +412,18 @@ static enum dc_link_rate get_cable_max_link_rate(struct dc_link *link)
 {
 	enum dc_link_rate cable_max_link_rate = LINK_RATE_UNKNOWN;
 
-	if (link->dpcd_caps.cable_id.bits.UHBR10_20_CAPABILITY & DP_UHBR20)
+	if (link->dpcd_caps.cable_id.bits.UHBR10_20_CAPABILITY & DP_UHBR20) {
 		cable_max_link_rate = LINK_RATE_UHBR20;
-	else if (link->dpcd_caps.cable_id.bits.UHBR13_5_CAPABILITY)
+	} else if (link->dpcd_caps.cable_id.bits.UHBR13_5_CAPABILITY) {
 		cable_max_link_rate = LINK_RATE_UHBR13_5;
-	else if (link->dpcd_caps.cable_id.bits.UHBR10_20_CAPABILITY & DP_UHBR10)
-		cable_max_link_rate = LINK_RATE_UHBR10;
+	} else if (link->dpcd_caps.cable_id.bits.UHBR10_20_CAPABILITY & DP_UHBR10) {
+		// allow DP40 cables to do UHBR13.5 for passive or unknown cable type
+		if (link->dpcd_caps.cable_id.bits.CABLE_TYPE < 2) {
+			cable_max_link_rate = LINK_RATE_UHBR13_5;
+		} else {
+			cable_max_link_rate = LINK_RATE_UHBR10;
+		}
+	}
 
 	return cable_max_link_rate;
 }
@@ -1392,7 +1398,7 @@ static bool get_usbc_cable_id(struct dc_link *link, union dp_cable_id *cable_id)
 	cmd.cable_id.header.payload_bytes = sizeof(cmd.cable_id.data);
 	cmd.cable_id.data.input.phy_inst = resource_transmitter_to_phy_idx(
 			link->dc, link->link_enc->transmitter);
-	if (dm_execute_dmub_cmd(link->dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
+	if (dc_wake_and_execute_dmub_cmd(link->dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
 			cmd.cable_id.header.ret_status == 1) {
 		cable_id->raw = cmd.cable_id.data.output_raw;
 		DC_LOG_DC("usbc_cable_id = %d.\n", cable_id->raw);
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c
index 0bb749133909..6af42ba9885c 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c
@@ -82,24 +82,33 @@ bool dpia_query_hpd_status(struct dc_link *link)
 {
 	union dmub_rb_cmd cmd = {0};
 	struct dc_dmub_srv *dmub_srv = link->ctx->dmub_srv;
-	bool is_hpd_high = false;
 
 	/* prepare QUERY_HPD command */
 	cmd.query_hpd.header.type = DMUB_CMD__QUERY_HPD_STATE;
 	cmd.query_hpd.data.instance = link->link_id.enum_id - ENUM_ID_1;
 	cmd.query_hpd.data.ch_type = AUX_CHANNEL_DPIA;
 
-	/* Return HPD status reported by DMUB if query successfully executed. */
-	if (dm_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) && cmd.query_hpd.data.status == AUX_RET_SUCCESS)
-		is_hpd_high = cmd.query_hpd.data.result;
-
-	DC_LOG_DEBUG("%s: link(%d) dpia(%d) cmd_status(%d) result(%d)\n",
-		__func__,
-		link->link_index,
-		link->link_id.enum_id - ENUM_ID_1,
-		cmd.query_hpd.data.status,
-		cmd.query_hpd.data.result);
-
-	return is_hpd_high;
+	/* Query dpia hpd status from dmub */
+	if (dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd,
+		DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
+	    cmd.query_hpd.data.status == AUX_RET_SUCCESS) {
+		DC_LOG_DEBUG("%s: for link(%d) dpia(%d) success, current_hpd_status(%d) new_hpd_status(%d)\n",
+			__func__,
+			link->link_index,
+			link->link_id.enum_id - ENUM_ID_1,
+			link->hpd_status,
+			cmd.query_hpd.data.result);
+		link->hpd_status = cmd.query_hpd.data.result;
+	} else {
+		DC_LOG_ERROR("%s: for link(%d) dpia(%d) failed with status(%d), current_hpd_status(%d) new_hpd_status(0)\n",
+			__func__,
+			link->link_index,
+			link->link_id.enum_id - ENUM_ID_1,
+			cmd.query_hpd.data.status,
+			link->hpd_status);
+		link->hpd_status = false;
+	}
+
+	return link->hpd_status;
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
index 7581023daa47..5491b707cec8 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
@@ -50,15 +50,28 @@ static bool get_bw_alloc_proceed_flag(struct dc_link *tmp)
 			&& tmp->hpd_status
 			&& tmp->dpia_bw_alloc_config.bw_alloc_enabled);
 }
+
 static void reset_bw_alloc_struct(struct dc_link *link)
 {
 	link->dpia_bw_alloc_config.bw_alloc_enabled = false;
-	link->dpia_bw_alloc_config.sink_verified_bw = 0;
-	link->dpia_bw_alloc_config.sink_max_bw = 0;
+	link->dpia_bw_alloc_config.link_verified_bw = 0;
+	link->dpia_bw_alloc_config.link_max_bw = 0;
+	link->dpia_bw_alloc_config.allocated_bw = 0;
 	link->dpia_bw_alloc_config.estimated_bw = 0;
 	link->dpia_bw_alloc_config.bw_granularity = 0;
+	link->dpia_bw_alloc_config.dp_overhead = 0;
 	link->dpia_bw_alloc_config.response_ready = false;
+	link->dpia_bw_alloc_config.nrd_max_lane_count = 0;
+	link->dpia_bw_alloc_config.nrd_max_link_rate = 0;
+	for (int i = 0; i < MAX_SINKS_PER_LINK; i++)
+		link->dpia_bw_alloc_config.remote_sink_req_bw[i] = 0;
+	DC_LOG_DEBUG("reset usb4 bw alloc of link(%d)\n", link->link_index);
 }
+
+#define BW_GRANULARITY_0 4 // 0.25 Gbps
+#define BW_GRANULARITY_1 2 // 0.5 Gbps
+#define BW_GRANULARITY_2 1 // 1 Gbps
+
 static uint8_t get_bw_granularity(struct dc_link *link)
 {
 	uint8_t bw_granularity = 0;
@@ -71,16 +84,20 @@ static uint8_t get_bw_granularity(struct dc_link *link)
 
 	switch (bw_granularity & 0x3) {
 	case 0:
-		bw_granularity = 4;
+		bw_granularity = BW_GRANULARITY_0;
 		break;
 	case 1:
+		bw_granularity = BW_GRANULARITY_1;
+		break;
+	case 2:
 	default:
-		bw_granularity = 2;
+		bw_granularity = BW_GRANULARITY_2;
 		break;
 	}
 
 	return bw_granularity;
 }
+
 static int get_estimated_bw(struct dc_link *link)
 {
 	uint8_t bw_estimated_bw = 0;
@@ -93,31 +110,33 @@ static int get_estimated_bw(struct dc_link *link)
 
 	return bw_estimated_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
 }
-static bool allocate_usb4_bw(int *stream_allocated_bw, int bw_needed, struct dc_link *link)
+
+static int get_non_reduced_max_link_rate(struct dc_link *link)
 {
-	if (bw_needed > 0)
-		*stream_allocated_bw += bw_needed;
+	uint8_t nrd_max_link_rate = 0;
 
-	return true;
+	core_link_read_dpcd(
+			link,
+			DP_TUNNELING_MAX_LINK_RATE,
+			&nrd_max_link_rate,
+			sizeof(uint8_t));
+
+	return nrd_max_link_rate;
 }
-static bool deallocate_usb4_bw(int *stream_allocated_bw, int bw_to_dealloc, struct dc_link *link)
-{
-	bool ret = false;
 
-	if (*stream_allocated_bw > 0) {
-		*stream_allocated_bw -= bw_to_dealloc;
-		ret = true;
-	} else {
-		//Do nothing for now
-		ret = true;
-	}
+static int get_non_reduced_max_lane_count(struct dc_link *link)
+{
+	uint8_t nrd_max_lane_count = 0;
 
-	// Unplug so reset values
-	if (!link->hpd_status)
-		reset_bw_alloc_struct(link);
+	core_link_read_dpcd(
+			link,
+			DP_TUNNELING_MAX_LANE_COUNT,
+			&nrd_max_lane_count,
+			sizeof(uint8_t));
 
-	return ret;
+	return nrd_max_lane_count;
 }
+
 /*
  * Read all New BW alloc configuration ex: estimated_bw, allocated_bw,
  * granuality, Driver_ID, CM_Group, & populate the BW allocation structs
@@ -125,10 +144,22 @@ static bool deallocate_usb4_bw(int *stream_allocated_bw, int bw_to_dealloc, stru
  */
 static void init_usb4_bw_struct(struct dc_link *link)
 {
-	// Init the known values
+	reset_bw_alloc_struct(link);
+
+	/* init the known values */
 	link->dpia_bw_alloc_config.bw_granularity = get_bw_granularity(link);
 	link->dpia_bw_alloc_config.estimated_bw = get_estimated_bw(link);
+	link->dpia_bw_alloc_config.nrd_max_link_rate = get_non_reduced_max_link_rate(link);
+	link->dpia_bw_alloc_config.nrd_max_lane_count = get_non_reduced_max_lane_count(link);
+
+	DC_LOG_DEBUG("%s: bw_granularity(%d), estimated_bw(%d)\n",
+		__func__, link->dpia_bw_alloc_config.bw_granularity,
+		link->dpia_bw_alloc_config.estimated_bw);
+	DC_LOG_DEBUG("%s: nrd_max_link_rate(%d), nrd_max_lane_count(%d)\n",
+		__func__, link->dpia_bw_alloc_config.nrd_max_link_rate,
+		link->dpia_bw_alloc_config.nrd_max_lane_count);
 }
+
 static uint8_t get_lowest_dpia_index(struct dc_link *link)
 {
 	const struct dc *dc_struct = link->dc;
@@ -141,51 +172,66 @@ static uint8_t get_lowest_dpia_index(struct dc_link *link)
 				dc_struct->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA)
 			continue;
 
-		if (idx > dc_struct->links[i]->link_index)
+		if (idx > dc_struct->links[i]->link_index) {
 			idx = dc_struct->links[i]->link_index;
+			break;
+		}
 	}
 
 	return idx;
 }
+
 /*
- * Get the Max Available BW or Max Estimated BW for each Host Router
+ * Get the maximum dp tunnel bandwidth of host router
  *
- * @link: pointer to the dc_link struct instance
- * @type: ESTIMATD BW or MAX AVAILABLE BW
+ * @dc: pointer to the dc struct instance
+ * @hr_index: host router index
  *
- * return: response_ready flag from dc_link struct
+ * return: host router maximum dp tunnel bandwidth
  */
-static int get_host_router_total_bw(struct dc_link *link, uint8_t type)
+static int get_host_router_total_dp_tunnel_bw(const struct dc *dc, uint8_t hr_index)
 {
-	const struct dc *dc_struct = link->dc;
-	uint8_t lowest_dpia_index = get_lowest_dpia_index(link);
-	uint8_t idx = (link->link_index - lowest_dpia_index) / 2, idx_temp = 0;
-	struct dc_link *link_temp;
+	uint8_t lowest_dpia_index = get_lowest_dpia_index(dc->links[0]);
+	uint8_t hr_index_temp = 0;
+	struct dc_link *link_dpia_primary, *link_dpia_secondary;
 	int total_bw = 0;
-	int i;
-
-	for (i = 0; i < MAX_PIPES * 2; ++i) {
 
-		if (!dc_struct->links[i] || dc_struct->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA)
-			continue;
+	for (uint8_t i = 0; i < (MAX_PIPES * 2) - 1; ++i) {
 
-		link_temp = dc_struct->links[i];
-		if (!link_temp || !link_temp->hpd_status)
+		if (!dc->links[i] || dc->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA)
 			continue;
 
-		idx_temp = (link_temp->link_index - lowest_dpia_index) / 2;
-
-		if (idx_temp == idx) {
-
-			if (type == HOST_ROUTER_BW_ESTIMATED)
-				total_bw += link_temp->dpia_bw_alloc_config.estimated_bw;
-			else if (type == HOST_ROUTER_BW_ALLOCATED)
-				total_bw += link_temp->dpia_bw_alloc_config.sink_allocated_bw;
+		hr_index_temp = (dc->links[i]->link_index - lowest_dpia_index) / 2;
+
+		if (hr_index_temp == hr_index) {
+			link_dpia_primary = dc->links[i];
+			link_dpia_secondary = dc->links[i + 1];
+
+			/**
+			 * If BW allocation enabled on both DPIAs, then
+			 * HR BW = Estimated(dpia_primary) + Allocated(dpia_secondary)
+			 * otherwise HR BW = Estimated(bw alloc enabled dpia)
+			 */
+			if ((link_dpia_primary->hpd_status &&
+				link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) &&
+				(link_dpia_secondary->hpd_status &&
+				link_dpia_secondary->dpia_bw_alloc_config.bw_alloc_enabled)) {
+					total_bw += link_dpia_primary->dpia_bw_alloc_config.estimated_bw +
+						link_dpia_secondary->dpia_bw_alloc_config.allocated_bw;
+			} else if (link_dpia_primary->hpd_status &&
+					link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) {
+				total_bw = link_dpia_primary->dpia_bw_alloc_config.estimated_bw;
+			} else if (link_dpia_secondary->hpd_status &&
+				link_dpia_secondary->dpia_bw_alloc_config.bw_alloc_enabled) {
+				total_bw += link_dpia_secondary->dpia_bw_alloc_config.estimated_bw;
+			}
+			break;
 		}
 	}
 
 	return total_bw;
 }
+
 /*
  * Cleanup function for when the dpia is unplugged to reset struct
  * and perform any required clean up
@@ -194,42 +240,49 @@ static int get_host_router_total_bw(struct dc_link *link, uint8_t type)
  *
  * return: none
  */
-static bool dpia_bw_alloc_unplug(struct dc_link *link)
+static void dpia_bw_alloc_unplug(struct dc_link *link)
 {
-	if (!link)
-		return true;
-
-	return deallocate_usb4_bw(&link->dpia_bw_alloc_config.sink_allocated_bw,
-			link->dpia_bw_alloc_config.sink_allocated_bw, link);
+	if (link) {
+		DC_LOG_DEBUG("%s: resetting bw alloc config for link(%d)\n",
+			__func__, link->link_index);
+		reset_bw_alloc_struct(link);
+	}
 }
+
 static void set_usb4_req_bw_req(struct dc_link *link, int req_bw)
 {
 	uint8_t requested_bw;
 	uint32_t temp;
 
-	// 1. Add check for this corner case #1
-	if (req_bw > link->dpia_bw_alloc_config.estimated_bw)
+	/* Error check whether requested bw is greater than estimated bw */
+	if (req_bw > link->dpia_bw_alloc_config.estimated_bw) {
+		DC_LOG_ERROR("%s: Request bw greater than estimated bw for link(%d)\n",
+			__func__, link->link_index);
 		req_bw = link->dpia_bw_alloc_config.estimated_bw;
+	}
 
 	temp = req_bw * link->dpia_bw_alloc_config.bw_granularity;
 	requested_bw = temp / Kbps_TO_Gbps;
 
-	// Always make sure to add more to account for floating points
+	/* Round up when the request is not a whole multiple of the granularity */
 	if (temp % Kbps_TO_Gbps)
 		++requested_bw;
 
-	// 2. Add check for this corner case #2
+	/* Error check whether requested and allocated are equal */
 	req_bw = requested_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
-	if (req_bw == link->dpia_bw_alloc_config.sink_allocated_bw)
-		return;
+	if (req_bw == link->dpia_bw_alloc_config.allocated_bw) {
+		DC_LOG_ERROR("%s: Request bw equals to allocated bw for link(%d)\n",
+			__func__, link->link_index);
+	}
 
-	if (core_link_write_dpcd(
+	link->dpia_bw_alloc_config.response_ready = false; // Reset flag
+	core_link_write_dpcd(
 		link,
 		REQUESTED_BW,
 		&requested_bw,
-		sizeof(uint8_t)) == DC_OK)
-		link->dpia_bw_alloc_config.response_ready = false; // Reset flag
+		sizeof(uint8_t));
 }
+
 /*
  * Return the response_ready flag from dc_link struct
  *
@@ -241,6 +294,7 @@ static bool get_cm_response_ready_flag(struct dc_link *link)
 {
 	return link->dpia_bw_alloc_config.response_ready;
 }
+
 // ------------------------------------------------------------------
 //					PUBLIC FUNCTIONS
 // ------------------------------------------------------------------
@@ -277,27 +331,27 @@ bool link_dp_dpia_set_dptx_usb4_bw_alloc_support(struct dc_link *link)
 				DPTX_BW_ALLOCATION_MODE_CONTROL,
 				&response,
 				sizeof(uint8_t)) != DC_OK) {
-			DC_LOG_DEBUG("%s: **** FAILURE Enabling DPtx BW Allocation Mode Support ***\n",
-					__func__);
+			DC_LOG_DEBUG("%s: FAILURE Enabling DPtx BW Allocation Mode Support for link(%d)\n",
+				__func__, link->link_index);
 		} else {
 			// SUCCESS Enabled DPtx BW Allocation Mode Support
-			link->dpia_bw_alloc_config.bw_alloc_enabled = true;
-			DC_LOG_DEBUG("%s: **** SUCCESS Enabling DPtx BW Allocation Mode Support ***\n",
-					__func__);
+			DC_LOG_DEBUG("%s: SUCCESS Enabling DPtx BW Allocation Mode Support for link(%d)\n",
+				__func__, link->link_index);
 
 			ret = true;
 			init_usb4_bw_struct(link);
+			link->dpia_bw_alloc_config.bw_alloc_enabled = true;
 		}
 	}
 
 out:
 	return ret;
 }
+
 void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t result)
 {
 	int bw_needed = 0;
 	int estimated = 0;
-	int host_router_total_estimated_bw = 0;
 
 	if (!get_bw_alloc_proceed_flag((link)))
 		return;
@@ -306,14 +360,22 @@ void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t res
 
 	case DPIA_BW_REQ_FAILED:
 
-		DC_LOG_DEBUG("%s: *** *** BW REQ FAILURE for DP-TX Request *** ***\n", __func__);
+		/*
+		 * Ideally, we shouldn't run into this case as we always validate available
+		 * bandwidth and request within that limit
+		 */
+		estimated = bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
+
+		DC_LOG_ERROR("%s: BW REQ FAILURE for DP-TX Request for link(%d)\n",
+			__func__, link->link_index);
+		DC_LOG_ERROR("%s: current estimated_bw(%d), new estimated_bw(%d)\n",
+			__func__, link->dpia_bw_alloc_config.estimated_bw, estimated);
 
-		// Update the new Estimated BW value updated by CM
-		link->dpia_bw_alloc_config.estimated_bw =
-				bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
+		/* Update the new Estimated BW value updated by CM */
+		link->dpia_bw_alloc_config.estimated_bw = estimated;
 
+		/* Re-request bandwidth using the updated estimated BW value */
 		set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.estimated_bw);
-		link->dpia_bw_alloc_config.response_ready = false;
 
 		/*
 		 * If FAIL then it is either:
@@ -326,68 +388,34 @@ void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t res
 
 	case DPIA_BW_REQ_SUCCESS:
 
-		DC_LOG_DEBUG("%s: *** BW REQ SUCCESS for DP-TX Request ***\n", __func__);
-
-		// 1. SUCCESS 1st time before any Pruning is done
-		// 2. SUCCESS after prev. FAIL before any Pruning is done
-		// 3. SUCCESS after Pruning is done but before enabling link
-
 		bw_needed = bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
 
-		// 1.
-		if (!link->dpia_bw_alloc_config.sink_allocated_bw) {
-
-			allocate_usb4_bw(&link->dpia_bw_alloc_config.sink_allocated_bw, bw_needed, link);
-			link->dpia_bw_alloc_config.sink_verified_bw =
-					link->dpia_bw_alloc_config.sink_allocated_bw;
-
-			// SUCCESS from first attempt
-			if (link->dpia_bw_alloc_config.sink_allocated_bw >
-			link->dpia_bw_alloc_config.sink_max_bw)
-				link->dpia_bw_alloc_config.sink_verified_bw =
-						link->dpia_bw_alloc_config.sink_max_bw;
-		}
-		// 3.
-		else if (link->dpia_bw_alloc_config.sink_allocated_bw) {
-
-			// Find out how much do we need to de-alloc
-			if (link->dpia_bw_alloc_config.sink_allocated_bw > bw_needed)
-				deallocate_usb4_bw(&link->dpia_bw_alloc_config.sink_allocated_bw,
-						link->dpia_bw_alloc_config.sink_allocated_bw - bw_needed, link);
-			else
-				allocate_usb4_bw(&link->dpia_bw_alloc_config.sink_allocated_bw,
-						bw_needed - link->dpia_bw_alloc_config.sink_allocated_bw, link);
-		}
+		DC_LOG_DEBUG("%s: BW REQ SUCCESS for DP-TX Request for link(%d)\n",
+			__func__, link->link_index);
+		DC_LOG_DEBUG("%s: current allocated_bw(%d), new allocated_bw(%d)\n",
+			__func__, link->dpia_bw_alloc_config.allocated_bw, bw_needed);
 
-		// 4. If this is the 2nd sink then any unused bw will be reallocated to master DPIA
-		// => check if estimated_bw changed
+		link->dpia_bw_alloc_config.allocated_bw = bw_needed;
 
 		link->dpia_bw_alloc_config.response_ready = true;
 		break;
 
 	case DPIA_EST_BW_CHANGED:
 
-		DC_LOG_DEBUG("%s: *** ESTIMATED BW CHANGED for DP-TX Request ***\n", __func__);
-
 		estimated = bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
-		host_router_total_estimated_bw = get_host_router_total_bw(link, HOST_ROUTER_BW_ESTIMATED);
 
-		// 1. If due to unplug of other sink
-		if (estimated == host_router_total_estimated_bw) {
-			// First update the estimated & max_bw fields
-			if (link->dpia_bw_alloc_config.estimated_bw < estimated)
-				link->dpia_bw_alloc_config.estimated_bw = estimated;
-		}
-		// 2. If due to realloc bw btw 2 dpia due to plug OR realloc unused Bw
-		else {
-			// We lost estimated bw usually due to plug event of other dpia
-			link->dpia_bw_alloc_config.estimated_bw = estimated;
-		}
+		DC_LOG_DEBUG("%s: ESTIMATED BW CHANGED for link(%d)\n",
+			__func__, link->link_index);
+		DC_LOG_DEBUG("%s: current estimated_bw(%d), new estimated_bw(%d)\n",
+			__func__, link->dpia_bw_alloc_config.estimated_bw, estimated);
+
+		link->dpia_bw_alloc_config.estimated_bw = estimated;
 		break;
 
 	case DPIA_BW_ALLOC_CAPS_CHANGED:
 
-		DC_LOG_DEBUG("%s: *** BW ALLOC CAPABILITY CHANGED for DP-TX Request ***\n", __func__);
+		DC_LOG_ERROR("%s: BW ALLOC CAPABILITY CHANGED to Disabled for link(%d)\n",
+			__func__, link->link_index);
 		link->dpia_bw_alloc_config.bw_alloc_enabled = false;
 		break;
 	}
@@ -405,21 +433,21 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea
 	if (link->hpd_status && peak_bw > 0) {
 
 		// If DP over USB4 then we need to check BW allocation
-		link->dpia_bw_alloc_config.sink_max_bw = peak_bw;
-		set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.sink_max_bw);
+		link->dpia_bw_alloc_config.link_max_bw = peak_bw;
+		set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.link_max_bw);
 
 		do {
-			if (!(timeout > 0))
+			if (timeout > 0)
 				timeout--;
 			else
 				break;
-			fsleep(10 * 1000);
+			msleep(10);
 		} while (!get_cm_response_ready_flag(link));
 
 		if (!timeout)
 			ret = 0;// ERROR TIMEOUT waiting for response for allocating bw
-		else if (link->dpia_bw_alloc_config.sink_allocated_bw > 0)
-			ret = get_host_router_total_bw(link, HOST_ROUTER_BW_ALLOCATED);
+		else if (link->dpia_bw_alloc_config.allocated_bw > 0)
+			ret = link->dpia_bw_alloc_config.allocated_bw;
 	}
 	//2. Cold Unplug
 	else if (!link->hpd_status)
@@ -428,65 +456,102 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea
 out:
 	return ret;
 }
-int link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw)
+bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw)
 {
-	int ret = 0;
+	bool ret = false;
 	uint8_t timeout = 10;
 
+	DC_LOG_DEBUG("%s: ENTER: link(%d), hpd_status(%d), current allocated_bw(%d), req_bw(%d)\n",
+		__func__, link->link_index, link->hpd_status,
+		link->dpia_bw_alloc_config.allocated_bw, req_bw);
+
 	if (!get_bw_alloc_proceed_flag(link))
 		goto out;
 
-	/*
-	 * Sometimes stream uses same timing parameters as the already
-	 * allocated max sink bw so no need to re-alloc
-	 */
-	if (req_bw != link->dpia_bw_alloc_config.sink_allocated_bw) {
-		set_usb4_req_bw_req(link, req_bw);
-		do {
-			if (!(timeout > 0))
-				timeout--;
-			else
-				break;
-			udelay(10 * 1000);
-		} while (!get_cm_response_ready_flag(link));
+	set_usb4_req_bw_req(link, req_bw);
+	do {
+		if (timeout > 0)
+			timeout--;
+		else
+			break;
+		msleep(10);
+	} while (!get_cm_response_ready_flag(link));
 
-		if (!timeout)
-			ret = 0;// ERROR TIMEOUT waiting for response for allocating bw
-		else if (link->dpia_bw_alloc_config.sink_allocated_bw > 0)
-			ret = get_host_router_total_bw(link, HOST_ROUTER_BW_ALLOCATED);
-	}
+	if (timeout)
+		ret = true;
 
 out:
+	DC_LOG_DEBUG("%s: EXIT: timeout(%d), ret(%d)\n", __func__, timeout, ret);
 	return ret;
 }
+
 bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed_per_dpia, const unsigned int num_dpias)
 {
 	bool ret = true;
-	int bw_needed_per_hr[MAX_HR_NUM] = { 0, 0 };
-	uint8_t lowest_dpia_index = 0, dpia_index = 0;
-	uint8_t i;
+	int bw_needed_per_hr[MAX_HR_NUM] = { 0, 0 }, host_router_total_dp_bw = 0;
+	uint8_t lowest_dpia_index, i, hr_index;
 
 	if (!num_dpias || num_dpias > MAX_DPIA_NUM)
 		return ret;
 
-	//Get total Host Router BW & Validate against each Host Router max BW
+	lowest_dpia_index = get_lowest_dpia_index(link[0]);
+
+	/* get total Host Router BW with granularity for the given modes */
 	for (i = 0; i < num_dpias; ++i) {
+		int granularity_Gbps = 0;
+		int bw_granularity = 0;
 
 		if (!link[i]->dpia_bw_alloc_config.bw_alloc_enabled)
 			continue;
 
-		lowest_dpia_index = get_lowest_dpia_index(link[i]);
 		if (link[i]->link_index < lowest_dpia_index)
 			continue;
 
-		dpia_index = (link[i]->link_index - lowest_dpia_index) / 2;
-		bw_needed_per_hr[dpia_index] += bw_needed_per_dpia[i];
-		if (bw_needed_per_hr[dpia_index] > get_host_router_total_bw(link[i], HOST_ROUTER_BW_ALLOCATED)) {
+		granularity_Gbps = (Kbps_TO_Gbps / link[i]->dpia_bw_alloc_config.bw_granularity);
+		bw_granularity = (bw_needed_per_dpia[i] / granularity_Gbps) * granularity_Gbps +
+				((bw_needed_per_dpia[i] % granularity_Gbps) ? granularity_Gbps : 0);
 
-			ret = false;
-			break;
+		hr_index = (link[i]->link_index - lowest_dpia_index) / 2;
+		bw_needed_per_hr[hr_index] += bw_granularity;
+	}
+
+	/* validate against each Host Router max BW */
+	for (hr_index = 0; hr_index < MAX_HR_NUM; ++hr_index) {
+		if (bw_needed_per_hr[hr_index]) {
+			host_router_total_dp_bw = get_host_router_total_dp_tunnel_bw(link[0]->dc, hr_index);
+			if (bw_needed_per_hr[hr_index] > host_router_total_dp_bw) {
+				ret = false;
+				break;
+			}
 		}
 	}
 
 	return ret;
 }
+
+int link_dp_dpia_get_dp_overhead_in_dp_tunneling(struct dc_link *link)
+{
+	int dp_overhead = 0, link_mst_overhead = 0;
+
+	if (!get_bw_alloc_proceed_flag((link)))
+		return dp_overhead;
+
+	/* if it's an mst link, add MTPH overhead */
+	if ((link->type == dc_connection_mst_branch) &&
+		!link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED) {
+		/* For 8b/10b encoding: MTP is 64 time slots long, slot 0 is used for MTPH
+		 * MST overhead is 1/64 of link bandwidth (excluding any overhead)
+		 */
+		const struct dc_link_settings *link_cap =
+			dc_link_get_link_cap(link);
+		uint32_t link_bw_in_kbps = (uint32_t)link_cap->link_rate *
+					   (uint32_t)link_cap->lane_count *
+					   LINK_RATE_REF_FREQ_IN_KHZ * 8;
+		link_mst_overhead = (link_bw_in_kbps / 64) + ((link_bw_in_kbps % 64) ? 1 : 0);
+	}
+
+	/* add all the overheads */
+	dp_overhead = link_mst_overhead;
+
+	return dp_overhead;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h
index 7292690383ae..3b6d8494f9d5 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h
@@ -59,9 +59,9 @@ bool link_dp_dpia_set_dptx_usb4_bw_alloc_support(struct dc_link *link);
  * @link: pointer to the dc_link struct instance
  * @req_bw: Bw requested by the stream
  *
- * return: allocated bw else return 0
+ * return: true if allocated successfully
  */
-int link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw);
+bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw);
 
 /*
  * Handle the USB4 BW Allocation related functionality here:
@@ -99,4 +99,13 @@ void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t res
  */
 bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed, const unsigned int num_dpias);
 
+/*
+ * Obtain all the DP overheads in dp tunneling for the dpia link
+ *
+ * @link: pointer to the dc_link struct instance
+ *
+ * return: DP overheads in DP tunneling
+ */
+int link_dp_dpia_get_dp_overhead_in_dp_tunneling(struct dc_link *link);
+
 #endif /* DC_INC_LINK_DP_DPIA_BW_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c
index 0c00e94e90b1..ba69874be5a4 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c
@@ -190,9 +190,6 @@ static void handle_hpd_irq_replay_sink(struct dc_link *link)
 	/*AMD Replay version reuse DP_PSR_ERROR_STATUS for REPLAY_ERROR status.*/
 	union psr_error_status replay_error_status;
 
-	if (link->replay_settings.config.force_disable_desync_error_check)
-		return;
-
 	if (!link->replay_settings.replay_feature_enabled)
 		return;
 
@@ -210,9 +207,6 @@ static void handle_hpd_irq_replay_sink(struct dc_link *link)
 		&replay_error_status.raw,
 		sizeof(replay_error_status.raw));
 
-	if (replay_configuration.bits.DESYNC_ERROR_STATUS)
-		link->replay_settings.config.received_desync_error_hpd = 1;
-
 	link->replay_settings.config.replay_error_status.bits.LINK_CRC_ERROR =
 		replay_error_status.bits.LINK_CRC_ERROR;
 	link->replay_settings.config.replay_error_status.bits.DESYNC_ERROR =
@@ -225,6 +219,12 @@ static void handle_hpd_irq_replay_sink(struct dc_link *link)
 		link->replay_settings.config.replay_error_status.bits.STATE_TRANSITION_ERROR) {
 		bool allow_active;
 
+		if (link->replay_settings.config.replay_error_status.bits.DESYNC_ERROR)
+			link->replay_settings.config.received_desync_error_hpd = 1;
+
+		if (link->replay_settings.config.force_disable_desync_error_check)
+			return;
+
 		/* Acknowledge and clear configuration bits */
 		dm_helpers_dp_write_dpcd(
 			link->ctx,
@@ -265,7 +265,7 @@ void dp_handle_link_loss(struct dc_link *link)
 
 	for (i = count - 1; i >= 0; i--) {
 		// Always use max settings here for DP 1.4a LL Compliance CTS
-		if (link->is_automated) {
+		if (link->skip_fallback_on_link_loss) {
 			pipes[i]->link_config.dp_link_settings.lane_count =
 					link->verified_link_cap.lane_count;
 			pipes[i]->link_config.dp_link_settings.link_rate =
@@ -404,7 +404,9 @@ bool dp_handle_hpd_rx_irq(struct dc_link *link,
 
 	if (hpd_irq_dpcd_data.bytes.device_service_irq.bits.AUTOMATED_TEST) {
 		// Workaround for DP 1.4a LL Compliance CTS as USB4 has to share encoders unlike DP and USBC
-		link->is_automated = true;
+		if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
+			link->skip_fallback_on_link_loss = true;
+
 		device_service_clear.bits.AUTOMATED_TEST = 1;
 		core_link_write_dpcd(
 			link,
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
index 90339c2dfd84..16a62e018712 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
@@ -517,6 +517,7 @@ enum link_training_result dp_check_link_loss_status(
 {
 	enum link_training_result status = LINK_TRAINING_SUCCESS;
 	union lane_status lane_status;
+	union lane_align_status_updated dpcd_lane_status_updated;
 	uint8_t dpcd_buf[6] = {0};
 	uint32_t lane;
 
@@ -532,10 +533,12 @@ enum link_training_result dp_check_link_loss_status(
 		 * check lanes status
 		 */
 		lane_status.raw = dp_get_nibble_at_index(&dpcd_buf[2], lane);
+		dpcd_lane_status_updated.raw = dpcd_buf[4];
 
 		if (!lane_status.bits.CHANNEL_EQ_DONE_0 ||
 			!lane_status.bits.CR_DONE_0 ||
-			!lane_status.bits.SYMBOL_LOCKED_0) {
+			!lane_status.bits.SYMBOL_LOCKED_0 ||
+			!dp_is_interlane_aligned(dpcd_lane_status_updated)) {
 			/* if one of the channel equalization, clock
 			 * recovery or symbol lock is dropped
 			 * consider it as (link has been
@@ -807,7 +810,7 @@ void dp_decide_lane_settings(
 		const struct link_training_settings *lt_settings,
 		const union lane_adjust ln_adjust[LANE_COUNT_DP_MAX],
 		struct dc_lane_settings hw_lane_settings[LANE_COUNT_DP_MAX],
-		union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX])
+		union dpcd_training_lane *dpcd_lane_settings)
 {
 	uint32_t lane;
 
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h
index 7d027bac8255..851bd17317a0 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h
@@ -111,7 +111,7 @@ void dp_decide_lane_settings(
 	const struct link_training_settings *lt_settings,
 	const union lane_adjust ln_adjust[LANE_COUNT_DP_MAX],
 	struct dc_lane_settings hw_lane_settings[LANE_COUNT_DP_MAX],
-	union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX]);
+	union dpcd_training_lane *dpcd_lane_settings);
 
 enum dc_dp_training_pattern decide_cr_training_pattern(
 		const struct dc_link_settings *link_settings);
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c
index 4f4e899e5c46..5d36bab0029c 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c
@@ -619,7 +619,7 @@ static enum link_training_result dpia_training_eq_non_transparent(
 	uint32_t retries_eq = 0;
 	enum dc_status status;
 	enum dc_dp_training_pattern tr_pattern;
-	uint32_t wait_time_microsec;
+	uint32_t wait_time_microsec = 0;
 	enum dc_lane_count lane_count = lt_settings->link_settings.lane_count;
 	union lane_align_status_updated dpcd_lane_status_updated = {0};
 	union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = {0};
@@ -811,7 +811,7 @@ static enum link_training_result dpia_training_eq_transparent(
 			/* Take into consideration corner case for DP 1.4a LL Compliance CTS as USB4
 			 * has to share encoders unlike DP and USBC
 			 */
-			if (dp_is_interlane_aligned(dpcd_lane_status_updated) || (link->is_automated && retries_eq)) {
+			if (dp_is_interlane_aligned(dpcd_lane_status_updated) || (link->skip_fallback_on_link_loss && retries_eq)) {
 				result =  LINK_TRAINING_SUCCESS;
 				break;
 			}
@@ -1037,7 +1037,7 @@ enum link_training_result dpia_perform_link_training(
 	 */
 	if (result == LINK_TRAINING_SUCCESS) {
 		fsleep(5000);
-		if (!link->is_automated)
+		if (!link->skip_fallback_on_link_loss)
 			result = dp_check_link_loss_status(link, &lt_settings);
 	} else if (result == LINK_TRAINING_ABORT)
 		dpia_training_abort(link, &lt_settings, repeater_id);
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c
index fd8f6f198146..7087cdc9e977 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c
@@ -115,7 +115,7 @@ static enum link_training_result perform_fixed_vs_pe_nontransparent_training_seq
 		lt_settings->cr_pattern_time = 16000;
 
 	/* Fixed VS/PE specific: Toggle link rate */
-	apply_toggle_rate_wa = (link->vendor_specific_lttpr_link_rate_wa == target_rate);
+	apply_toggle_rate_wa = ((link->vendor_specific_lttpr_link_rate_wa == target_rate) || (link->vendor_specific_lttpr_link_rate_wa == 0));
 	target_rate = get_dpcd_link_rate(&lt_settings->link_settings);
 	toggle_rate = (target_rate == 0x6) ? 0xA : 0x6;
 
@@ -205,6 +205,7 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence_legacy(
 	const uint8_t vendor_lttpr_write_data_4lane_3[4] = {0x1, 0x6D, 0xF2, 0x18};
 	const uint8_t vendor_lttpr_write_data_4lane_4[4] = {0x1, 0x6C, 0xF2, 0x03};
 	const uint8_t vendor_lttpr_write_data_4lane_5[4] = {0x1, 0x03, 0xF3, 0x06};
+	const uint8_t vendor_lttpr_write_data_dpmf[4] = {0x1, 0x6, 0x70, 0x87};
 	enum link_training_result status = LINK_TRAINING_SUCCESS;
 	uint8_t lane = 0;
 	union down_spread_ctrl downspread = {0};
@@ -271,7 +272,7 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence_legacy(
 	/* Vendor specific: Toggle link rate */
 	toggle_rate = (rate == 0x6) ? 0xA : 0x6;
 
-	if (link->vendor_specific_lttpr_link_rate_wa == rate) {
+	if (link->vendor_specific_lttpr_link_rate_wa == rate || link->vendor_specific_lttpr_link_rate_wa == 0) {
 		core_link_write_dpcd(
 				link,
 				DP_LINK_BW_SET,
@@ -293,6 +294,10 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence_legacy(
 		DP_DOWNSPREAD_CTRL,
 		lt_settings->link_settings.link_spread);
 
+	link_configure_fixed_vs_pe_retimer(link->ddc,
+			&vendor_lttpr_write_data_dpmf[0],
+			sizeof(vendor_lttpr_write_data_dpmf));
+
 	if (lt_settings->link_settings.lane_count == LANE_COUNT_FOUR) {
 		link_configure_fixed_vs_pe_retimer(link->ddc,
 				&vendor_lttpr_write_data_4lane_1[0], sizeof(vendor_lttpr_write_data_4lane_1));
@@ -552,6 +557,7 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence(
 	const uint8_t vendor_lttpr_write_data_4lane_3[4] = {0x1, 0x6D, 0xF2, 0x18};
 	const uint8_t vendor_lttpr_write_data_4lane_4[4] = {0x1, 0x6C, 0xF2, 0x03};
 	const uint8_t vendor_lttpr_write_data_4lane_5[4] = {0x1, 0x03, 0xF3, 0x06};
+	const uint8_t vendor_lttpr_write_data_dpmf[4] = {0x1, 0x6, 0x70, 0x87};
 	enum link_training_result status = LINK_TRAINING_SUCCESS;
 	uint8_t lane = 0;
 	union down_spread_ctrl downspread = {0};
@@ -617,7 +623,7 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence(
 	/* Vendor specific: Toggle link rate */
 	toggle_rate = (rate == 0x6) ? 0xA : 0x6;
 
-	if (link->vendor_specific_lttpr_link_rate_wa == rate) {
+	if (link->vendor_specific_lttpr_link_rate_wa == rate || link->vendor_specific_lttpr_link_rate_wa == 0) {
 		core_link_write_dpcd(
 				link,
 				DP_LINK_BW_SET,
@@ -639,6 +645,10 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence(
 		DP_DOWNSPREAD_CTRL,
 		lt_settings->link_settings.link_spread);
 
+	link_configure_fixed_vs_pe_retimer(link->ddc,
+			&vendor_lttpr_write_data_dpmf[0],
+			sizeof(vendor_lttpr_write_data_dpmf));
+
 	if (lt_settings->link_settings.lane_count == LANE_COUNT_FOUR) {
 		link_configure_fixed_vs_pe_retimer(link->ddc,
 				&vendor_lttpr_write_data_4lane_1[0], sizeof(vendor_lttpr_write_data_4lane_1));
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c
index 5c9a30211c10..fc50931c2aec 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c
@@ -205,7 +205,7 @@ enum dc_status core_link_read_dpcd(
 	uint32_t extended_size;
 	/* size of the remaining partitioned address space */
 	uint32_t size_left_to_read;
-	enum dc_status status;
+	enum dc_status status = DC_ERROR_UNEXPECTED;
 	/* size of the next partition to be read from */
 	uint32_t partition_size;
 	uint32_t data_index = 0;
@@ -234,7 +234,7 @@ enum dc_status core_link_write_dpcd(
 {
 	uint32_t partition_size;
 	uint32_t data_index = 0;
-	enum dc_status status;
+	enum dc_status status = DC_ERROR_UNEXPECTED;
 
 	while (size) {
 		partition_size = dpcd_get_next_partition_size(address, size);
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
index e32a7974a4bc..046d3e205415 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
@@ -170,7 +170,6 @@ bool edp_set_backlight_level_nits(struct dc_link *link,
 	*(uint32_t *)&dpcd_backlight_set.backlight_level_millinits = backlight_millinits;
 	*(uint16_t *)&dpcd_backlight_set.backlight_transition_time_ms = (uint16_t)transition_time_in_ms;
 
-	link->backlight_settings.backlight_millinits = backlight_millinits;
 
 	if (!link->dpcd_caps.panel_luminance_control) {
 		if (core_link_write_dpcd(link, DP_SOURCE_BACKLIGHT_LEVEL,
@@ -290,7 +289,7 @@ bool set_default_brightness_aux(struct dc_link *link)
 			default_backlight = 150000;
 		// if < 1 nits or > 5000, it might be wrong readback
 		if (default_backlight < 1000 || default_backlight > 5000000)
-			default_backlight = 150000; //
+			default_backlight = 150000;
 
 		return edp_set_backlight_level_nits(link, true,
 				default_backlight, 0);
@@ -298,15 +297,6 @@ bool set_default_brightness_aux(struct dc_link *link)
 	return false;
 }
 
-bool set_cached_brightness_aux(struct dc_link *link)
-{
-	if (link->backlight_settings.backlight_millinits)
-		return edp_set_backlight_level_nits(link, true,
-						    link->backlight_settings.backlight_millinits, 0);
-	else
-		return set_default_brightness_aux(link);
-	return false;
-}
 bool edp_is_ilr_optimization_enabled(struct dc_link *link)
 {
 	if (link->dpcd_caps.edp_supported_link_rates_count == 0 || !link->panel_config.ilr.optimize_edp_link_rate)
@@ -539,6 +529,9 @@ bool edp_set_backlight_level(const struct dc_link *link,
 	if (dc_is_embedded_signal(link->connector_signal)) {
 		struct pipe_ctx *pipe_ctx = get_pipe_from_link(link);
 
+		if (link->panel_cntl)
+			link->panel_cntl->stored_backlight_registers.USER_LEVEL = backlight_pwm_u16_16;
+
 		if (pipe_ctx) {
 			/* Disable brightness ramping when the display is blanked
 			 * as it can hang the DMCU
@@ -937,8 +930,8 @@ bool edp_get_replay_state(const struct dc_link *link, uint64_t *state)
 bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream)
 {
 	/* To-do: Setup Replay */
-	struct dc *dc = link->ctx->dc;
-	struct dmub_replay *replay = dc->res_pool->replay;
+	struct dc *dc;
+	struct dmub_replay *replay;
 	int i;
 	unsigned int panel_inst;
 	struct replay_context replay_context = { 0 };
@@ -954,6 +947,10 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream
 	if (!link)
 		return false;
 
+	dc = link->ctx->dc;
+
+	replay = dc->res_pool->replay;
+
 	if (!replay)
 		return false;
 
@@ -982,8 +979,7 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream
 
 	replay_context.line_time_in_ns = lineTimeInNs;
 
-	if (replay)
-		link->replay_settings.replay_feature_enabled =
+	link->replay_settings.replay_feature_enabled =
 			replay->funcs->replay_copy_settings(replay, link, &replay_context, panel_inst);
 	if (link->replay_settings.replay_feature_enabled) {
 
@@ -1007,6 +1003,36 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream
 	return true;
 }
 
+/*
+ * This is a general interface for Replay to set a 32-bit variable in DMUB.
+ * msg: replay_FW_Message_type selecting the instruction or variable passed to DMUB.
+ * cmd_data: Value of the config.
+ */
+bool edp_send_replay_cmd(struct dc_link *link,
+			enum replay_FW_Message_type msg,
+			union dmub_replay_cmd_set *cmd_data)
+{
+	struct dc *dc = link->ctx->dc;
+	struct dmub_replay *replay = dc->res_pool->replay;
+	unsigned int panel_inst;
+
+	if (!replay)
+		return false;
+
+	DC_LOGGER_INIT(link->ctx->logger);
+
+	if (dc_get_edp_link_panel_inst(dc, link, &panel_inst))
+		cmd_data->panel_inst = panel_inst;
+	else {
+		DC_LOG_DC("%s(): get edp panel inst fail ", __func__);
+		return false;
+	}
+
+	replay->funcs->replay_send_cmd(replay, msg, cmd_data);
+
+	return true;
+}
+
 bool edp_set_coasting_vtotal(struct dc_link *link, uint16_t coasting_vtotal)
 {
 	struct dc *dc = link->ctx->dc;
@@ -1045,6 +1071,33 @@ bool edp_replay_residency(const struct dc_link *link,
 	return true;
 }
 
+bool edp_set_replay_power_opt_and_coasting_vtotal(struct dc_link *link,
+	const unsigned int *power_opts, uint16_t coasting_vtotal)
+{
+	struct dc  *dc = link->ctx->dc;
+	struct dmub_replay *replay = dc->res_pool->replay;
+	unsigned int panel_inst;
+
+	if (!dc_get_edp_link_panel_inst(dc, link, &panel_inst))
+		return false;
+
+	/* This function can return true only if both power_opts and coasting vtotal changed */
+	if (power_opts && link->replay_settings.replay_power_opt_active != *power_opts &&
+		coasting_vtotal && link->replay_settings.coasting_vtotal != coasting_vtotal) {
+		if (link->replay_settings.replay_feature_enabled &&
+			replay->funcs->replay_set_power_opt_and_coasting_vtotal) {
+			replay->funcs->replay_set_power_opt_and_coasting_vtotal(replay,
+				*power_opts, panel_inst, coasting_vtotal);
+			link->replay_settings.replay_power_opt_active = *power_opts;
+			link->replay_settings.coasting_vtotal = coasting_vtotal;
+		} else
+			return false;
+	} else
+		return false;
+
+	return true;
+}
+
 static struct abm *get_abm_from_stream_res(const struct dc_link *link)
 {
 	int i;
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h
index ebf7deb63d13..34e521af7bb4 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h
@@ -30,7 +30,6 @@
 enum dp_panel_mode dp_get_panel_mode(struct dc_link *link);
 void dp_set_panel_mode(struct dc_link *link, enum dp_panel_mode panel_mode);
 bool set_default_brightness_aux(struct dc_link *link);
-bool set_cached_brightness_aux(struct dc_link *link);
 void edp_panel_backlight_power_on(struct dc_link *link, bool wait_for_hpd);
 int edp_get_backlight_level(const struct dc_link *link);
 bool edp_get_backlight_level_nits(struct dc_link *link,
@@ -57,10 +56,15 @@ bool edp_set_replay_allow_active(struct dc_link *dc_link, const bool *enable,
 	bool wait, bool force_static, const unsigned int *power_opts);
 bool edp_setup_replay(struct dc_link *link,
 		const struct dc_stream_state *stream);
+bool edp_send_replay_cmd(struct dc_link *link,
+			enum replay_FW_Message_type msg,
+			union dmub_replay_cmd_set *cmd_data);
 bool edp_set_coasting_vtotal(struct dc_link *link, uint16_t coasting_vtotal);
 bool edp_replay_residency(const struct dc_link *link,
 	unsigned int *residency, const bool is_start, const bool is_alpm);
 bool edp_get_replay_state(const struct dc_link *link, uint64_t *state);
+bool edp_set_replay_power_opt_and_coasting_vtotal(struct dc_link *link,
+	const unsigned int *power_opts, uint16_t coasting_vtotal);
 bool edp_wait_for_t12(struct dc_link *link);
 bool edp_is_ilr_optimization_required(struct dc_link *link,
        struct dc_crtc_timing *crtc_timing);
diff --git a/drivers/gpu/drm/amd/display/dc/optc/Makefile b/drivers/gpu/drm/amd/display/dc/optc/Makefile
new file mode 100644
index 000000000000..bb213335fb9f
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/optc/Makefile
@@ -0,0 +1,108 @@
+
+# Copyright 2022 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# Makefile for the 'optc' sub-component of DAL.
+#
+
+
+ifdef CONFIG_DRM_AMD_DC_FP
+###############################################################################
+# DCN
+###############################################################################
+
+OPTC_DCN10 = dcn10_optc.o
+
+AMD_DAL_OPTC_DCN10 = $(addprefix $(AMDDALPATH)/dc/optc/dcn10/,$(OPTC_DCN10))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN10)
+
+###############################################################################
+
+OPTC_DCN20 = dcn20_optc.o
+
+AMD_DAL_OPTC_DCN20 = $(addprefix $(AMDDALPATH)/dc/optc/dcn20/,$(OPTC_DCN20))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN20)
+
+###############################################################################
+
+OPTC_DCN201 = dcn201_optc.o
+
+AMD_DAL_OPTC_DCN201 = $(addprefix $(AMDDALPATH)/dc/optc/dcn201/,$(OPTC_DCN201))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN201)
+
+###############################################################################
+
+###############################################################################
+
+###############################################################################
+
+OPTC_DCN30 = dcn30_optc.o
+
+AMD_DAL_OPTC_DCN30 = $(addprefix $(AMDDALPATH)/dc/optc/dcn30/,$(OPTC_DCN30))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN30)
+
+###############################################################################
+
+OPTC_DCN301 = dcn301_optc.o
+
+AMD_DAL_OPTC_DCN301 = $(addprefix $(AMDDALPATH)/dc/optc/dcn301/,$(OPTC_DCN301))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN301)
+
+###############################################################################
+
+OPTC_DCN31 = dcn31_optc.o
+
+AMD_DAL_OPTC_DCN31 = $(addprefix $(AMDDALPATH)/dc/optc/dcn31/,$(OPTC_DCN31))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN31)
+
+###############################################################################
+
+OPTC_DCN314 = dcn314_optc.o
+
+AMD_DAL_OPTC_DCN314 = $(addprefix $(AMDDALPATH)/dc/optc/dcn314/,$(OPTC_DCN314))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN314)
+
+###############################################################################
+
+OPTC_DCN32 = dcn32_optc.o
+
+AMD_DAL_OPTC_DCN32 = $(addprefix $(AMDDALPATH)/dc/optc/dcn32/,$(OPTC_DCN32))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN32)
+
+###############################################################################
+
+OPTC_DCN35 = dcn35_optc.o
+
+AMD_DAL_OPTC_DCN35 = $(addprefix $(AMDDALPATH)/dc/optc/dcn35/,$(OPTC_DCN35))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN35)
+
+###############################################################################
+
+###############################################################################
+endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c
index 0e8f4f36c87c..0e8f4f36c87c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h
index ab81594a7fad..ab81594a7fad 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c
index 58bdbd859bf9..58bdbd859bf9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.h
index f7968b9ca16e..c2e03ced392e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.h
@@ -26,7 +26,7 @@
 #ifndef __DC_OPTC_DCN20_H__
 #define __DC_OPTC_DCN20_H__
 
-#include "../dcn10/dcn10_optc.h"
+#include "dcn10/dcn10_optc.h"
 
 #define TG_COMMON_REG_LIST_DCN2_0(inst) \
 	TG_COMMON_REG_LIST_DCN(inst),\
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.c
index 70fcbec03fb6..70fcbec03fb6 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.h
index e9545b73513a..e9545b73513a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.c
index b97bdb868a0e..b97bdb868a0e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.h
index d3a056c12b0d..d3a056c12b0d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.c
index b3cfcb887905..b3cfcb887905 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.h
index b49585682a15..b49585682a15 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c
index 63a677c8ee27..63a677c8ee27 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.h
index 30b81a448ce2..30b81a448ce2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.c
index 0086cafb0f7a..0086cafb0f7a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.h
index 99c098e76116..99c098e76116 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
index a2c4db2cebdd..823493543325 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
@@ -166,6 +166,16 @@ static bool optc32_disable_crtc(struct timing_generator *optc)
 {
 	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
+	REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT,
+			OPTC_SEG0_SRC_SEL, 0xf,
+			OPTC_SEG1_SRC_SEL, 0xf,
+			OPTC_SEG2_SRC_SEL, 0xf,
+			OPTC_SEG3_SRC_SEL, 0xf,
+			OPTC_NUM_OF_INPUT_SEGMENT, 0);
+
+	REG_UPDATE(OPTC_MEMORY_CONFIG,
+			OPTC_MEM_SEL, 0);
+
 	/* disable otg request until end of the first line
 	 * in the vertical blank region
 	 */
@@ -198,6 +208,13 @@ static void optc32_disable_phantom_otg(struct timing_generator *optc)
 {
 	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
+	REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT,
+			OPTC_SEG0_SRC_SEL, 0xf,
+			OPTC_SEG1_SRC_SEL, 0xf,
+			OPTC_SEG2_SRC_SEL, 0xf,
+			OPTC_SEG3_SRC_SEL, 0xf,
+			OPTC_NUM_OF_INPUT_SEGMENT, 0);
+
 	REG_UPDATE(OTG_CONTROL, OTG_MASTER_EN, 0);
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h
index 8ce3b178cab0..8ce3b178cab0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c
index a4a39f1638cf..5b1547508850 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c
@@ -138,6 +138,16 @@ static bool optc35_disable_crtc(struct timing_generator *optc)
 {
 	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
+	REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT,
+			OPTC_SEG0_SRC_SEL, 0xf,
+			OPTC_SEG1_SRC_SEL, 0xf,
+			OPTC_SEG2_SRC_SEL, 0xf,
+			OPTC_SEG3_SRC_SEL, 0xf,
+			OPTC_NUM_OF_INPUT_SEGMENT, 0);
+
+	REG_UPDATE(OPTC_MEMORY_CONFIG,
+			OPTC_MEM_SEL, 0);
+
 	/* disable otg request until end of the first line
 	 * in the vertical blank region
 	 */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.h
index 1f422e4c468f..1f422e4c468f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.h
diff --git a/drivers/gpu/drm/amd/display/dc/resource/Makefile b/drivers/gpu/drm/amd/display/dc/resource/Makefile
new file mode 100644
index 000000000000..0a75ed8962a5
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/resource/Makefile
@@ -0,0 +1,199 @@
+
+# Copyright 2022 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# Makefile for the 'resource' sub-component of DAL.
+#
+
+
+###############################################################################
+#  DCE
+###############################################################################
+
+RESOURCE_DCE100 = dce100_resource.o
+
+AMD_DAL_RESOURCE_DCE100 = $(addprefix $(AMDDALPATH)/dc/resource/dce100/,$(RESOURCE_DCE100))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE100)
+
+###############################################################################
+
+RESOURCE_DCE110 = dce110_resource.o
+
+AMD_DAL_RESOURCE_DCE110 = $(addprefix $(AMDDALPATH)/dc/resource/dce110/,$(RESOURCE_DCE110))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE110)
+
+###############################################################################
+
+RESOURCE_DCE112 = dce112_resource.o
+
+AMD_DAL_RESOURCE_DCE112 = $(addprefix $(AMDDALPATH)/dc/resource/dce112/,$(RESOURCE_DCE112))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE112)
+
+###############################################################################
+
+RESOURCE_DCE120 = dce120_resource.o
+
+AMD_DAL_RESOURCE_DCE120 = $(addprefix $(AMDDALPATH)/dc/resource/dce120/,$(RESOURCE_DCE120))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE120)
+
+###############################################################################
+
+RESOURCE_DCE80 = dce80_resource.o
+
+AMD_DAL_RESOURCE_DCE80 = $(addprefix $(AMDDALPATH)/dc/resource/dce80/,$(RESOURCE_DCE80))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE80)
+
+ifdef CONFIG_DRM_AMD_DC_FP
+###############################################################################
+# DCN
+###############################################################################
+
+RESOURCE_DCN10 = dcn10_resource.o
+
+AMD_DAL_RESOURCE_DCN10 = $(addprefix $(AMDDALPATH)/dc/resource/dcn10/,$(RESOURCE_DCN10))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN10)
+
+###############################################################################
+
+RESOURCE_DCN20 = dcn20_resource.o
+
+AMD_DAL_RESOURCE_DCN20 = $(addprefix $(AMDDALPATH)/dc/resource/dcn20/,$(RESOURCE_DCN20))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN20)
+
+###############################################################################
+
+RESOURCE_DCN201 = dcn201_resource.o
+
+AMD_DAL_RESOURCE_DCN201 = $(addprefix $(AMDDALPATH)/dc/resource/dcn201/,$(RESOURCE_DCN201))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN201)
+
+###############################################################################
+
+RESOURCE_DCN21 = dcn21_resource.o
+
+AMD_DAL_RESOURCE_DCN21 = $(addprefix $(AMDDALPATH)/dc/resource/dcn21/,$(RESOURCE_DCN21))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN21)
+
+###############################################################################
+
+###############################################################################
+
+###############################################################################
+
+RESOURCE_DCN30 = dcn30_resource.o
+
+AMD_DAL_RESOURCE_DCN30 = $(addprefix $(AMDDALPATH)/dc/resource/dcn30/,$(RESOURCE_DCN30))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN30)
+
+###############################################################################
+
+RESOURCE_DCN301 = dcn301_resource.o
+
+AMD_DAL_RESOURCE_DCN301 = $(addprefix $(AMDDALPATH)/dc/resource/dcn301/,$(RESOURCE_DCN301))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN301)
+
+###############################################################################
+
+RESOURCE_DCN302 = dcn302_resource.o
+
+AMD_DAL_RESOURCE_DCN302 = $(addprefix $(AMDDALPATH)/dc/resource/dcn302/,$(RESOURCE_DCN302))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN302)
+
+###############################################################################
+
+RESOURCE_DCN303 = dcn303_resource.o
+
+AMD_DAL_RESOURCE_DCN303 = $(addprefix $(AMDDALPATH)/dc/resource/dcn303/,$(RESOURCE_DCN303))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN303)
+
+###############################################################################
+
+RESOURCE_DCN31 = dcn31_resource.o
+
+AMD_DAL_RESOURCE_DCN31 = $(addprefix $(AMDDALPATH)/dc/resource/dcn31/,$(RESOURCE_DCN31))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN31)
+
+###############################################################################
+
+RESOURCE_DCN314 = dcn314_resource.o
+
+AMD_DAL_RESOURCE_DCN314 = $(addprefix $(AMDDALPATH)/dc/resource/dcn314/,$(RESOURCE_DCN314))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN314)
+
+###############################################################################
+
+RESOURCE_DCN315 = dcn315_resource.o
+
+AMD_DAL_RESOURCE_DCN315 = $(addprefix $(AMDDALPATH)/dc/resource/dcn315/,$(RESOURCE_DCN315))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN315)
+
+###############################################################################
+
+RESOURCE_DCN316 = dcn316_resource.o
+
+AMD_DAL_RESOURCE_DCN316 = $(addprefix $(AMDDALPATH)/dc/resource/dcn316/,$(RESOURCE_DCN316))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN316)
+
+###############################################################################
+
+RESOURCE_DCN32 = dcn32_resource.o
+
+AMD_DAL_RESOURCE_DCN32 = $(addprefix $(AMDDALPATH)/dc/resource/dcn32/,$(RESOURCE_DCN32))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN32)
+
+###############################################################################
+
+RESOURCE_DCN321 = dcn321_resource.o
+
+AMD_DAL_RESOURCE_DCN321 = $(addprefix $(AMDDALPATH)/dc/resource/dcn321/,$(RESOURCE_DCN321))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN321)
+
+###############################################################################
+
+RESOURCE_DCN35 = dcn35_resource.o
+
+AMD_DAL_RESOURCE_DCN35 = $(addprefix $(AMDDALPATH)/dc/resource/dcn35/,$(RESOURCE_DCN35))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN35)
+
+###############################################################################
+
+###############################################################################
+
+endif
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c
index 53a5f4cb648c..53a5f4cb648c 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h
index fecab7c560f5..fecab7c560f5 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c
index fe518fd27b08..fe518fd27b08 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.h
index aa4531e0800e..aa4531e0800e 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c
index d1edac46c9a0..d1edac46c9a0 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h
index 1f57ebc6f9b4..1f57ebc6f9b4 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c
index 962de79be169..20662edd0ae4 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c
@@ -36,7 +36,7 @@
 
 #include "dce110/dce110_resource.h"
 #include "virtual/virtual_stream_encoder.h"
-#include "dce120_timing_generator.h"
+#include "dce120/dce120_timing_generator.h"
 #include "irq/dce120/irq_service_dce120.h"
 #include "dce/dce_opp.h"
 #include "dce/dce_clock_source.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.h
index 3d1f3cf012f4..3d1f3cf012f4 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce80/CMakeLists.txt b/drivers/gpu/drm/amd/display/dc/resource/dce80/CMakeLists.txt
new file mode 100644
index 000000000000..19dd73bc9ab0
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/CMakeLists.txt
@@ -0,0 +1,4 @@
+dal3_subdirectory_sources(
+  dce80_resource.c
+  dce80_resource.h
+  )
+\ No newline at end of file
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c
index 35a2cce0c2b8..35a2cce0c2b8 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.h
index eff31ab83a39..eff31ab83a39 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c
index b94c5c97eee7..d08d10969251 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c
@@ -26,29 +26,32 @@
 #include "dm_services.h"
 #include "dc.h"
 
-#include "dcn10_init.h"
+#include "dcn10/dcn10_init.h"
 
 #include "resource.h"
 #include "include/irq_service_interface.h"
-#include "dcn10_resource.h"
-#include "dcn10_ipp.h"
-#include "dcn10_mpc.h"
+#include "dcn10/dcn10_resource.h"
+#include "dcn10/dcn10_ipp.h"
+#include "dcn10/dcn10_mpc.h"
+
+#include "dcn10/dcn10_dwb.h"
+
 #include "irq/dcn10/irq_service_dcn10.h"
-#include "dcn10_dpp.h"
-#include "dcn10_optc.h"
+#include "dcn10/dcn10_dpp.h"
+#include "dcn10/dcn10_optc.h"
 #include "dcn10/dcn10_hwseq.h"
 #include "dce110/dce110_hwseq.h"
-#include "dcn10_opp.h"
-#include "dcn10_link_encoder.h"
-#include "dcn10_stream_encoder.h"
+#include "dcn10/dcn10_opp.h"
+#include "dcn10/dcn10_link_encoder.h"
+#include "dcn10/dcn10_stream_encoder.h"
 #include "dce/dce_clock_source.h"
 #include "dce/dce_audio.h"
 #include "dce/dce_hwseq.h"
 #include "virtual/virtual_stream_encoder.h"
 #include "dce110/dce110_resource.h"
 #include "dce112/dce112_resource.h"
-#include "dcn10_hubp.h"
-#include "dcn10_hubbub.h"
+#include "dcn10/dcn10_hubp.h"
+#include "dcn10/dcn10_hubbub.h"
 #include "dce/dce_panel_cntl.h"
 
 #include "soc15_hw_ip.h"
@@ -1247,7 +1250,10 @@ struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link(
 			/* Store first available for MST second display
 			 * in daisy chain use case
 			 */
-			j = i;
+
+			if (pool->stream_enc[i]->id != ENGINE_ID_VIRTUAL)
+				j = i;
+
 			if (link->ep_type == DISPLAY_ENDPOINT_PHY && pool->stream_enc[i]->id ==
 					link->link_enc->preferred_engine)
 				return pool->stream_enc[i];
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h
index bf8e33cd8147..bf8e33cd8147 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c
index 0a422fbb14bc..f9c5bc624be3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c
@@ -29,7 +29,7 @@
 #include "dm_services.h"
 #include "dc.h"
 
-#include "dcn20_init.h"
+#include "dcn20/dcn20_init.h"
 
 #include "resource.h"
 #include "include/irq_service_interface.h"
@@ -39,29 +39,29 @@
 
 #include "dcn10/dcn10_hubp.h"
 #include "dcn10/dcn10_ipp.h"
-#include "dcn20_hubbub.h"
-#include "dcn20_mpc.h"
-#include "dcn20_hubp.h"
+#include "dcn20/dcn20_hubbub.h"
+#include "dcn20/dcn20_mpc.h"
+#include "dcn20/dcn20_hubp.h"
 #include "irq/dcn20/irq_service_dcn20.h"
-#include "dcn20_dpp.h"
-#include "dcn20_optc.h"
+#include "dcn20/dcn20_dpp.h"
+#include "dcn20/dcn20_optc.h"
 #include "dcn20/dcn20_hwseq.h"
 #include "dce110/dce110_hwseq.h"
 #include "dcn10/dcn10_resource.h"
-#include "dcn20_opp.h"
+#include "dcn20/dcn20_opp.h"
 
-#include "dcn20_dsc.h"
+#include "dcn20/dcn20_dsc.h"
 
-#include "dcn20_link_encoder.h"
-#include "dcn20_stream_encoder.h"
+#include "dcn20/dcn20_link_encoder.h"
+#include "dcn20/dcn20_stream_encoder.h"
 #include "dce/dce_clock_source.h"
 #include "dce/dce_audio.h"
 #include "dce/dce_hwseq.h"
 #include "virtual/virtual_stream_encoder.h"
 #include "dce110/dce110_resource.h"
 #include "dml/display_mode_vba.h"
-#include "dcn20_dccg.h"
-#include "dcn20_vmid.h"
+#include "dcn20/dcn20_dccg.h"
+#include "dcn20/dcn20_vmid.h"
 #include "dce/dce_panel_cntl.h"
 
 #include "navi10_ip_offset.h"
@@ -1273,15 +1273,19 @@ static void build_clamping_params(struct dc_stream_state *stream)
 	stream->clamping.pixel_encoding = stream->timing.pixel_encoding;
 }
 
-static enum dc_status build_pipe_hw_param(struct pipe_ctx *pipe_ctx)
+void dcn20_build_pipe_pix_clk_params(struct pipe_ctx *pipe_ctx)
 {
-
 	get_pixel_clock_parameters(pipe_ctx, &pipe_ctx->stream_res.pix_clk_params);
-
 	pipe_ctx->clock_source->funcs->get_pix_clk_dividers(
-		pipe_ctx->clock_source,
-		&pipe_ctx->stream_res.pix_clk_params,
-		&pipe_ctx->pll_settings);
+			pipe_ctx->clock_source,
+			&pipe_ctx->stream_res.pix_clk_params,
+			&pipe_ctx->pll_settings);
+}
+
+static enum dc_status build_pipe_hw_param(struct pipe_ctx *pipe_ctx)
+{
+
+	dcn20_build_pipe_pix_clk_params(pipe_ctx);
 
 	pipe_ctx->stream->clamping.pixel_encoding = pipe_ctx->stream->timing.pixel_encoding;
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h
index 37ecaccc5d12..4cee3fa11a7f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h
@@ -165,6 +165,7 @@ enum dc_status dcn20_add_stream_to_ctx(struct dc *dc, struct dc_state *new_ctx,
 enum dc_status dcn20_add_dsc_to_stream_resource(struct dc *dc, struct dc_state *dc_ctx, struct dc_stream_state *dc_stream);
 enum dc_status dcn20_remove_stream_from_ctx(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream);
 enum dc_status dcn20_patch_unknown_plane_state(struct dc_plane_state *plane_state);
+void dcn20_build_pipe_pix_clk_params(struct pipe_ctx *pipe_ctx);
 
 #endif /* __DC_RESOURCE_DCN20_H__ */
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c
index bca22d867696..914b234d7f6b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c
@@ -26,7 +26,7 @@
 #include "dm_services.h"
 #include "dc.h"
 
-#include "dcn201_init.h"
+#include "dcn201/dcn201_init.h"
 #include "dml/dcn20/dcn20_fpu.h"
 #include "resource.h"
 #include "include/irq_service_interface.h"
@@ -36,16 +36,16 @@
 
 #include "dcn10/dcn10_hubp.h"
 #include "dcn10/dcn10_ipp.h"
-#include "dcn201_mpc.h"
-#include "dcn201_hubp.h"
+#include "dcn201/dcn201_mpc.h"
+#include "dcn201/dcn201_hubp.h"
 #include "irq/dcn201/irq_service_dcn201.h"
 #include "dcn201/dcn201_dpp.h"
 #include "dcn201/dcn201_hubbub.h"
-#include "dcn201_dccg.h"
-#include "dcn201_optc.h"
+#include "dcn201/dcn201_dccg.h"
+#include "dcn201/dcn201_optc.h"
 #include "dcn201/dcn201_hwseq.h"
 #include "dce110/dce110_hwseq.h"
-#include "dcn201_opp.h"
+#include "dcn201/dcn201_opp.h"
 #include "dcn201/dcn201_link_encoder.h"
 #include "dcn20/dcn20_stream_encoder.h"
 #include "dce/dce_clock_source.h"
@@ -55,7 +55,7 @@
 #include "dce110/dce110_resource.h"
 #include "dce/dce_aux.h"
 #include "dce/dce_i2c.h"
-#include "dcn201_hubbub.h"
+#include "dcn201/dcn201_hubbub.h"
 #include "dcn10/dcn10_resource.h"
 
 #include "cyan_skillfish_ip_offset.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.h
index e0467d17d4ae..e0467d17d4ae 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c
index 42277b280586..65d337731f56 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c
@@ -29,7 +29,7 @@
 #include "dm_services.h"
 #include "dc.h"
 
-#include "dcn21_init.h"
+#include "dcn21/dcn21_init.h"
 
 #include "resource.h"
 #include "include/irq_service_interface.h"
@@ -44,7 +44,7 @@
 #include "dcn20/dcn20_hubbub.h"
 #include "dcn20/dcn20_mpc.h"
 #include "dcn20/dcn20_hubp.h"
-#include "dcn21_hubp.h"
+#include "dcn21/dcn21_hubp.h"
 #include "irq/dcn21/irq_service_dcn21.h"
 #include "dcn20/dcn20_dpp.h"
 #include "dcn20/dcn20_optc.h"
@@ -61,7 +61,7 @@
 #include "dml/display_mode_vba.h"
 #include "dcn20/dcn20_dccg.h"
 #include "dcn21/dcn21_dccg.h"
-#include "dcn21_hubbub.h"
+#include "dcn21/dcn21_hubbub.h"
 #include "dcn10/dcn10_resource.h"
 #include "dce/dce_panel_cntl.h"
 
@@ -713,9 +713,8 @@ static void dcn21_resource_destruct(struct dcn21_resource_pool *pool)
 			pool->base.hubps[i] = NULL;
 		}
 
-		if (pool->base.irqs != NULL) {
+		if (pool->base.irqs != NULL)
 			dal_irq_service_destroy(&pool->base.irqs);
-		}
 	}
 
 	for (i = 0; i < pool->base.res_cap->num_ddc; i++) {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.h
index f7ecc002c2f7..f7ecc002c2f7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
index 7b259cb5f418..37a64186f324 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
@@ -27,7 +27,7 @@
 #include "dm_services.h"
 #include "dc.h"
 
-#include "dcn30_init.h"
+#include "dcn30/dcn30_init.h"
 
 #include "resource.h"
 #include "include/irq_service_interface.h"
@@ -1682,6 +1682,7 @@ noinline bool dcn30_internal_validate_bw(
 		 * We don't actually support prefetch mode 2, so require that we
 		 * at least support prefetch mode 1.
 		 */
+		context->bw_ctx.dml.validate_max_state = fast_validate;
 		context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank =
 			dm_allow_self_refresh;
 
@@ -1691,6 +1692,7 @@ noinline bool dcn30_internal_validate_bw(
 			memset(merge, 0, sizeof(merge));
 			vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge);
 		}
+		context->bw_ctx.dml.validate_max_state = false;
 	}
 
 	dml_log_mode_support_params(&context->bw_ctx.dml);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h
index 8e6b8b7368fd..8e6b8b7368fd 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c
index f3b75f283aa2..7538b548c572 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c
@@ -27,7 +27,7 @@
 #include "dm_services.h"
 #include "dc.h"
 
-#include "dcn301_init.h"
+#include "dcn301/dcn301_init.h"
 
 #include "resource.h"
 #include "include/irq_service_interface.h"
@@ -61,7 +61,7 @@
 #include "dcn10/dcn10_resource.h"
 #include "dcn30/dcn30_dio_stream_encoder.h"
 #include "dcn301/dcn301_dio_link_encoder.h"
-#include "dcn301_panel_cntl.h"
+#include "dcn301/dcn301_panel_cntl.h"
 
 #include "vangogh_ip_offset.h"
 
@@ -999,7 +999,7 @@ static struct stream_encoder *dcn301_stream_encoder_create(enum engine_id eng_id
 	vpg = dcn301_vpg_create(ctx, vpg_inst);
 	afmt = dcn301_afmt_create(ctx, afmt_inst);
 
-	if (!enc1 || !vpg || !afmt) {
+	if (!enc1 || !vpg || !afmt || eng_id >= ARRAY_SIZE(stream_enc_regs)) {
 		kfree(enc1);
 		kfree(vpg);
 		kfree(afmt);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.h
index ae8672680cdd..ae8672680cdd 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c
index 63ac984a04f7..5791b5cc2875 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c
@@ -23,9 +23,9 @@
  *
  */
 
-#include "dcn302_init.h"
+#include "dcn302/dcn302_init.h"
 #include "dcn302_resource.h"
-#include "dcn302_dccg.h"
+#include "dcn302/dcn302_dccg.h"
 #include "irq/dcn302/irq_service_dcn302.h"
 
 #include "dcn30/dcn30_dio_link_encoder.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.h
index 9f24e73b92b3..9f24e73b92b3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c
index 49cb7fde416a..25cd6236b054 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c
@@ -23,9 +23,9 @@
  * Authors: AMD
  */
 
-#include "dcn303_init.h"
+#include "dcn303/dcn303_init.h"
 #include "dcn303_resource.h"
-#include "dcn303_dccg.h"
+#include "dcn303/dcn303_dccg.h"
 #include "irq/dcn303/irq_service_dcn303.h"
 
 #include "dcn30/dcn30_dio_link_encoder.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.h
index 37cf1525820b..37cf1525820b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c
index 79416cfb22f0..31035fc3d868 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c
@@ -70,7 +70,7 @@
 #include "dml/dcn31/dcn31_fpu.h"
 #include "dcn31/dcn31_dccg.h"
 #include "dcn10/dcn10_resource.h"
-#include "dcn31_panel_cntl.h"
+#include "dcn31/dcn31_panel_cntl.h"
 
 #include "dcn30/dcn30_dwb.h"
 #include "dcn30/dcn30_mmhubbub.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h
index 901436591ed4..901436591ed4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c
index 677361d74a4e..c97391edb5ff 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c
@@ -871,7 +871,7 @@ static const struct dc_plane_cap plane_cap = {
 static const struct dc_debug_options debug_defaults_drv = {
 	.disable_z10 = false,
 	.enable_z9_disable_interface = true,
-	.minimum_z8_residency_time = 2000,
+	.minimum_z8_residency_time = 2100,
 	.psr_skip_crtc_disable = true,
 	.replay_skip_crtc_disabled = true,
 	.disable_dmcu = true,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h
index 49ffe71018df..49ffe71018df 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c
index cb8024eee8e4..515ba435f759 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c
@@ -1631,8 +1631,10 @@ static bool allow_pixel_rate_crb(struct dc *dc, struct dc_state *context)
 	int i;
 	struct resource_context *res_ctx = &context->res_ctx;
 
-	/*Don't apply for single stream*/
-	if (context->stream_count < 2)
+	/* Only apply for dual stream scenarios that include an eDP stream */
+	if (context->stream_count != 2)
+		return false;
+	if (context->streams[0]->signal != SIGNAL_TYPE_EDP && context->streams[1]->signal != SIGNAL_TYPE_EDP)
 		return false;
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.h
index 22849eaa6f24..22849eaa6f24 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c
index b9753d4606f8..b9753d4606f8 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.h
index aba6d634131b..aba6d634131b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
index 89b072447dba..6f10052caeef 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
@@ -27,7 +27,7 @@
 #include "dm_services.h"
 #include "dc.h"
 
-#include "dcn32_init.h"
+#include "dcn32/dcn32_init.h"
 
 #include "resource.h"
 #include "include/irq_service_interface.h"
@@ -41,7 +41,7 @@
 #include "dcn31/dcn31_hubbub.h"
 #include "dcn32/dcn32_hubbub.h"
 #include "dcn32/dcn32_mpc.h"
-#include "dcn32_hubp.h"
+#include "dcn32/dcn32_hubp.h"
 #include "irq/dcn32/irq_service_dcn32.h"
 #include "dcn32/dcn32_dpp.h"
 #include "dcn32/dcn32_optc.h"
@@ -89,6 +89,8 @@
 #include "dcn20/dcn20_vmid.h"
 #include "dml/dcn32/dcn32_fpu.h"
 
+#include "dc_state_priv.h"
+
 #include "dml2/dml2_wrapper.h"
 
 #define DC_LOGGER_INIT(logger)
@@ -1644,7 +1646,7 @@ static void dcn32_enable_phantom_plane(struct dc *dc,
 		if (curr_pipe->top_pipe && curr_pipe->top_pipe->plane_state == curr_pipe->plane_state)
 			phantom_plane = prev_phantom_plane;
 		else
-			phantom_plane = dc_create_plane_state(dc);
+			phantom_plane = dc_state_create_phantom_plane(dc, context, curr_pipe->plane_state);
 
 		memcpy(&phantom_plane->address, &curr_pipe->plane_state->address, sizeof(phantom_plane->address));
 		memcpy(&phantom_plane->scaling_quality, &curr_pipe->plane_state->scaling_quality,
@@ -1665,9 +1667,7 @@ static void dcn32_enable_phantom_plane(struct dc *dc,
 		phantom_plane->clip_rect.y = 0;
 		phantom_plane->clip_rect.height = phantom_stream->src.height;
 
-		phantom_plane->is_phantom = true;
-
-		dc_add_plane_to_context(dc, phantom_stream, phantom_plane, context);
+		dc_state_add_phantom_plane(dc, phantom_stream, phantom_plane, context);
 
 		curr_pipe = curr_pipe->bottom_pipe;
 		prev_phantom_plane = phantom_plane;
@@ -1683,13 +1683,7 @@ static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc,
 	struct dc_stream_state *phantom_stream = NULL;
 	struct pipe_ctx *ref_pipe = &context->res_ctx.pipe_ctx[dc_pipe_idx];
 
-	phantom_stream = dc_create_stream_for_sink(ref_pipe->stream->sink);
-	phantom_stream->signal = SIGNAL_TYPE_VIRTUAL;
-	phantom_stream->dpms_off = true;
-	phantom_stream->mall_stream_config.type = SUBVP_PHANTOM;
-	phantom_stream->mall_stream_config.paired_stream = ref_pipe->stream;
-	ref_pipe->stream->mall_stream_config.type = SUBVP_MAIN;
-	ref_pipe->stream->mall_stream_config.paired_stream = phantom_stream;
+	phantom_stream = dc_state_create_phantom_stream(dc, context, ref_pipe->stream);
 
 	/* stream has limited viewport and small timing */
 	memcpy(&phantom_stream->timing, &ref_pipe->stream->timing, sizeof(phantom_stream->timing));
@@ -1699,81 +1693,10 @@ static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc,
 	dcn32_set_phantom_stream_timing(dc, context, ref_pipe, phantom_stream, pipes, pipe_cnt, dc_pipe_idx);
 	DC_FP_END();
 
-	dc_add_stream_to_ctx(dc, context, phantom_stream);
+	dc_state_add_phantom_stream(dc, context, phantom_stream, ref_pipe->stream);
 	return phantom_stream;
 }
 
-void dcn32_retain_phantom_pipes(struct dc *dc, struct dc_state *context)
-{
-	int i;
-	struct dc_plane_state *phantom_plane = NULL;
-	struct dc_stream_state *phantom_stream = NULL;
-
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
-		if (resource_is_pipe_type(pipe, OTG_MASTER) &&
-				resource_is_pipe_type(pipe, DPP_PIPE) &&
-				pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
-			phantom_plane = pipe->plane_state;
-			phantom_stream = pipe->stream;
-
-			dc_plane_state_retain(phantom_plane);
-			dc_stream_retain(phantom_stream);
-		}
-	}
-}
-
-// return true if removed piped from ctx, false otherwise
-bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context, bool fast_update)
-{
-	int i;
-	bool removed_pipe = false;
-	struct dc_plane_state *phantom_plane = NULL;
-	struct dc_stream_state *phantom_stream = NULL;
-
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-		// build scaling params for phantom pipes
-		if (pipe->plane_state && pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
-			phantom_plane = pipe->plane_state;
-			phantom_stream = pipe->stream;
-
-			dc_rem_all_planes_for_stream(dc, pipe->stream, context);
-			dc_remove_stream_from_ctx(dc, context, pipe->stream);
-
-			/* Ref count is incremented on allocation and also when added to the context.
-			 * Therefore we must call release for the the phantom plane and stream once
-			 * they are removed from the ctx to finally decrement the refcount to 0 to free.
-			 */
-			dc_plane_state_release(phantom_plane);
-			dc_stream_release(phantom_stream);
-
-			removed_pipe = true;
-		}
-
-		/* For non-full updates, a shallow copy of the current state
-		 * is created. In this case we don't want to erase the current
-		 * state (there can be 2 HIRQL threads, one in flip, and one in
-		 * checkMPO) that can cause a race condition.
-		 *
-		 * This is just a workaround, needs a proper fix.
-		 */
-		if (!fast_update) {
-			// Clear all phantom stream info
-			if (pipe->stream) {
-				pipe->stream->mall_stream_config.type = SUBVP_NONE;
-				pipe->stream->mall_stream_config.paired_stream = NULL;
-			}
-
-			if (pipe->plane_state) {
-				pipe->plane_state->is_phantom = false;
-			}
-		}
-	}
-	return removed_pipe;
-}
-
 /* TODO: Input to this function should indicate which pipe indexes (or streams)
  * require a phantom pipe / stream
  */
@@ -1798,7 +1721,7 @@ void dcn32_add_phantom_pipes(struct dc *dc, struct dc_state *context,
 		// We determine which phantom pipes were added by comparing with
 		// the phantom stream.
 		if (pipe->plane_state && pipe->stream && pipe->stream == phantom_stream &&
-				pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+				dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
 			pipe->stream->use_dynamic_meta = false;
 			pipe->plane_state->flip_immediate = false;
 			if (!resource_build_scaling_params(pipe)) {
@@ -1817,7 +1740,6 @@ static bool dml1_validate(struct dc *dc, struct dc_state *context, bool fast_val
 	int vlevel = 0;
 	int pipe_cnt = 0;
 	display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL);
-	struct mall_temp_config mall_temp_config;
 
 	/* To handle Freesync properly, setting FreeSync DML parameters
 	 * to its default state for the first stage of validation
@@ -1827,29 +1749,12 @@ static bool dml1_validate(struct dc *dc, struct dc_state *context, bool fast_val
 
 	DC_LOGGER_INIT(dc->ctx->logger);
 
-	/* For fast validation, there are situations where a shallow copy of
-	 * of the dc->current_state is created for the validation. In this case
-	 * we want to save and restore the mall config because we always
-	 * teardown subvp at the beginning of validation (and don't attempt
-	 * to add it back if it's fast validation). If we don't restore the
-	 * subvp config in cases of fast validation + shallow copy of the
-	 * dc->current_state, the dc->current_state will have a partially
-	 * removed subvp state when we did not intend to remove it.
-	 */
-	if (fast_validate) {
-		memset(&mall_temp_config, 0, sizeof(mall_temp_config));
-		dcn32_save_mall_state(dc, context, &mall_temp_config);
-	}
-
 	BW_VAL_TRACE_COUNT();
 
 	DC_FP_START();
 	out = dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate);
 	DC_FP_END();
 
-	if (fast_validate)
-		dcn32_restore_mall_state(dc, context, &mall_temp_config);
-
 	if (pipe_cnt == 0)
 		goto validate_out;
 
@@ -1924,7 +1829,21 @@ int dcn32_populate_dml_pipes_from_context(
 		dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
 		DC_FP_END();
 		pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
-		pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal;
+		if (dc->config.enable_windowed_mpo_odm &&
+				dc->debug.enable_single_display_2to1_odm_policy) {
+			switch (resource_get_odm_slice_count(pipe)) {
+			case 2:
+				pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1;
+				break;
+			case 4:
+				pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_4to1;
+				break;
+			default:
+				pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal;
+			}
+		} else {
+			pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal;
+		}
 		pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet
 		pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
 		pipes[pipe_cnt].pipe.scale_ratio_depth.lb_depth = dm_lb_19;
@@ -1933,7 +1852,7 @@ int dcn32_populate_dml_pipes_from_context(
 		 * This is just a workaround -- needs a proper fix.
 		 */
 		if (!fast_validate) {
-			switch (pipe->stream->mall_stream_config.type) {
+			switch (dc_state_get_pipe_subvp_type(context, pipe)) {
 			case SUBVP_MAIN:
 				pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_sub_viewport;
 				subvp_in_use = true;
@@ -2037,10 +1956,7 @@ static struct resource_funcs dcn32_res_pool_funcs = {
 	.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
 	.update_soc_for_wm_a = dcn30_update_soc_for_wm_a,
 	.add_phantom_pipes = dcn32_add_phantom_pipes,
-	.remove_phantom_pipes = dcn32_remove_phantom_pipes,
-	.retain_phantom_pipes = dcn32_retain_phantom_pipes,
-	.save_mall_state = dcn32_save_mall_state,
-	.restore_mall_state = dcn32_restore_mall_state,
+	.build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params,
 };
 
 static uint32_t read_pipe_fuses(struct dc_context *ctx)
@@ -2453,16 +2369,19 @@ static bool dcn32_resource_construct(
 	dc->dml2_options.callbacks.get_opp_head = &resource_get_opp_head;
 
 	dc->dml2_options.svp_pstate.callbacks.dc = dc;
-	dc->dml2_options.svp_pstate.callbacks.add_plane_to_context = &dc_add_plane_to_context;
-	dc->dml2_options.svp_pstate.callbacks.add_stream_to_ctx = &dc_add_stream_to_ctx;
+	dc->dml2_options.svp_pstate.callbacks.add_phantom_plane = &dc_state_add_phantom_plane;
+	dc->dml2_options.svp_pstate.callbacks.add_phantom_stream = &dc_state_add_phantom_stream;
 	dc->dml2_options.svp_pstate.callbacks.build_scaling_params = &resource_build_scaling_params;
-	dc->dml2_options.svp_pstate.callbacks.create_plane = &dc_create_plane_state;
-	dc->dml2_options.svp_pstate.callbacks.remove_plane_from_context = &dc_remove_plane_from_context;
-	dc->dml2_options.svp_pstate.callbacks.remove_stream_from_ctx = &dc_remove_stream_from_ctx;
-	dc->dml2_options.svp_pstate.callbacks.create_stream_for_sink = &dc_create_stream_for_sink;
-	dc->dml2_options.svp_pstate.callbacks.plane_state_release = &dc_plane_state_release;
-	dc->dml2_options.svp_pstate.callbacks.stream_release = &dc_stream_release;
+	dc->dml2_options.svp_pstate.callbacks.create_phantom_plane = &dc_state_create_phantom_plane;
+	dc->dml2_options.svp_pstate.callbacks.remove_phantom_plane = &dc_state_remove_phantom_plane;
+	dc->dml2_options.svp_pstate.callbacks.remove_phantom_stream = &dc_state_remove_phantom_stream;
+	dc->dml2_options.svp_pstate.callbacks.create_phantom_stream = &dc_state_create_phantom_stream;
+	dc->dml2_options.svp_pstate.callbacks.release_phantom_plane = &dc_state_release_phantom_plane;
+	dc->dml2_options.svp_pstate.callbacks.release_phantom_stream = &dc_state_release_phantom_stream;
 	dc->dml2_options.svp_pstate.callbacks.release_dsc = &dcn20_release_dsc;
+	dc->dml2_options.svp_pstate.callbacks.get_pipe_subvp_type = &dc_state_get_pipe_subvp_type;
+	dc->dml2_options.svp_pstate.callbacks.get_stream_subvp_type = &dc_state_get_stream_subvp_type;
+	dc->dml2_options.svp_pstate.callbacks.get_paired_subvp_stream = &dc_state_get_paired_subvp_stream;
 
 	dc->dml2_options.svp_pstate.subvp_fw_processing_delay_us = dc->caps.subvp_fw_processing_delay_us;
 	dc->dml2_options.svp_pstate.subvp_prefetch_end_to_mall_start_us = dc->caps.subvp_prefetch_end_to_mall_start_us;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
index b931008114c9..0c87b0fabba7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
@@ -39,6 +39,7 @@
 #define DCN3_2_MBLK_HEIGHT_8BPE 64
 #define DCN3_2_DCFCLK_DS_INIT_KHZ 10000 // Choose 10Mhz for init DCFCLK DS freq
 #define SUBVP_HIGH_REFRESH_LIST_LEN 4
+#define SUBVP_ACTIVE_MARGIN_LIST_LEN 2
 #define DCN3_2_MAX_SUBVP_PIXEL_RATE_MHZ 1800
 #define DCN3_2_VMIN_DISPCLK_HZ 717000000
 
@@ -57,6 +58,15 @@ struct subvp_high_refresh_list {
 	} res[SUBVP_HIGH_REFRESH_LIST_LEN];
 };
 
+struct subvp_active_margin_list {
+	int min_refresh;
+	int max_refresh;
+	struct {
+		int width;
+		int height;
+	} res[SUBVP_ACTIVE_MARGIN_LIST_LEN];
+};
+
 struct dcn32_resource_pool {
 	struct resource_pool base;
 };
@@ -81,12 +91,6 @@ bool dcn32_release_post_bldn_3dlut(
 		struct dc_3dlut **lut,
 		struct dc_transfer_func **shaper);
 
-bool dcn32_remove_phantom_pipes(struct dc *dc,
-		struct dc_state *context, bool fast_update);
-
-void dcn32_retain_phantom_pipes(struct dc *dc,
-		struct dc_state *context);
-
 void dcn32_add_phantom_pipes(struct dc *dc,
 		struct dc_state *context,
 		display_e2e_pipe_params_st *pipes,
@@ -159,15 +163,7 @@ void dcn32_determine_det_override(struct dc *dc,
 void dcn32_set_det_allocations(struct dc *dc, struct dc_state *context,
 	display_e2e_pipe_params_st *pipes);
 
-void dcn32_save_mall_state(struct dc *dc,
-		struct dc_state *context,
-		struct mall_temp_config *temp_config);
-
-void dcn32_restore_mall_state(struct dc *dc,
-		struct dc_state *context,
-		struct mall_temp_config *temp_config);
-
-struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, const struct dc_state *context);
+struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context);
 
 bool dcn32_allow_subvp_with_active_margin(struct pipe_ctx *pipe);
 
@@ -183,6 +179,8 @@ bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context);
 
 bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int vlevel);
 
+void dcn32_update_dml_pipes_odm_policy_based_on_context(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes);
+
 /* definitions for run time init of reg offsets */
 
 /* CLK SRC */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
index f7de3eca1225..74412e5f03fe 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
@@ -63,7 +63,7 @@
 #include "dcn31/dcn31_apg.h"
 #include "dcn31/dcn31_dio_link_encoder.h"
 #include "dcn32/dcn32_dio_link_encoder.h"
-#include "dcn321_dio_link_encoder.h"
+#include "dcn321/dcn321_dio_link_encoder.h"
 #include "dce/dce_clock_source.h"
 #include "dce/dce_audio.h"
 #include "dce/dce_hwseq.h"
@@ -92,6 +92,8 @@
 #include "vm_helper.h"
 #include "dcn20/dcn20_vmid.h"
 
+#include "dc_state_priv.h"
+
 #define DC_LOGGER_INIT(logger)
 
 enum dcn321_clk_src_array_id {
@@ -1605,10 +1607,7 @@ static struct resource_funcs dcn321_res_pool_funcs = {
 	.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
 	.update_soc_for_wm_a = dcn30_update_soc_for_wm_a,
 	.add_phantom_pipes = dcn32_add_phantom_pipes,
-	.remove_phantom_pipes = dcn32_remove_phantom_pipes,
-	.retain_phantom_pipes = dcn32_retain_phantom_pipes,
-	.save_mall_state = dcn32_save_mall_state,
-	.restore_mall_state = dcn32_restore_mall_state,
+	.build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params,
 };
 
 static uint32_t read_pipe_fuses(struct dc_context *ctx)
@@ -2007,16 +2006,19 @@ static bool dcn321_resource_construct(
 	dc->dml2_options.callbacks.get_opp_head = &resource_get_opp_head;
 
 	dc->dml2_options.svp_pstate.callbacks.dc = dc;
-	dc->dml2_options.svp_pstate.callbacks.add_plane_to_context = &dc_add_plane_to_context;
-	dc->dml2_options.svp_pstate.callbacks.add_stream_to_ctx = &dc_add_stream_to_ctx;
+	dc->dml2_options.svp_pstate.callbacks.add_phantom_plane = &dc_state_add_phantom_plane;
+	dc->dml2_options.svp_pstate.callbacks.add_phantom_stream = &dc_state_add_phantom_stream;
 	dc->dml2_options.svp_pstate.callbacks.build_scaling_params = &resource_build_scaling_params;
-	dc->dml2_options.svp_pstate.callbacks.create_plane = &dc_create_plane_state;
-	dc->dml2_options.svp_pstate.callbacks.remove_plane_from_context = &dc_remove_plane_from_context;
-	dc->dml2_options.svp_pstate.callbacks.remove_stream_from_ctx = &dc_remove_stream_from_ctx;
-	dc->dml2_options.svp_pstate.callbacks.create_stream_for_sink = &dc_create_stream_for_sink;
-	dc->dml2_options.svp_pstate.callbacks.plane_state_release = &dc_plane_state_release;
-	dc->dml2_options.svp_pstate.callbacks.stream_release = &dc_stream_release;
+	dc->dml2_options.svp_pstate.callbacks.create_phantom_plane = &dc_state_create_phantom_plane;
+	dc->dml2_options.svp_pstate.callbacks.remove_phantom_plane = &dc_state_remove_phantom_plane;
+	dc->dml2_options.svp_pstate.callbacks.remove_phantom_stream = &dc_state_remove_phantom_stream;
+	dc->dml2_options.svp_pstate.callbacks.create_phantom_stream = &dc_state_create_phantom_stream;
+	dc->dml2_options.svp_pstate.callbacks.release_phantom_plane = &dc_state_release_phantom_plane;
+	dc->dml2_options.svp_pstate.callbacks.release_phantom_stream = &dc_state_release_phantom_stream;
 	dc->dml2_options.svp_pstate.callbacks.release_dsc = &dcn20_release_dsc;
+	dc->dml2_options.svp_pstate.callbacks.get_pipe_subvp_type = &dc_state_get_pipe_subvp_type;
+	dc->dml2_options.svp_pstate.callbacks.get_stream_subvp_type = &dc_state_get_stream_subvp_type;
+	dc->dml2_options.svp_pstate.callbacks.get_paired_subvp_stream = &dc_state_get_paired_subvp_stream;
 
 	dc->dml2_options.svp_pstate.subvp_fw_processing_delay_us = dc->caps.subvp_fw_processing_delay_us;
 	dc->dml2_options.svp_pstate.subvp_prefetch_end_to_mall_start_us = dc->caps.subvp_prefetch_end_to_mall_start_us;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.h
index 82cbf009f2d3..82cbf009f2d3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
index c7e011d26d41..5fdcda8f8602 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
@@ -78,7 +78,7 @@
 #include "dcn10/dcn10_resource.h"
 #include "dcn31/dcn31_panel_cntl.h"
 #include "dcn35/dcn35_hwseq.h"
-#include "dcn35_dio_link_encoder.h"
+#include "dcn35/dcn35_dio_link_encoder.h"
 #include "dml/dcn31/dcn31_fpu.h" /*todo*/
 #include "dml/dcn35/dcn35_fpu.h"
 #include "dcn35/dcn35_dwb.h"
@@ -96,12 +96,15 @@
 #include "reg_helper.h"
 #include "dce/dmub_abm.h"
 #include "dce/dmub_psr.h"
+#include "dce/dmub_replay.h"
 #include "dce/dce_aux.h"
 #include "dce/dce_i2c.h"
 #include "dml/dcn31/display_mode_vba_31.h" /*temp*/
 #include "vm_helper.h"
 #include "dcn20/dcn20_vmid.h"
 
+#include "dc_state_priv.h"
+
 #include "link_enc_cfg.h"
 #define DC_LOGGER_INIT(logger)
 
@@ -626,7 +629,19 @@ static struct dce_hwseq_registers hwseq_reg;
 	HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYBSYMCLK_ROOT_GATE_DISABLE, mask_sh), \
 	HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYCSYMCLK_ROOT_GATE_DISABLE, mask_sh), \
 	HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYDSYMCLK_ROOT_GATE_DISABLE, mask_sh), \
-	HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYESYMCLK_ROOT_GATE_DISABLE, mask_sh)
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYESYMCLK_ROOT_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK0_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK1_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK2_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK3_GATE_DISABLE, mask_sh)
 
 static const struct dce_hwseq_shift hwseq_shift = {
 		HWSEQ_DCN35_MASK_SH_LIST(__SHIFT)
@@ -705,7 +720,9 @@ static const struct dc_debug_options debug_defaults_drv = {
 	.disable_dcc = DCC_ENABLE,
 	.disable_dpp_power_gate = true,
 	.disable_hubp_power_gate = true,
-	.disable_clock_gate = true,
+	.disable_optc_power_gate = true, /*should be the same as above two*/
+	.disable_hpo_power_gate = true, /*dmubfw force domain25 on*/
+	.disable_clock_gate = false,
 	.disable_dsc_power_gate = true,
 	.vsr_support = true,
 	.performance_trace = false,
@@ -724,7 +741,7 @@ static const struct dc_debug_options debug_defaults_drv = {
 			.i2c = true,
 			.dmcu = false, // This is previously known to cause hang on S3 cycles if enabled
 			.dscl = true,
-			.cm = false,
+			.cm = true,
 			.mpc = true,
 			.optc = true,
 			.vpg = true,
@@ -752,7 +769,7 @@ static const struct dc_debug_options debug_defaults_drv = {
 	.enable_hpo_pg_support = false,
 	.enable_legacy_fast_update = true,
 	.enable_single_display_2to1_odm_policy = false,
-	.disable_idle_power_optimizations = true,
+	.disable_idle_power_optimizations = false,
 	.dmcub_emulation = false,
 	.disable_boot_optimizations = false,
 	.disable_unbounded_requesting = false,
@@ -763,14 +780,16 @@ static const struct dc_debug_options debug_defaults_drv = {
 	.disable_z10 = false,
 	.ignore_pg = true,
 	.psp_disabled_wa = true,
-	.ips2_eval_delay_us = 200,
-	.ips2_entry_delay_us = 400
+	.ips2_eval_delay_us = 2000,
+	.ips2_entry_delay_us = 800,
+	.static_screen_wait_frames = 2,
 };
 
 static const struct dc_panel_config panel_config_defaults = {
 	.psr = {
 		.disable_psr = false,
 		.disallow_psrsu = false,
+		.disallow_replay = false,
 	},
 	.ilr = {
 		.optimize_edp_link_rate = true,
@@ -1529,6 +1548,9 @@ static void dcn35_resource_destruct(struct dcn35_resource_pool *pool)
 	if (pool->base.psr != NULL)
 		dmub_psr_destroy(&pool->base.psr);
 
+	if (pool->base.replay != NULL)
+		dmub_replay_destroy(&pool->base.replay);
+
 	if (pool->base.pg_cntl != NULL)
 		dcn_pg_cntl_destroy(&pool->base.pg_cntl);
 
@@ -1712,6 +1734,13 @@ static bool dcn35_validate_bandwidth(struct dc *dc,
 
 	out = dml2_validate(dc, context, fast_validate);
 
+	if (fast_validate)
+		return out;
+
+	DC_FP_START();
+	dcn35_decide_zstate_support(dc, context);
+	DC_FP_END();
+
 	return out;
 }
 
@@ -1857,7 +1886,7 @@ static bool dcn35_resource_construct(
 
 	/* Use pipe context based otg sync logic */
 	dc->config.use_pipe_ctx_sync_logic = true;
-	dc->config.use_default_clock_table = false;
+
 	/* read VBIOS LTTPR caps */
 	{
 		if (ctx->dc_bios->funcs->get_lttpr_caps) {
@@ -2006,6 +2035,14 @@ static bool dcn35_resource_construct(
 		goto create_fail;
 	}
 
+	/* Replay */
+	pool->base.replay = dmub_replay_create(ctx);
+	if (pool->base.replay == NULL) {
+		dm_error("DC: failed to create replay obj!\n");
+		BREAK_TO_DEBUGGER();
+		goto create_fail;
+	}
+
 	/* ABM */
 	for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {
 		pool->base.multiple_abms[i] = dmub_abm_create(ctx,
@@ -2093,6 +2130,7 @@ static bool dcn35_resource_construct(
 	dc->dml2_options.dcn_pipe_count = pool->base.pipe_count;
 	dc->dml2_options.use_native_pstate_optimization = true;
 	dc->dml2_options.use_native_soc_bb_construction = true;
+	dc->dml2_options.minimize_dispclk_using_odm = false;
 	if (dc->config.EnableMinDispClkODM)
 		dc->dml2_options.minimize_dispclk_using_odm = true;
 	dc->dml2_options.enable_windowed_mpo_odm = dc->config.enable_windowed_mpo_odm;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h
index 99aea102e3f7..a51c4a9eaafe 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h
@@ -166,6 +166,7 @@ struct resource_pool *dcn35_create_resource_pool(
 	SR(MMHUBBUB_MEM_PWR_CNTL), \
 	SR(DCCG_GATE_DISABLE_CNTL), \
 	SR(DCCG_GATE_DISABLE_CNTL2), \
+	SR(DCCG_GATE_DISABLE_CNTL4), \
 	SR(DCCG_GATE_DISABLE_CNTL5), \
 	SR(DCFCLK_CNTL),\
 	SR(DC_MEM_GLOBAL_PWR_REQ_CNTL), \
diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
index df63aa8f01e9..c78c9224ab60 100644
--- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
@@ -86,6 +86,7 @@ enum dmub_status {
 	DMUB_STATUS_TIMEOUT,
 	DMUB_STATUS_INVALID,
 	DMUB_STATUS_HW_FAILURE,
+	DMUB_STATUS_POWER_STATE_D3
 };
 
 /* enum dmub_asic - dmub asic identifier */
@@ -150,6 +151,13 @@ enum dmub_memory_access_type {
 	DMUB_MEMORY_ACCESS_DMA
 };
 
+/* enum dmub_srv_power_state_type - to track DC power state in dmub_srv */
+enum dmub_srv_power_state_type {
+	DMUB_POWER_STATE_UNDEFINED = 0,
+	DMUB_POWER_STATE_D0 = 1,
+	DMUB_POWER_STATE_D3 = 8
+};
+
 /**
  * struct dmub_region - dmub hw memory region
  * @base: base address for region, must be 256 byte aligned
@@ -485,6 +493,8 @@ struct dmub_srv {
 	/* Feature capabilities reported by fw */
 	struct dmub_feature_caps feature_caps;
 	struct dmub_visual_confirm_color visual_confirm_color;
+
+	enum dmub_srv_power_state_type power_state;
 };
 
 /**
@@ -889,6 +899,18 @@ enum dmub_status dmub_srv_clear_inbox0_ack(struct dmub_srv *dmub);
  */
 void dmub_srv_subvp_save_surf_addr(struct dmub_srv *dmub, const struct dc_plane_address *addr, uint8_t subvp_index);
 
+/**
+ * dmub_srv_set_power_state() - Track DC power state in dmub_srv
+ * @dmub: The dmub service
+ * @dmub_srv_power_state: DC power state setting
+ *
+ * Store DC power state in dmub_srv.  If dmub_srv is in D3, then don't send messages to DMUB
+ *
+ * Return:
+ *   void
+ */
+void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state);
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index ed4379c04715..e699731ee68e 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -185,8 +185,7 @@ union abm_flags {
 		unsigned int disable_abm_requested : 1;
 
 		/**
-		 * @disable_abm_immediately: Indicates if driver has requested ABM to be disabled
-		 * immediately.
+		 * @disable_abm_immediately: Indicates if driver has requested ABM to be disabled immediately.
 		 */
 		unsigned int disable_abm_immediately : 1;
 
@@ -654,7 +653,7 @@ union dmub_fw_boot_options {
 		uint32_t gpint_scratch8: 1; /* 1 if GPINT is in scratch8*/
 		uint32_t usb4_cm_version: 1; /**< 1 CM support */
 		uint32_t dpia_hpd_int_enable_supported: 1; /* 1 if dpia hpd int enable supported */
-		uint32_t usb4_dpia_bw_alloc_supported: 1; /* 1 if USB4 dpia BW allocation supported */
+		uint32_t reserved0: 1;
 		uint32_t disable_clk_ds: 1; /* 1 if disallow dispclk_ds and dppclk_ds*/
 		uint32_t disable_timeout_recovery : 1; /* 1 if timeout recovery should be disabled */
 		uint32_t ips_pg_disable: 1; /* 1 to disable ONO domains power gating*/
@@ -818,18 +817,61 @@ enum dmub_gpint_command {
 	 * RETURN: Lower 32-bit mask.
 	 */
 	DMUB_GPINT__UPDATE_TRACE_BUFFER_MASK = 101,
+
 	/**
-	 * DESC: Updates the trace buffer lower 32-bit mask.
+	 * DESC: Updates the trace buffer mask bit0~bit15.
 	 * ARGS: The new mask
 	 * RETURN: Lower 32-bit mask.
 	 */
 	DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD0 = 102,
+
 	/**
-	 * DESC: Updates the trace buffer mask bi0~bit15.
+	 * DESC: Updates the trace buffer mask bit16~bit31.
 	 * ARGS: The new mask
 	 * RETURN: Lower 32-bit mask.
 	 */
 	DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD1 = 103,
+
+	/**
+	 * DESC: Updates the trace buffer mask bit32~bit47.
+	 * ARGS: The new mask
+	 * RETURN: Lower 32-bit mask.
+	 */
+	DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD2 = 114,
+
+	/**
+	 * DESC: Updates the trace buffer mask bit48~bit63.
+	 * ARGS: The new mask
+	 * RETURN: Lower 32-bit mask.
+	 */
+	DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD3 = 115,
+
+	/**
+	 * DESC: Read the trace buffer mask bit0~bit15.
+	 */
+	DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD0 = 116,
+
+	/**
+	 * DESC: Read the trace buffer mask bit16~bit31.
+	 */
+	DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD1 = 117,
+
+	/**
+	 * DESC: Read the trace buffer mask bit32~bit47.
+	 */
+	DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD2 = 118,
+
+	/**
+	 * DESC: Read the trace buffer mask bit48~bit63.
+	 */
+	DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD3 = 119,
+
+	/**
+	 * DESC: Enable measurements for various task duration
+	 * ARGS: 0 - Disable measurement
+	 *       1 - Enable measurement
+	 */
+	DMUB_GPINT__TRACE_DMUB_WAKE_ACTIVITY = 123,
 };
 
 /**
@@ -1303,6 +1345,10 @@ enum dmub_cmd_cab_type {
 	 * Fit surfaces in CAB (i.e. CAB enable)
 	 */
 	DMUB_CMD__CAB_DCN_SS_FIT_IN_CAB = 2,
+	/**
+	 * Do not fit surfaces in CAB (i.e. no CAB)
+	 */
+	DMUB_CMD__CAB_DCN_SS_NOT_FIT_IN_CAB = 3,
 };
 
 /**
@@ -2786,6 +2832,7 @@ struct dmub_rb_cmd_psr_set_power_opt {
 #define REPLAY_RESIDENCY_MODE_MASK             (0x1 << REPLAY_RESIDENCY_MODE_SHIFT)
 # define REPLAY_RESIDENCY_MODE_PHY             (0x0 << REPLAY_RESIDENCY_MODE_SHIFT)
 # define REPLAY_RESIDENCY_MODE_ALPM            (0x1 << REPLAY_RESIDENCY_MODE_SHIFT)
+# define REPLAY_RESIDENCY_MODE_IPS             0x10
 
 #define REPLAY_RESIDENCY_ENABLE_MASK           (0x1 << REPLAY_RESIDENCY_ENABLE_SHIFT)
 # define REPLAY_RESIDENCY_DISABLE              (0x0 << REPLAY_RESIDENCY_ENABLE_SHIFT)
@@ -2840,6 +2887,18 @@ enum dmub_cmd_replay_type {
 	 * Set power opt and coasting vtotal.
 	 */
 	DMUB_CMD__REPLAY_SET_POWER_OPT_AND_COASTING_VTOTAL	= 4,
+	/**
+	 * Set disabled timing sync.
+	 */
+	DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED	= 5,
+	/**
+	 * Set Residency Frameupdate Timer.
+	 */
+	DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER = 6,
+	/**
+	 * Set pseudo vtotal
+	 */
+	DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL = 7,
 };
 
 /**
@@ -3003,6 +3062,46 @@ struct dmub_cmd_replay_set_power_opt_data {
 };
 
 /**
+ * Data passed from driver to FW in a DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED command.
+ */
+struct dmub_cmd_replay_set_timing_sync_data {
+	/**
+	 * Panel Instance.
+	 * Panel instance to identify which replay_state to use
+	 * Currently the support is only for 0 or 1
+	 */
+	uint8_t panel_inst;
+	/**
+	 * REPLAY set_timing_sync
+	 */
+	uint8_t timing_sync_supported;
+	/**
+	 * Explicit padding to 4 byte boundary.
+	 */
+	uint8_t pad[2];
+};
+
+/**
+ * Data passed from driver to FW in a DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command.
+ */
+struct dmub_cmd_replay_set_pseudo_vtotal {
+	/**
+	 * Panel Instance.
+	 * Panel instance to identify which replay_state to use
+	 * Currently the support is only for 0 or 1
+	 */
+	uint8_t panel_inst;
+	/**
+	 * Source Vtotal that Replay + IPS + ABM full screen video src vtotal
+	 */
+	uint16_t vtotal;
+	/**
+	 * Explicit padding to 4 byte boundary.
+	 */
+	uint8_t pad;
+};
+
+/**
  * Definition of a DMUB_CMD__SET_REPLAY_POWER_OPT command.
  */
 struct dmub_rb_cmd_replay_set_power_opt {
@@ -3069,6 +3168,91 @@ struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal {
 };
 
 /**
+ * Definition of a DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED command.
+ */
+struct dmub_rb_cmd_replay_set_timing_sync {
+	/**
+	 * Command header.
+	 */
+	struct dmub_cmd_header header;
+	/**
+	 * Definition of DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED command.
+	 */
+	struct dmub_cmd_replay_set_timing_sync_data replay_set_timing_sync_data;
+};
+
+/**
+ * Definition of a DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command.
+ */
+struct dmub_rb_cmd_replay_set_pseudo_vtotal {
+	/**
+	 * Command header.
+	 */
+	struct dmub_cmd_header header;
+	/**
+	 * Definition of DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command.
+	 */
+	struct dmub_cmd_replay_set_pseudo_vtotal data;
+};
+
+/**
+ * Data passed from driver to FW in a DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command.
+ */
+struct dmub_cmd_replay_frameupdate_timer_data {
+	/**
+	 * Panel Instance.
+	 * Panel instance to identify which replay_state to use
+	 * Currently the support is only for 0 or 1
+	 */
+	uint8_t panel_inst;
+	/**
+	 * Replay Frameupdate Timer Enable or not
+	 */
+	uint8_t enable;
+	/**
+	 * REPLAY force reflash frame update number
+	 */
+	uint16_t frameupdate_count;
+};
+/**
+ * Definition of DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER
+ */
+struct dmub_rb_cmd_replay_set_frameupdate_timer {
+	/**
+	 * Command header.
+	 */
+	struct dmub_cmd_header header;
+	/**
+	 * Definition of DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command data.
+	 */
+	struct dmub_cmd_replay_frameupdate_timer_data data;
+};
+
+/**
+ * Definition union of replay command set
+ */
+union dmub_replay_cmd_set {
+	/**
+	 * Panel Instance.
+	 * Panel instance to identify which replay_state to use
+	 * Currently the support is only for 0 or 1
+	 */
+	uint8_t panel_inst;
+	/**
+	 * Definition of DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED command data.
+	 */
+	struct dmub_cmd_replay_set_timing_sync_data sync_data;
+	/**
+	 * Definition of DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command data.
+	 */
+	struct dmub_cmd_replay_frameupdate_timer_data timer_data;
+	/**
+	 * Definition of DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command data.
+	 */
+	struct dmub_cmd_replay_set_pseudo_vtotal pseudo_vtotal_data;
+};
+
+/**
  * Set of HW components that can be locked.
  *
  * Note: If updating with more HW components, fields
@@ -3357,6 +3541,16 @@ struct dmub_cmd_abm_set_pipe_data {
 	 * TODO: Remove.
 	 */
 	uint8_t ramping_boundary;
+
+	/**
+	 * PwrSeq HW Instance.
+	 */
+	uint8_t pwrseq_inst;
+
+	/**
+	 * Explicit padding to 4 byte boundary.
+	 */
+	uint8_t pad[3];
 };
 
 /**
@@ -3737,7 +3931,7 @@ enum dmub_cmd_panel_cntl_type {
  * struct dmub_cmd_panel_cntl_data - Panel control data.
  */
 struct dmub_cmd_panel_cntl_data {
-	uint32_t inst; /**< panel instance */
+	uint32_t pwrseq_inst; /**< pwrseq instance */
 	uint32_t current_backlight; /* in/out */
 	uint32_t bl_pwm_cntl; /* in/out */
 	uint32_t bl_pwm_period_cntl; /* in/out */
@@ -3796,7 +3990,7 @@ struct dmub_cmd_lvtma_control_data {
 	uint8_t uc_pwr_action; /**< LVTMA_ACTION */
 	uint8_t bypass_panel_control_wait;
 	uint8_t reserved_0[2]; /**< For future use */
-	uint8_t panel_inst; /**< LVTMA control instance */
+	uint8_t pwrseq_inst; /**< LVTMA control instance */
 	uint8_t reserved_1[3]; /**< For future use */
 };
 
@@ -4201,6 +4395,16 @@ union dmub_rb_cmd {
 	 * Definition of a DMUB_CMD__REPLAY_SET_POWER_OPT_AND_COASTING_VTOTAL command.
 	 */
 	struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal replay_set_power_opt_and_coasting_vtotal;
+
+	struct dmub_rb_cmd_replay_set_timing_sync replay_set_timing_sync;
+	/**
+	 * Definition of a DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command.
+	 */
+	struct dmub_rb_cmd_replay_set_frameupdate_timer replay_set_frameupdate_timer;
+	/**
+	 * Definition of a DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command.
+	 */
+	struct dmub_rb_cmd_replay_set_pseudo_vtotal replay_set_pseudo_vtotal;
 };
 
 /**
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
index 22fc4ba96def..9ad738805320 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
@@ -64,7 +64,7 @@
 
 
 /* Default scratch mem size. */
-#define DMUB_SCRATCH_MEM_SIZE (256)
+#define DMUB_SCRATCH_MEM_SIZE (1024)
 
 /* Number of windows in use. */
 #define DMUB_NUM_WINDOWS (DMUB_WINDOW_TOTAL)
@@ -713,6 +713,7 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub,
 		dmub->hw_funcs.reset_release(dmub);
 
 	dmub->hw_init = true;
+	dmub->power_state = DMUB_POWER_STATE_D0;
 
 	return DMUB_STATUS_OK;
 }
@@ -766,6 +767,9 @@ enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub,
 	if (!dmub->hw_init)
 		return DMUB_STATUS_INVALID;
 
+	if (dmub->power_state != DMUB_POWER_STATE_D0)
+		return DMUB_STATUS_POWER_STATE_D3;
+
 	if (dmub->inbox1_rb.rptr > dmub->inbox1_rb.capacity ||
 	    dmub->inbox1_rb.wrpt > dmub->inbox1_rb.capacity) {
 		return DMUB_STATUS_HW_FAILURE;
@@ -784,6 +788,9 @@ enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub)
 	if (!dmub->hw_init)
 		return DMUB_STATUS_INVALID;
 
+	if (dmub->power_state != DMUB_POWER_STATE_D0)
+		return DMUB_STATUS_POWER_STATE_D3;
+
 	/**
 	 * Read back all the queued commands to ensure that they've
 	 * been flushed to framebuffer memory. Otherwise DMCUB might
@@ -1077,6 +1084,7 @@ enum dmub_status dmub_srv_wait_for_inbox0_ack(struct dmub_srv *dmub, uint32_t ti
 		ack = dmub->hw_funcs.read_inbox0_ack_register(dmub);
 		if (ack)
 			return DMUB_STATUS_OK;
+		udelay(1);
 	}
 	return DMUB_STATUS_TIMEOUT;
 }
@@ -1099,3 +1107,11 @@ void dmub_srv_subvp_save_surf_addr(struct dmub_srv *dmub, const struct dc_plane_
 				subvp_index);
 	}
 }
+
+void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state)
+{
+	if (!dmub || !dmub->hw_init)
+		return;
+
+	dmub->power_state = dmub_srv_power_state;
+}
diff --git a/drivers/gpu/drm/amd/display/include/audio_types.h b/drivers/gpu/drm/amd/display/include/audio_types.h
index 66a54da0641c..915a031a43cb 100644
--- a/drivers/gpu/drm/amd/display/include/audio_types.h
+++ b/drivers/gpu/drm/amd/display/include/audio_types.h
@@ -64,7 +64,7 @@ enum audio_dto_source {
 /* PLL information required for AZALIA DTO calculation */
 
 struct audio_pll_info {
-	uint32_t dp_dto_source_clock_in_khz;
+	uint32_t audio_dto_source_clock_in_khz;
 	uint32_t feed_back_divider;
 	enum audio_dto_source dto_source;
 	bool ss_enabled;
diff --git a/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h b/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h
index bc96d0211360..813463ffe15c 100644
--- a/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h
+++ b/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h
@@ -417,6 +417,8 @@ struct integrated_info {
 	/* V2.1 */
 	struct edp_info edp1_info;
 	struct edp_info edp2_info;
+	uint32_t gpuclk_ss_percentage;
+	uint32_t gpuclk_ss_type;
 };
 
 /*
diff --git a/drivers/gpu/drm/amd/display/include/hdcp_msg_types.h b/drivers/gpu/drm/amd/display/include/hdcp_msg_types.h
index 42229b4effdc..eced9ad91f1d 100644
--- a/drivers/gpu/drm/amd/display/include/hdcp_msg_types.h
+++ b/drivers/gpu/drm/amd/display/include/hdcp_msg_types.h
@@ -69,6 +69,11 @@ enum hdcp_message_id {
 	HDCP_MESSAGE_ID_READ_RXSTATUS,
 	HDCP_MESSAGE_ID_WRITE_CONTENT_STREAM_TYPE,
 
+	/* PS175 chip */
+
+	HDCP_MESSAGE_ID_WRITE_PS175_CMD,
+	HDCP_MESSAGE_ID_READ_PS175_RSP,
+
 	HDCP_MESSAGE_ID_MAX
 };
 
diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
index ccecddafeb05..3955b7e4b2e2 100644
--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
@@ -81,6 +81,7 @@ fail_alloc_context:
 void mod_freesync_destroy(struct mod_freesync *mod_freesync)
 {
 	struct core_freesync *core_freesync = NULL;
+
 	if (mod_freesync == NULL)
 		return;
 	core_freesync = MOD_FREESYNC_TO_CORE(mod_freesync);
@@ -278,9 +279,8 @@ static void apply_below_the_range(struct core_freesync *core_freesync,
 		}
 	} else if (last_render_time_in_us > (max_render_time_in_us + in_out_vrr->btr.margin_in_us / 2)) {
 		/* Enter Below the Range */
-		if (!in_out_vrr->btr.btr_active) {
+		if (!in_out_vrr->btr.btr_active)
 			in_out_vrr->btr.btr_active = true;
-		}
 	}
 
 	/* BTR set to "not active" so disengage */
@@ -693,10 +693,12 @@ static void build_vrr_infopacket_fs2_data(enum color_transfer_func app_tf,
 	if (app_tf != TRANSFER_FUNC_UNKNOWN) {
 		infopacket->valid = true;
 
-		if (app_tf != TRANSFER_FUNC_PQ2084) {
+		if (app_tf == TRANSFER_FUNC_PQ2084)
+			infopacket->sb[9] |= 0x20; // PB9 = [Bit 5 = PQ EOTF Active]
+		else {
 			infopacket->sb[6] |= 0x08;  // PB6 = [Bit 3 = Native Color Active]
 			if (app_tf == TRANSFER_FUNC_GAMMA_22)
-				infopacket->sb[9] |= 0x04;  // PB6 = [Bit 2 = Gamma 2.2 EOTF Active]
+				infopacket->sb[9] |= 0x04;  // PB9 = [Bit 2 = Gamma 2.2 EOTF Active]
 		}
 	}
 }
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c
index 1ddb4f5eac8e..182e7532dda8 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c
@@ -63,6 +63,7 @@ static inline enum mod_hdcp_status check_hdcp_capable_dp(struct mod_hdcp *hdcp)
 static inline enum mod_hdcp_status check_r0p_available_dp(struct mod_hdcp *hdcp)
 {
 	enum mod_hdcp_status status;
+
 	if (is_dp_hdcp(hdcp)) {
 		status = (hdcp->auth.msg.hdcp1.bstatus &
 				DP_BSTATUS_R0_PRIME_READY) ?
@@ -131,9 +132,8 @@ static inline uint8_t get_device_count(struct mod_hdcp *hdcp)
 static inline enum mod_hdcp_status check_device_count(struct mod_hdcp *hdcp)
 {
 	/* Avoid device count == 0 to do authentication */
-	if (0 == get_device_count(hdcp)) {
+	if (get_device_count(hdcp) == 0)
 		return MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE;
-	}
 
 	/* Some MST display may choose to report the internal panel as an HDCP RX.
 	 * To update this condition with 1(because the immediate repeater's internal
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c
index 91c22b96ebde..733f22bed021 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c
@@ -208,9 +208,8 @@ static inline uint8_t get_device_count(struct mod_hdcp *hdcp)
 static enum mod_hdcp_status check_device_count(struct mod_hdcp *hdcp)
 {
 	/* Avoid device count == 0 to do authentication */
-	if (0 == get_device_count(hdcp)) {
+	if (get_device_count(hdcp) == 0)
 		return MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE;
-	}
 
 	/* Some MST display may choose to report the internal panel as an HDCP RX.   */
 	/* To update this condition with 1(because the immediate repeater's internal */
@@ -689,9 +688,8 @@ static enum mod_hdcp_status validate_stream_ready(struct mod_hdcp *hdcp,
 	if (is_hdmi_dvi_sl_hdcp(hdcp)) {
 		if (!process_rxstatus(hdcp, event_ctx, input, &status))
 			goto out;
-		if (event_ctx->rx_id_list_ready) {
+		if (event_ctx->rx_id_list_ready)
 			goto out;
-		}
 	}
 	if (is_hdmi_dvi_sl_hdcp(hdcp))
 		if (!mod_hdcp_execute_and_set(check_stream_ready_available,
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h
index c62df3bcc7cb..1d83c1b9da10 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h
@@ -86,10 +86,12 @@
 #define HDCP_CPIRQ_TRACE(hdcp) \
 		HDCP_LOG_FSM(hdcp, "[Link %d] --> CPIRQ", hdcp->config.index)
 #define HDCP_EVENT_TRACE(hdcp, event) \
-		if (event == MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) \
-			HDCP_TIMEOUT_TRACE(hdcp); \
-		else if (event == MOD_HDCP_EVENT_CPIRQ) \
-			HDCP_CPIRQ_TRACE(hdcp)
+		do { \
+			if (event == MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) \
+				HDCP_TIMEOUT_TRACE(hdcp); \
+			else if (event == MOD_HDCP_EVENT_CPIRQ) \
+				HDCP_CPIRQ_TRACE(hdcp);	\
+		} while (0)
 /* TODO: find some way to tell if logging is off to save time */
 #define HDCP_DDC_READ_TRACE(hdcp, msg_name, msg, msg_size) do { \
 		mod_hdcp_dump_binary_message(msg, msg_size, hdcp->buf, \
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
index ee67a35c2a8e..8c137d7c032e 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
@@ -443,7 +443,7 @@ enum mod_hdcp_status mod_hdcp_hdcp1_enable_dp_stream_encryption(struct mod_hdcp
 	for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++) {
 
 		if (hdcp->displays[i].adjust.disable || hdcp->displays[i].state != MOD_HDCP_DISPLAY_ACTIVE)
-				continue;
+			continue;
 
 		memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
 
@@ -926,7 +926,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_enable_dp_stream_encryption(struct mod_hdcp
 
 	for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++) {
 		if (hdcp->displays[i].adjust.disable || hdcp->displays[i].state != MOD_HDCP_DISPLAY_ACTIVE)
-				continue;
+			continue;
 
 		hdcp_cmd->in_msg.hdcp2_enable_dp_stream_encryption.display_handle = hdcp->displays[i].index;
 		hdcp_cmd->in_msg.hdcp2_enable_dp_stream_encryption.session_handle = hdcp->auth.id;
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h
index 5b71bc96b98c..7844ea91650b 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h
@@ -98,9 +98,9 @@ enum ta_dtm_encoder_type {
  * This enum defines software value for dio_output_type
  */
 typedef enum {
-    TA_DTM_DIO_OUTPUT_TYPE__INVALID,
-    TA_DTM_DIO_OUTPUT_TYPE__DIRECT,
-    TA_DTM_DIO_OUTPUT_TYPE__DPIA
+	TA_DTM_DIO_OUTPUT_TYPE__INVALID,
+	TA_DTM_DIO_OUTPUT_TYPE__DIRECT,
+	TA_DTM_DIO_OUTPUT_TYPE__DPIA
 } ta_dtm_dio_output_type;
 
 struct ta_dtm_topology_update_input_v3 {
@@ -237,11 +237,11 @@ enum ta_hdcp2_hdcp2_msg_id_max_size {
 #define TA_HDCP__HDCP1_KSV_LIST_MAX_ENTRIES 127
 #define TA_HDCP__HDCP1_V_PRIME_SIZE 20
 #define TA_HDCP__HDCP2_TX_BUF_MAX_SIZE                                                                                 \
-	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_NO_STORED_KM + TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_STORED_KM + 6
+	(TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_NO_STORED_KM + TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_STORED_KM + 6)
 
 // 64 bits boundaries
 #define TA_HDCP__HDCP2_RX_BUF_MAX_SIZE                                                                                 \
-	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_SEND_CERT + TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_RECEIVER_INFO + 4
+	(TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_SEND_CERT + TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_RECEIVER_INFO + 4)
 
 enum ta_hdcp_status {
 	TA_HDCP_STATUS__SUCCESS = 0x00,
diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h
index afe1f6cce528..cc3dc9b589f6 100644
--- a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h
+++ b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h
@@ -23,34 +23,6 @@
  *
  */
 
-
-
-
-/*
- * Copyright 2016 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
 #ifndef MOD_FREESYNC_H_
 #define MOD_FREESYNC_H_
 
diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
index 84f9b412a4f1..738ee763f24a 100644
--- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
+++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
@@ -147,12 +147,15 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
 	}
 
 	/* VSC packet set to 4 for PSR-SU, or 2 for PSR1 */
-	if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_SU_1)
-		vsc_packet_revision = vsc_packet_rev4;
-	else if (stream->link->replay_settings.config.replay_supported)
+	if (stream->link->psr_settings.psr_feature_enabled) {
+		if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_SU_1)
+			vsc_packet_revision = vsc_packet_rev4;
+		else if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_1)
+			vsc_packet_revision = vsc_packet_rev2;
+	}
+
+	if (stream->link->replay_settings.config.replay_supported)
 		vsc_packet_revision = vsc_packet_rev4;
-	else if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_1)
-		vsc_packet_revision = vsc_packet_rev2;
 
 	/* Update to revision 5 for extended colorimetry support */
 	if (stream->use_vsc_sdp_for_colorimetry)
diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
index a522a7c02911..e304e8435fb8 100644
--- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
+++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
@@ -31,7 +31,7 @@
 
 #define DIV_ROUNDUP(a, b) (((a)+((b)/2))/(b))
 #define bswap16_based_on_endian(big_endian, value) \
-	(big_endian) ? cpu_to_be16(value) : cpu_to_le16(value)
+	((big_endian) ? cpu_to_be16(value) : cpu_to_le16(value))
 
 /* Possible Min Reduction config from least aggressive to most aggressive
  *  0    1     2     3     4     5     6     7     8     9     10    11   12
@@ -839,6 +839,8 @@ bool is_psr_su_specific_panel(struct dc_link *link)
 				((dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x08) ||
 				(dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x07)))
 				isPSRSUSupported = false;
+			else if (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x03)
+				isPSRSUSupported = false;
 			else if (dpcd_caps->psr_info.force_psrsu_cap == 0x1)
 				isPSRSUSupported = true;
 		}
@@ -971,6 +973,39 @@ bool psr_su_set_dsc_slice_height(struct dc *dc, struct dc_link *link,
 	return true;
 }
 
+void set_replay_coasting_vtotal(struct dc_link *link,
+	enum replay_coasting_vtotal_type type,
+	uint16_t vtotal)
+{
+	link->replay_settings.coasting_vtotal_table[type] = vtotal;
+}
+
+void set_replay_ips_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal)
+{
+	link->replay_settings.abm_with_ips_on_full_screen_video_pseudo_vtotal = vtotal;
+}
+
+/* Set replay_settings.link_off_frame_count_level from dpcd_caps.pr_info and htotal/vtotal. */
+void calculate_replay_link_off_frame_count(struct dc_link *link,
+	uint16_t vtotal, uint16_t htotal)
+{
+	/* Unsigned 32-bit math: no signed int-promotion overflow, no truncation of quotients > 255. */
+	uint32_t max_link_off_frame_count = 0;
+	uint32_t max_deviation_line = link->dpcd_caps.pr_info.max_deviation_line;
+	uint32_t pixel_deviation_per_line = link->dpcd_caps.pr_info.pixel_deviation_per_line;
+
+	/* pixel_deviation_per_line is a factor of the divisor, so it must be non-zero as well. */
+	if (htotal != 0 && vtotal != 0 && pixel_deviation_per_line != 0)
+		max_link_off_frame_count = htotal * max_deviation_line / (pixel_deviation_per_line * vtotal);
+	else
+		ASSERT(0);
+
+	link->replay_settings.link_off_frame_count_level =
+		max_link_off_frame_count >= PR_LINK_OFF_FRAME_COUNT_BEST ? PR_LINK_OFF_FRAME_COUNT_BEST :
+		max_link_off_frame_count >= PR_LINK_OFF_FRAME_COUNT_GOOD ? PR_LINK_OFF_FRAME_COUNT_GOOD :
+		PR_LINK_OFF_FRAME_COUNT_FAIL;
+}
+
 bool fill_custom_backlight_caps(unsigned int config_no, struct dm_acpi_atif_backlight_caps *caps)
 {
 	unsigned int data_points_size;
diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h
index d9e0d67d67f7..bef4815e1703 100644
--- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h
+++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h
@@ -54,6 +54,12 @@ bool dmub_init_abm_config(struct resource_pool *res_pool,
 		unsigned int inst);
 
 void init_replay_config(struct dc_link *link, struct replay_config *pr_config);
+void set_replay_coasting_vtotal(struct dc_link *link,
+	enum replay_coasting_vtotal_type type,
+	uint16_t vtotal);
+void set_replay_ips_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal);
+void calculate_replay_link_off_frame_count(struct dc_link *link,
+	uint16_t vtotal, uint16_t htotal);
 
 bool is_psr_su_specific_panel(struct dc_link *link);
 void mod_power_calc_psr_configs(struct psr_config *psr_config,
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index 7f98394338c2..df2c7ffe190f 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -244,7 +244,6 @@ enum DC_FEATURE_MASK {
 	DC_DISABLE_LTTPR_DP2_0 = (1 << 6), //0x40, disabled by default
 	DC_PSR_ALLOW_SMU_OPT = (1 << 7), //0x80, disabled by default
 	DC_PSR_ALLOW_MULTI_DISP_OPT = (1 << 8), //0x100, disabled by default
-	DC_REPLAY_MASK = (1 << 9), //0x200, disabled by default for dcn < 3.1.4
 };
 
 enum DC_DEBUG_MASK {
@@ -255,8 +254,11 @@ enum DC_DEBUG_MASK {
 	DC_DISABLE_PSR = 0x10,
 	DC_FORCE_SUBVP_MCLK_SWITCH = 0x20,
 	DC_DISABLE_MPO = 0x40,
-	DC_DISABLE_REPLAY = 0x50,
 	DC_ENABLE_DPIA_TRACE = 0x80,
+	DC_ENABLE_DML2 = 0x100,
+	DC_DISABLE_PSR_SU = 0x200,
+	DC_DISABLE_REPLAY = 0x400,
+	DC_DISABLE_IPS = 0x800,
 };
 
 enum amd_dpm_forced_level;
diff --git a/drivers/gpu/drm/amd/include/amdgpu_reg_state.h b/drivers/gpu/drm/amd/include/amdgpu_reg_state.h
new file mode 100644
index 000000000000..335980e2afbf
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/amdgpu_reg_state.h
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_REG_STATE_H__
+#define __AMDGPU_REG_STATE_H__
+
+enum amdgpu_reg_state {
+	AMDGPU_REG_STATE_TYPE_INVALID	= 0,
+	AMDGPU_REG_STATE_TYPE_XGMI	= 1,
+	AMDGPU_REG_STATE_TYPE_WAFL	= 2,
+	AMDGPU_REG_STATE_TYPE_PCIE	= 3,
+	AMDGPU_REG_STATE_TYPE_USR	= 4,
+	AMDGPU_REG_STATE_TYPE_USR_1	= 5
+};
+
+enum amdgpu_sysfs_reg_offset {
+	AMDGPU_SYS_REG_STATE_XGMI	= 0x0000,
+	AMDGPU_SYS_REG_STATE_WAFL	= 0x1000,
+	AMDGPU_SYS_REG_STATE_PCIE	= 0x2000,
+	AMDGPU_SYS_REG_STATE_USR	= 0x3000,
+	AMDGPU_SYS_REG_STATE_USR_1	= 0x4000,
+	AMDGPU_SYS_REG_STATE_END	= 0x5000,
+};
+
+struct amdgpu_reg_state_header {
+	uint16_t		structure_size;
+	uint8_t			format_revision;
+	uint8_t			content_revision;
+	uint8_t			state_type;
+	uint8_t			num_instances;
+	uint16_t		pad;
+};
+
+enum amdgpu_reg_inst_state {
+	AMDGPU_INST_S_OK,
+	AMDGPU_INST_S_EDISABLED,
+	AMDGPU_INST_S_EACCESS,
+};
+
+struct amdgpu_smn_reg_data {
+	uint64_t addr;
+	uint32_t value;
+	uint32_t pad;
+};
+
+struct amdgpu_reg_inst_header {
+	uint16_t	instance;
+	uint16_t	state;
+	uint16_t	num_smn_regs;
+	uint16_t	pad;
+};
+
+/* One XGMI instance entry: an instance header followed by a flexible array of SMN register data. */
+struct amdgpu_regs_xgmi_v1_0 {
+	struct amdgpu_reg_inst_header	inst_header;
+
+	struct amdgpu_smn_reg_data	smn_reg_values[];
+};
+
+struct amdgpu_reg_state_xgmi_v1_0 {
+	/* common_header.state_type must be AMDGPU_REG_STATE_TYPE_XGMI */
+	struct amdgpu_reg_state_header	common_header;
+
+	struct amdgpu_regs_xgmi_v1_0	xgmi_state_regs[];
+};
+
+struct amdgpu_regs_wafl_v1_0 {
+	struct amdgpu_reg_inst_header	inst_header;
+
+	struct amdgpu_smn_reg_data	smn_reg_values[];
+};
+
+struct amdgpu_reg_state_wafl_v1_0 {
+	/* common_header.state_type must be AMDGPU_REG_STATE_TYPE_WAFL */
+	struct amdgpu_reg_state_header	common_header;
+
+	struct amdgpu_regs_wafl_v1_0	wafl_state_regs[];
+};
+
+struct amdgpu_regs_pcie_v1_0 {
+	struct amdgpu_reg_inst_header	inst_header;
+
+	uint16_t			device_status;
+	uint16_t			link_status;
+	uint32_t			sub_bus_number_latency;
+	uint32_t			pcie_corr_err_status;
+	uint32_t			pcie_uncorr_err_status;
+
+	struct amdgpu_smn_reg_data	smn_reg_values[];
+};
+
+struct amdgpu_reg_state_pcie_v1_0 {
+	/* common_header.state_type must be AMDGPU_REG_STATE_TYPE_PCIE */
+	struct amdgpu_reg_state_header	common_header;
+
+	struct amdgpu_regs_pcie_v1_0	pci_state_regs[];
+};
+
+struct amdgpu_regs_usr_v1_0 {
+	struct amdgpu_reg_inst_header	inst_header;
+
+	struct amdgpu_smn_reg_data	smn_reg_values[];
+};
+
+struct amdgpu_reg_state_usr_v1_0 {
+	/* common_header.state_type must be AMDGPU_REG_STATE_TYPE_USR */
+	struct amdgpu_reg_state_header	common_header;
+
+	struct amdgpu_regs_usr_v1_0	usr_state_regs[];
+};
+
+static inline size_t amdgpu_reginst_size(uint16_t num_inst, size_t inst_size,
+					 uint16_t num_regs)
+{
+	/* Bytes for num_inst instances, each inst_size plus num_regs SMN register entries. */
+	return (inst_size + sizeof(struct amdgpu_smn_reg_data) * num_regs) * num_inst;
+}
+
+#define amdgpu_asic_get_reg_state_supported(adev) \
+	(((adev)->asic_funcs && (adev)->asic_funcs->get_reg_state) ? 1 : 0)
+
+#define amdgpu_asic_get_reg_state(adev, state, buf, size)                  \
+	((adev)->asic_funcs->get_reg_state ?                               \
+		 (adev)->asic_funcs->get_reg_state((adev), (state), (buf), \
+						   (size)) :               \
+		 0)
+
+
+int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev);
+void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_0_sh_mask.h
index b64664879211..fca72e2ec929 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_0_sh_mask.h
@@ -6220,12 +6220,20 @@
 #define DCCG_GATE_DISABLE_CNTL4__PHYD_REFCLK_ROOT_GATE_DISABLE__SHIFT                                         0x3
 #define DCCG_GATE_DISABLE_CNTL4__PHYE_REFCLK_ROOT_GATE_DISABLE__SHIFT                                         0x4
 #define DCCG_GATE_DISABLE_CNTL4__HDMICHARCLK0_ROOT_GATE_DISABLE__SHIFT                                        0x11
+#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK0_GATE_DISABLE__SHIFT                                              0x17
+#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK1_GATE_DISABLE__SHIFT                                              0x18
+#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK2_GATE_DISABLE__SHIFT                                              0x19
+#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK3_GATE_DISABLE__SHIFT                                              0x1a
 #define DCCG_GATE_DISABLE_CNTL4__PHYA_REFCLK_ROOT_GATE_DISABLE_MASK                                           0x00000001L
 #define DCCG_GATE_DISABLE_CNTL4__PHYB_REFCLK_ROOT_GATE_DISABLE_MASK                                           0x00000002L
 #define DCCG_GATE_DISABLE_CNTL4__PHYC_REFCLK_ROOT_GATE_DISABLE_MASK                                           0x00000004L
 #define DCCG_GATE_DISABLE_CNTL4__PHYD_REFCLK_ROOT_GATE_DISABLE_MASK                                           0x00000008L
 #define DCCG_GATE_DISABLE_CNTL4__PHYE_REFCLK_ROOT_GATE_DISABLE_MASK                                           0x00000010L
 #define DCCG_GATE_DISABLE_CNTL4__HDMICHARCLK0_ROOT_GATE_DISABLE_MASK                                          0x00020000L
+#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK0_GATE_DISABLE_MASK                                                0x00800000L
+#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK1_GATE_DISABLE_MASK                                                0x01000000L
+#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK2_GATE_DISABLE_MASK                                                0x02000000L
+#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK3_GATE_DISABLE_MASK                                                0x04000000L
 #define DPSTREAMCLK_CNTL__DPSTREAMCLK0_SRC_SEL__SHIFT                                                         0x0
 #define DPSTREAMCLK_CNTL__DPSTREAMCLK0_EN__SHIFT                                                              0x3
 #define DPSTREAMCLK_CNTL__DPSTREAMCLK1_SRC_SEL__SHIFT                                                         0x4
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h
index c92c4b83253f..4bff1ef8a9a6 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h
@@ -6369,6 +6369,8 @@
 #define regTCP_INVALIDATE_BASE_IDX                                                                      1
 #define regTCP_STATUS                                                                                   0x19a1
 #define regTCP_STATUS_BASE_IDX                                                                          1
+#define regTCP_CNTL                                                                                     0x19a2
+#define regTCP_CNTL_BASE_IDX                                                                            1
 #define regTCP_CNTL2                                                                                    0x19a3
 #define regTCP_CNTL2_BASE_IDX                                                                           1
 #define regTCP_DEBUG_INDEX                                                                              0x19a5
diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h
index ff30f04be591..6f80bfa7e41a 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h
@@ -781,6 +781,8 @@
 #define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2_BASE_IDX                                              5
 #define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1                                             0x420187
 #define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1_BASE_IDX                                    5
+#define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3                                                  0x4201c6
+#define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3_BASE_IDX                                         5
 
 
 // addressBlock: nbio_nbif0_bif_cfg_dev0_rc_bifcfgdecp
@@ -8705,10 +8707,10 @@
 #define regBIF_BX1_MM_CFGREGS_CNTL_BASE_IDX                                                             2
 #define regBIF_BX1_BX_RESET_CNTL                                                                        0x00f0
 #define regBIF_BX1_BX_RESET_CNTL_BASE_IDX                                                               2
-#define regBIF_BX1_INTERRUPT_CNTL                                                                       0x8e11
-#define regBIF_BX1_INTERRUPT_CNTL_BASE_IDX                                                              5
-#define regBIF_BX1_INTERRUPT_CNTL2                                                                      0x8e12
-#define regBIF_BX1_INTERRUPT_CNTL2_BASE_IDX                                                             5
+#define regBIF_BX1_INTERRUPT_CNTL                                                                       0x00f1
+#define regBIF_BX1_INTERRUPT_CNTL_BASE_IDX                                                              2
+#define regBIF_BX1_INTERRUPT_CNTL2                                                                      0x00f2
+#define regBIF_BX1_INTERRUPT_CNTL2_BASE_IDX                                                             2
 #define regBIF_BX1_CLKREQB_PAD_CNTL                                                                     0x00f8
 #define regBIF_BX1_CLKREQB_PAD_CNTL_BASE_IDX                                                            2
 #define regBIF_BX1_BIF_FEATURES_CONTROL_MISC                                                            0x00fb
diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h
index 7f131999a263..eb8c556d9c93 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h
@@ -24646,6 +24646,35 @@
 //BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1
 #define BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK                                  0x00000001L
 #define BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK                               0x00000008L
+//BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_PAYLOAD_SIZE_MODE__SHIFT                     0x8
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_PRIV_MAX_PAYLOAD_SIZE__SHIFT                     0x9
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_10BIT_TAG_EN_OVERRIDE__SHIFT                          0xb
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_10BIT_TAG_EN_OVERRIDE__SHIFT                     0xd
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__MST_DROP_SYNC_FLOOD_EN__SHIFT                            0xf
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_PAYLOAD_SIZE_MODE__SHIFT                          0x10
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_PRIV_MAX_PAYLOAD_SIZE__SHIFT                          0x11
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_READ_REQUEST_SIZE_MODE__SHIFT                     0x14
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_PRIV_MAX_READ_REQUEST_SIZE__SHIFT                     0x15
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_READ_SAFE_MODE__SHIFT                             0x18
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_EXTENDED_TAG_EN_OVERRIDE__SHIFT                       0x19
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_MODE__SHIFT                0x1b
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV__SHIFT                0x1c
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_EXTENDED_TAG_EN_OVERRIDE__SHIFT                  0x1e
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_PAYLOAD_SIZE_MODE_MASK                       0x00000100L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_PRIV_MAX_PAYLOAD_SIZE_MASK                       0x00000600L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_10BIT_TAG_EN_OVERRIDE_MASK                            0x00001800L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_10BIT_TAG_EN_OVERRIDE_MASK                       0x00006000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__MST_DROP_SYNC_FLOOD_EN_MASK                              0x00008000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_PAYLOAD_SIZE_MODE_MASK                            0x00010000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_PRIV_MAX_PAYLOAD_SIZE_MASK                            0x000E0000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_READ_REQUEST_SIZE_MODE_MASK                       0x00100000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_PRIV_MAX_READ_REQUEST_SIZE_MASK                       0x00E00000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_READ_SAFE_MODE_MASK                               0x01000000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_EXTENDED_TAG_EN_OVERRIDE_MASK                         0x06000000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_MODE_MASK                  0x08000000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV_MASK                  0x30000000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_EXTENDED_TAG_EN_OVERRIDE_MASK                    0xC0000000L
 
 // addressBlock: nbio_nbif0_bif_cfg_dev0_rc_bifcfgdecp
 //BIF_CFG_DEV0_RC0_VENDOR_ID
diff --git a/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_offset.h b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_offset.h
new file mode 100644
index 000000000000..a4dd372c0541
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_offset.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2023  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _smuio_10_0_2_OFFSET_HEADER
+#define _smuio_10_0_2_OFFSET_HEADER
+// addressBlock: smuio_smuio_misc_SmuSmuioDec
+// base address: 0x5a000
+#define mmSMUIO_MCM_CONFIG                                                                             0x0023
+#define mmSMUIO_MCM_CONFIG_BASE_IDX                                                                    0
+#define mmIP_DISCOVERY_VERSION                                                                         0x0000
+#define mmIP_DISCOVERY_VERSION_BASE_IDX                                                                1
+#define mmIO_SMUIO_PINSTRAP                                                                            0x01b1
+#define mmIO_SMUIO_PINSTRAP_BASE_IDX                                                                   1
+#define mmSCRATCH_REGISTER0                                                                            0x01b2
+#define mmSCRATCH_REGISTER0_BASE_IDX                                                                   1
+#define mmSCRATCH_REGISTER1                                                                            0x01b3
+#define mmSCRATCH_REGISTER1_BASE_IDX                                                                   1
+#define mmSCRATCH_REGISTER2                                                                            0x01b4
+#define mmSCRATCH_REGISTER2_BASE_IDX                                                                   1
+#define mmSCRATCH_REGISTER3                                                                            0x01b5
+#define mmSCRATCH_REGISTER3_BASE_IDX                                                                   1
+#define mmSCRATCH_REGISTER4                                                                            0x01b6
+#define mmSCRATCH_REGISTER4_BASE_IDX                                                                   1
+#define mmSCRATCH_REGISTER5                                                                            0x01b7
+#define mmSCRATCH_REGISTER5_BASE_IDX                                                                   1
+#define mmSCRATCH_REGISTER6                                                                            0x01b8
+#define mmSCRATCH_REGISTER6_BASE_IDX                                                                   1
+#define mmSCRATCH_REGISTER7                                                                            0x01b9
+#define mmSCRATCH_REGISTER7_BASE_IDX                                                                   1
+
+
+// addressBlock: smuio_smuio_reset_SmuSmuioDec
+// base address: 0x5a300
+#define mmSMUIO_MP_RESET_INTR                                                                          0x00c1
+#define mmSMUIO_MP_RESET_INTR_BASE_IDX                                                                 0
+#define mmSMUIO_SOC_HALT                                                                               0x00c2
+#define mmSMUIO_SOC_HALT_BASE_IDX                                                                      0
+#define mmSMUIO_GFX_MISC_CNTL                                                                          0x00c8
+#define mmSMUIO_GFX_MISC_CNTL_BASE_IDX                                                                 0
+
+
+// addressBlock: smuio_smuio_ccxctrl_SmuSmuioDec
+// base address: 0x5a000
+#define mmPWROK_REFCLK_GAP_CYCLES                                                                      0x0001
+#define mmPWROK_REFCLK_GAP_CYCLES_BASE_IDX                                                             1
+#define mmGOLDEN_TSC_INCREMENT_UPPER                                                                   0x0004
+#define mmGOLDEN_TSC_INCREMENT_UPPER_BASE_IDX                                                          1
+#define mmGOLDEN_TSC_INCREMENT_LOWER                                                                   0x0005
+#define mmGOLDEN_TSC_INCREMENT_LOWER_BASE_IDX                                                          1
+#define mmGOLDEN_TSC_COUNT_UPPER                                                                       0x0025
+#define mmGOLDEN_TSC_COUNT_UPPER_BASE_IDX                                                              1
+#define mmGOLDEN_TSC_COUNT_LOWER                                                                       0x0026
+#define mmGOLDEN_TSC_COUNT_LOWER_BASE_IDX                                                              1
+#define mmGFX_GOLDEN_TSC_SHADOW_UPPER                                                                  0x0029
+#define mmGFX_GOLDEN_TSC_SHADOW_UPPER_BASE_IDX                                                         1
+#define mmGFX_GOLDEN_TSC_SHADOW_LOWER                                                                  0x002a
+#define mmGFX_GOLDEN_TSC_SHADOW_LOWER_BASE_IDX                                                         1
+#define mmSOC_GOLDEN_TSC_SHADOW_UPPER                                                                  0x002b
+#define mmSOC_GOLDEN_TSC_SHADOW_UPPER_BASE_IDX                                                         1
+#define mmSOC_GOLDEN_TSC_SHADOW_LOWER                                                                  0x002c
+#define mmSOC_GOLDEN_TSC_SHADOW_LOWER_BASE_IDX                                                         1
+#define mmSOC_GAP_PWROK                                                                                0x002d
+#define mmSOC_GAP_PWROK_BASE_IDX                                                                       1
+
+// addressBlock: smuio_smuio_swtimer_SmuSmuioDec
+// base address: 0x5ac40
+#define mmPWR_VIRT_RESET_REQ                                                                           0x0110
+#define mmPWR_VIRT_RESET_REQ_BASE_IDX                                                                  1
+#define mmPWR_DISP_TIMER_CONTROL                                                                       0x0111
+#define mmPWR_DISP_TIMER_CONTROL_BASE_IDX                                                              1
+#define mmPWR_DISP_TIMER2_CONTROL                                                                      0x0113
+#define mmPWR_DISP_TIMER2_CONTROL_BASE_IDX                                                             1
+#define mmPWR_DISP_TIMER_GLOBAL_CONTROL                                                                0x0115
+#define mmPWR_DISP_TIMER_GLOBAL_CONTROL_BASE_IDX                                                       1
+#define mmPWR_IH_CONTROL                                                                               0x0116
+#define mmPWR_IH_CONTROL_BASE_IDX                                                                      1
+
+// addressBlock: smuio_smuio_svi0_SmuSmuioDec
+// base address: 0x6f000
+#define mmSMUSVI0_TEL_PLANE0                                                                           0x520e
+#define mmSMUSVI0_TEL_PLANE0_BASE_IDX                                                                  1
+#define mmSMUSVI0_PLANE0_CURRENTVID                                                                    0x5217
+#define mmSMUSVI0_PLANE0_CURRENTVID_BASE_IDX                                                           1
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_sh_mask.h
new file mode 100644
index 000000000000..d10ae61c346b
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_sh_mask.h
@@ -0,0 +1,184 @@
+/*
+ * Copyright (C) 2023  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _smuio_10_0_2_SH_MASK_HEADER
+#define _smuio_10_0_2_SH_MASK_HEADER
+// addressBlock: smuio_smuio_misc_SmuSmuioDec
+//SMUIO_MCM_CONFIG
+#define SMUIO_MCM_CONFIG__DIE_ID__SHIFT                                                                       0x0
+#define SMUIO_MCM_CONFIG__PKG_TYPE__SHIFT                                                                     0x2
+#define SMUIO_MCM_CONFIG__SOCKET_ID__SHIFT                                                                    0x5
+#define SMUIO_MCM_CONFIG__PKG_SUBTYPE__SHIFT                                                                  0x6
+#define SMUIO_MCM_CONFIG__CONSOLE_K__SHIFT                                                                    0x10
+#define SMUIO_MCM_CONFIG__CONSOLE_A__SHIFT                                                                    0x11
+#define SMUIO_MCM_CONFIG__DIE_ID_MASK                                                                         0x00000003L
+#define SMUIO_MCM_CONFIG__PKG_TYPE_MASK                                                                       0x0000001CL
+#define SMUIO_MCM_CONFIG__SOCKET_ID_MASK                                                                      0x00000020L
+#define SMUIO_MCM_CONFIG__PKG_SUBTYPE_MASK                                                                    0x000000C0L
+#define SMUIO_MCM_CONFIG__CONSOLE_K_MASK                                                                      0x00010000L
+#define SMUIO_MCM_CONFIG__CONSOLE_A_MASK                                                                      0x00020000L
+//IP_DISCOVERY_VERSION
+#define IP_DISCOVERY_VERSION__IP_DISCOVERY_VERSION__SHIFT                                                     0x0
+#define IP_DISCOVERY_VERSION__IP_DISCOVERY_VERSION_MASK                                                       0xFFFFFFFFL
+//IO_SMUIO_PINSTRAP
+#define IO_SMUIO_PINSTRAP__AUD_PORT_CONN__SHIFT                                                               0x0
+#define IO_SMUIO_PINSTRAP__AUD__SHIFT                                                                         0x3
+#define IO_SMUIO_PINSTRAP__AUD_PORT_CONN_MASK                                                                 0x00000007L
+#define IO_SMUIO_PINSTRAP__AUD_MASK                                                                           0x00000018L
+//SCRATCH_REGISTER0
+#define SCRATCH_REGISTER0__ScratchPad0__SHIFT                                                                 0x0
+#define SCRATCH_REGISTER0__ScratchPad0_MASK                                                                   0xFFFFFFFFL
+//SCRATCH_REGISTER1
+#define SCRATCH_REGISTER1__ScratchPad1__SHIFT                                                                 0x0
+#define SCRATCH_REGISTER1__ScratchPad1_MASK                                                                   0xFFFFFFFFL
+//SCRATCH_REGISTER2
+#define SCRATCH_REGISTER2__ScratchPad2__SHIFT                                                                 0x0
+#define SCRATCH_REGISTER2__ScratchPad2_MASK                                                                   0xFFFFFFFFL
+//SCRATCH_REGISTER3
+#define SCRATCH_REGISTER3__ScratchPad3__SHIFT                                                                 0x0
+#define SCRATCH_REGISTER3__ScratchPad3_MASK                                                                   0xFFFFFFFFL
+//SCRATCH_REGISTER4
+#define SCRATCH_REGISTER4__ScratchPad4__SHIFT                                                                 0x0
+#define SCRATCH_REGISTER4__ScratchPad4_MASK                                                                   0xFFFFFFFFL
+//SCRATCH_REGISTER5
+#define SCRATCH_REGISTER5__ScratchPad5__SHIFT                                                                 0x0
+#define SCRATCH_REGISTER5__ScratchPad5_MASK                                                                   0xFFFFFFFFL
+//SCRATCH_REGISTER6
+#define SCRATCH_REGISTER6__ScratchPad6__SHIFT                                                                 0x0
+#define SCRATCH_REGISTER6__ScratchPad6_MASK                                                                   0xFFFFFFFFL
+//SCRATCH_REGISTER7
+#define SCRATCH_REGISTER7__ScratchPad7__SHIFT                                                                 0x0
+#define SCRATCH_REGISTER7__ScratchPad7_MASK                                                                   0xFFFFFFFFL
+
+// addressBlock: smuio_smuio_reset_SmuSmuioDec
+//SMUIO_MP_RESET_INTR
+#define SMUIO_MP_RESET_INTR__SMUIO_MP_RESET_INTR__SHIFT                                                       0x0
+#define SMUIO_MP_RESET_INTR__SMUIO_MP_RESET_INTR_MASK                                                         0x00000001L
+//SMUIO_SOC_HALT
+#define SMUIO_SOC_HALT__WDT_FORCE_PWROK_EN__SHIFT                                                             0x2
+#define SMUIO_SOC_HALT__WDT_FORCE_RESETn_EN__SHIFT                                                            0x3
+#define SMUIO_SOC_HALT__WDT_FORCE_PWROK_EN_MASK                                                               0x00000004L
+#define SMUIO_SOC_HALT__WDT_FORCE_RESETn_EN_MASK                                                              0x00000008L
+//SMUIO_GFX_MISC_CNTL
+#define SMUIO_GFX_MISC_CNTL__SMU_GFX_cold_vs_gfxoff__SHIFT                                                    0x0
+#define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS__SHIFT                                                         0x1
+#define SMUIO_GFX_MISC_CNTL__PWR_GFX_DLDO_CLK_SWITCH__SHIFT                                                   0x3
+#define SMUIO_GFX_MISC_CNTL__PWR_GFX_RLC_CGPG_EN__SHIFT                                                       0x4
+#define SMUIO_GFX_MISC_CNTL__SMU_GFX_cold_vs_gfxoff_MASK                                                      0x00000001L
+#define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS_MASK                                                           0x00000006L
+#define SMUIO_GFX_MISC_CNTL__PWR_GFX_DLDO_CLK_SWITCH_MASK                                                     0x00000008L
+#define SMUIO_GFX_MISC_CNTL__PWR_GFX_RLC_CGPG_EN_MASK                                                         0x00000010L
+
+// addressBlock: smuio_smuio_ccxctrl_SmuSmuioDec
+//PWROK_REFCLK_GAP_CYCLES
+#define PWROK_REFCLK_GAP_CYCLES__Pwrok_PreAssertion_clkgap_cycles__SHIFT                                      0x0
+#define PWROK_REFCLK_GAP_CYCLES__Pwrok_PostAssertion_clkgap_cycles__SHIFT                                     0x8
+#define PWROK_REFCLK_GAP_CYCLES__Pwrok_PreAssertion_clkgap_cycles_MASK                                        0x000000FFL
+#define PWROK_REFCLK_GAP_CYCLES__Pwrok_PostAssertion_clkgap_cycles_MASK                                       0x0000FF00L
+//GOLDEN_TSC_INCREMENT_UPPER
+#define GOLDEN_TSC_INCREMENT_UPPER__GoldenTscIncrementUpper__SHIFT                                            0x0
+#define GOLDEN_TSC_INCREMENT_UPPER__GoldenTscIncrementUpper_MASK                                              0x00FFFFFFL
+//GOLDEN_TSC_INCREMENT_LOWER
+#define GOLDEN_TSC_INCREMENT_LOWER__GoldenTscIncrementLower__SHIFT                                            0x0
+#define GOLDEN_TSC_INCREMENT_LOWER__GoldenTscIncrementLower_MASK                                              0xFFFFFFFFL
+//GOLDEN_TSC_COUNT_UPPER
+#define GOLDEN_TSC_COUNT_UPPER__GoldenTscCountUpper__SHIFT                                                    0x0
+#define GOLDEN_TSC_COUNT_UPPER__GoldenTscCountUpper_MASK                                                      0x00FFFFFFL
+//GOLDEN_TSC_COUNT_LOWER
+#define GOLDEN_TSC_COUNT_LOWER__GoldenTscCountLower__SHIFT                                                    0x0
+#define GOLDEN_TSC_COUNT_LOWER__GoldenTscCountLower_MASK                                                      0xFFFFFFFFL
+//GFX_GOLDEN_TSC_SHADOW_UPPER
+#define GFX_GOLDEN_TSC_SHADOW_UPPER__GfxGoldenTscShadowUpper__SHIFT                                           0x0
+#define GFX_GOLDEN_TSC_SHADOW_UPPER__GfxGoldenTscShadowUpper_MASK                                             0x00FFFFFFL
+//GFX_GOLDEN_TSC_SHADOW_LOWER
+#define GFX_GOLDEN_TSC_SHADOW_LOWER__GfxGoldenTscShadowLower__SHIFT                                           0x0
+#define GFX_GOLDEN_TSC_SHADOW_LOWER__GfxGoldenTscShadowLower_MASK                                             0xFFFFFFFFL
+//SOC_GOLDEN_TSC_SHADOW_UPPER
+#define SOC_GOLDEN_TSC_SHADOW_UPPER__SocGoldenTscShadowUpper__SHIFT                                           0x0
+#define SOC_GOLDEN_TSC_SHADOW_UPPER__SocGoldenTscShadowUpper_MASK                                             0x00FFFFFFL
+//SOC_GOLDEN_TSC_SHADOW_LOWER
+#define SOC_GOLDEN_TSC_SHADOW_LOWER__SocGoldenTscShadowLower__SHIFT                                           0x0
+#define SOC_GOLDEN_TSC_SHADOW_LOWER__SocGoldenTscShadowLower_MASK                                             0xFFFFFFFFL
+//SOC_GAP_PWROK
+#define SOC_GAP_PWROK__soc_gap_pwrok__SHIFT                                                                   0x0
+#define SOC_GAP_PWROK__soc_gap_pwrok_MASK                                                                     0x00000001L
+
+// addressBlock: smuio_smuio_swtimer_SmuSmuioDec
+//PWR_VIRT_RESET_REQ
+#define PWR_VIRT_RESET_REQ__VF_FLR__SHIFT                                                                     0x0
+#define PWR_VIRT_RESET_REQ__PF_FLR__SHIFT                                                                     0x1f
+#define PWR_VIRT_RESET_REQ__VF_FLR_MASK                                                                       0x7FFFFFFFL
+#define PWR_VIRT_RESET_REQ__PF_FLR_MASK                                                                       0x80000000L
+//PWR_DISP_TIMER_CONTROL
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_COUNT__SHIFT                                                   0x0
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_ENABLE__SHIFT                                                  0x19
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_DISABLE__SHIFT                                                 0x1a
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_MASK__SHIFT                                                    0x1b
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_STAT_AK__SHIFT                                                 0x1c
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_TYPE__SHIFT                                                    0x1d
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_MODE__SHIFT                                                    0x1e
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_COUNT_MASK                                                     0x01FFFFFFL
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_ENABLE_MASK                                                    0x02000000L
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_DISABLE_MASK                                                   0x04000000L
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_MASK_MASK                                                      0x08000000L
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_STAT_AK_MASK                                                   0x10000000L
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_TYPE_MASK                                                      0x20000000L
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_MODE_MASK                                                      0x40000000L
+//PWR_DISP_TIMER2_CONTROL
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_COUNT__SHIFT                                                  0x0
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_ENABLE__SHIFT                                                 0x19
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_DISABLE__SHIFT                                                0x1a
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_MASK__SHIFT                                                   0x1b
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_STAT_AK__SHIFT                                                0x1c
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_TYPE__SHIFT                                                   0x1d
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_MODE__SHIFT                                                   0x1e
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_COUNT_MASK                                                    0x01FFFFFFL
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_ENABLE_MASK                                                   0x02000000L
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_DISABLE_MASK                                                  0x04000000L
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_MASK_MASK                                                     0x08000000L
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_STAT_AK_MASK                                                  0x10000000L
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_TYPE_MASK                                                     0x20000000L
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_MODE_MASK                                                     0x40000000L
+//PWR_DISP_TIMER_GLOBAL_CONTROL
+#define PWR_DISP_TIMER_GLOBAL_CONTROL__DISP_TIMER_PULSE_WIDTH__SHIFT                                          0x0
+#define PWR_DISP_TIMER_GLOBAL_CONTROL__DISP_TIMER_PULSE_EN__SHIFT                                             0xa
+#define PWR_DISP_TIMER_GLOBAL_CONTROL__DISP_TIMER_PULSE_WIDTH_MASK                                            0x000003FFL
+#define PWR_DISP_TIMER_GLOBAL_CONTROL__DISP_TIMER_PULSE_EN_MASK                                               0x00000400L
+//PWR_IH_CONTROL
+#define PWR_IH_CONTROL__MAX_CREDIT__SHIFT                                                                     0x0
+#define PWR_IH_CONTROL__DISP_TIMER_TRIGGER_MASK__SHIFT                                                        0x5
+#define PWR_IH_CONTROL__DISP_TIMER2_TRIGGER_MASK__SHIFT                                                       0x6
+#define PWR_IH_CONTROL__PWR_IH_CLK_GATE_EN__SHIFT                                                             0x1f
+#define PWR_IH_CONTROL__MAX_CREDIT_MASK                                                                       0x0000001FL
+#define PWR_IH_CONTROL__DISP_TIMER_TRIGGER_MASK_MASK                                                          0x00000020L
+#define PWR_IH_CONTROL__DISP_TIMER2_TRIGGER_MASK_MASK                                                         0x00000040L
+#define PWR_IH_CONTROL__PWR_IH_CLK_GATE_EN_MASK                                                               0x80000000L
+
+// addressBlock: smuio_smuio_svi0_SmuSmuioDec
+//SMUSVI0_TEL_PLANE0
+#define SMUSVI0_TEL_PLANE0__SVI0_PLANE0_IDDCOR__SHIFT                                                         0x0
+#define SMUSVI0_TEL_PLANE0__SVI0_PLANE0_VDDCOR__SHIFT                                                         0x10
+#define SMUSVI0_TEL_PLANE0__SVI0_PLANE0_IDDCOR_MASK                                                           0x000000FFL
+#define SMUSVI0_TEL_PLANE0__SVI0_PLANE0_VDDCOR_MASK                                                           0x01FF0000L
+//SMUSVI0_PLANE0_CURRENTVID
+#define SMUSVI0_PLANE0_CURRENTVID__CURRENT_SVI0_PLANE0_VID__SHIFT                                             0x18
+#define SMUSVI0_PLANE0_CURRENTVID__CURRENT_SVI0_PLANE0_VID_MASK                                               0xFF000000L
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index cd3c40a86029..edcb85560ced 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -318,6 +318,7 @@ enum pp_xgmi_plpd_mode {
 #define MAX_GFX_CLKS 8
 #define MAX_CLKS 4
 #define NUM_VCN 4
+#define NUM_JPEG_ENG 32
 
 struct seq_file;
 enum amd_pp_clock_type;
@@ -421,7 +422,7 @@ struct amd_pm_funcs {
 	int (*set_hard_min_dcefclk_by_freq)(void *handle, uint32_t clock);
 	int (*set_hard_min_fclk_by_freq)(void *handle, uint32_t clock);
 	int (*set_min_deep_sleep_dcefclk)(void *handle, uint32_t clock);
-	int (*get_asic_baco_capability)(void *handle, bool *cap);
+	bool (*get_asic_baco_capability)(void *handle);
 	int (*get_asic_baco_state)(void *handle, int *state);
 	int (*set_asic_baco_state)(void *handle, int state);
 	int (*get_ppfeature_status)(void *handle, char *buf);
@@ -431,6 +432,7 @@ struct amd_pm_funcs {
 	int (*set_df_cstate)(void *handle, enum pp_df_cstate state);
 	int (*set_xgmi_pstate)(void *handle, uint32_t pstate);
 	ssize_t (*get_gpu_metrics)(void *handle, void **table);
+	ssize_t (*get_pm_metrics)(void *handle, void *pmmetrics, size_t size);
 	int (*set_watermarks_for_clock_ranges)(void *handle,
 					       struct pp_smu_wm_range_sets *ranges);
 	int (*display_disable_memory_clock_switch)(void *handle,
@@ -444,6 +446,7 @@ struct amd_pm_funcs {
 				   struct dpm_clocks *clock_table);
 	int (*get_smu_prv_buf_details)(void *handle, void **addr, size_t *size);
 	void (*pm_compute_clocks)(void *handle);
+	int (*notify_rlc_state)(void *handle, bool en);
 };
 
 struct metrics_table_header {
@@ -773,6 +776,85 @@ struct gpu_metrics_v1_4 {
 	uint16_t			padding;
 };
 
+struct gpu_metrics_v1_5 {
+	struct metrics_table_header	common_header;
+
+	/* Temperature (Celsius) */
+	uint16_t			temperature_hotspot;
+	uint16_t			temperature_mem;
+	uint16_t			temperature_vrsoc;
+
+	/* Power (Watts) */
+	uint16_t			curr_socket_power;
+
+	/* Utilization (%) */
+	uint16_t			average_gfx_activity;
+	uint16_t			average_umc_activity; // memory controller
+	uint16_t			vcn_activity[NUM_VCN];
+	uint16_t			jpeg_activity[NUM_JPEG_ENG];
+
+	/* Energy (15.259uJ (2^-16) units) */
+	uint64_t			energy_accumulator;
+
+	/* Driver attached timestamp (in ns) */
+	uint64_t			system_clock_counter;
+
+	/* Throttle status */
+	uint32_t			throttle_status;
+
+	/* Clock lock status. Each bit corresponds to a clock instance */
+	uint32_t			gfxclk_lock_status;
+
+	/* Link width (number of lanes) and speed (in 0.1 GT/s) */
+	uint16_t			pcie_link_width;
+	uint16_t			pcie_link_speed;
+
+	/* XGMI bus width and bitrate (in Gbps) */
+	uint16_t			xgmi_link_width;
+	uint16_t			xgmi_link_speed;
+
+	/* Utilization Accumulated (%) */
+	uint32_t			gfx_activity_acc;
+	uint32_t			mem_activity_acc;
+
+	/* PCIE accumulated bandwidth (GB/sec) */
+	uint64_t			pcie_bandwidth_acc;
+
+	/* PCIE instantaneous bandwidth (GB/sec) */
+	uint64_t			pcie_bandwidth_inst;
+
+	/* PCIE L0 to recovery state transition accumulated count */
+	uint64_t			pcie_l0_to_recov_count_acc;
+
+	/* PCIE replay accumulated count */
+	uint64_t			pcie_replay_count_acc;
+
+	/* PCIE replay rollover accumulated count */
+	uint64_t			pcie_replay_rover_count_acc;
+
+	/* PCIE NAK sent accumulated count */
+	uint32_t			pcie_nak_sent_count_acc;
+
+	/* PCIE NAK received accumulated count */
+	uint32_t			pcie_nak_rcvd_count_acc;
+
+	/* XGMI accumulated data transfer size (KiloBytes) */
+	uint64_t			xgmi_read_data_acc[NUM_XGMI_LINKS];
+	uint64_t			xgmi_write_data_acc[NUM_XGMI_LINKS];
+
+	/* PMFW attached timestamp (10ns resolution) */
+	uint64_t			firmware_timestamp;
+
+	/* Current clocks (MHz) */
+	uint16_t			current_gfxclk[MAX_GFX_CLKS];
+	uint16_t			current_socclk[MAX_CLKS];
+	uint16_t			current_vclk0[MAX_CLKS];
+	uint16_t			current_dclk0[MAX_CLKS];
+	uint16_t			current_uclk;
+
+	uint16_t			padding;
+};
+
 /*
  * gpu_metrics_v2_0 is not recommended as it's not naturally aligned.
  * Use gpu_metrics_v2_1 or later instead.
@@ -1084,6 +1166,10 @@ struct gpu_metrics_v3_0 {
 	uint16_t			average_dram_reads;
 	/* time filtered DRAM write bandwidth [MB/sec] */
 	uint16_t			average_dram_writes;
+	/* time filtered IPU read bandwidth [MB/sec] */
+	uint16_t			average_ipu_reads;
+	/* time filtered IPU write bandwidth [MB/sec] */
+	uint16_t			average_ipu_writes;
 
 	/* Driver attached timestamp (in ns) */
 	uint64_t			system_clock_counter;
@@ -1103,6 +1189,8 @@ struct gpu_metrics_v3_0 {
 	uint32_t			average_all_core_power;
 	/* calculated core power [mW] */
 	uint16_t			average_core_power[16];
+	/* time filtered total system power [mW] */
+	uint16_t			average_sys_power;
 	/* maximum IRM defined STAPM power limit [mW] */
 	uint16_t			stapm_power_limit;
 	/* time filtered STAPM power limit [mW] */
@@ -1115,6 +1203,8 @@ struct gpu_metrics_v3_0 {
 	uint16_t			average_ipuclk_frequency;
 	uint16_t			average_fclk_frequency;
 	uint16_t			average_vclk_frequency;
+	uint16_t			average_uclk_frequency;
+	uint16_t			average_mpipu_frequency;
 
 	/* Current clocks */
 	/* target core frequency [MHz] */
@@ -1124,7 +1214,31 @@ struct gpu_metrics_v3_0 {
 	/* GFXCLK frequency limit enforced on GFX [MHz] */
 	uint16_t			current_gfx_maxfreq;
 
+	/* Throttle Residency (ASIC dependent) */
+	uint32_t			throttle_residency_prochot;
+	uint32_t			throttle_residency_spl;
+	uint32_t			throttle_residency_fppt;
+	uint32_t			throttle_residency_sppt;
+	uint32_t			throttle_residency_thm_core;
+	uint32_t			throttle_residency_thm_gfx;
+	uint32_t			throttle_residency_thm_soc;
+
 	/* Metrics table alpha filter time constant [us] */
 	uint32_t			time_filter_alphavalue;
 };
+
+struct amdgpu_pmmetrics_header {
+	uint16_t structure_size;
+	uint16_t pad;
+	uint32_t mp1_ip_discovery_version;
+	uint32_t pmfw_version;
+	uint32_t pmmetrics_version;
+};
+
+struct amdgpu_pm_metrics {
+	struct amdgpu_pmmetrics_header common_header;
+
+	uint8_t data[];
+};
+
 #endif
diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
index b1db2b190187..ec5b9ab67c5e 100644
--- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h
+++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
@@ -232,6 +232,7 @@ union MESAPI_SET_HW_RESOURCES {
 		};
 		uint32_t	oversubscription_timer;
 		uint64_t        doorbell_info;
+		uint64_t        event_intr_history_gpu_mc_ptr;
 	};
 
 	uint32_t	max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
@@ -571,7 +572,8 @@ struct SET_SHADER_DEBUGGER {
 		struct {
 			uint32_t single_memop : 1;  /* SQ_DEBUG.single_memop */
 			uint32_t single_alu_op : 1; /* SQ_DEBUG.single_alu_op */
-			uint32_t reserved : 30;
+			uint32_t reserved : 29;
+			uint32_t process_ctx_flush : 1;
 		};
 		uint32_t u32all;
 	} flags;
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 08cb79401410..6627ee07d52d 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -181,12 +181,29 @@ int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev,
 	return ret;
 }
 
+int amdgpu_dpm_notify_rlc_state(struct amdgpu_device *adev, bool en)
+{
+	const struct amd_pm_funcs *funcs = adev->powerplay.pp_funcs;
+	int err;
+
+	/* Nothing to notify when the backend lacks the hook; report success. */
+	if (!funcs || !funcs->notify_rlc_state)
+		return 0;
+
+	/* The callback runs under adev->pm.mutex. */
+	mutex_lock(&adev->pm.mutex);
+	err = funcs->notify_rlc_state(adev->powerplay.pp_handle, en);
+	mutex_unlock(&adev->pm.mutex);
+
+	/* Hand the backend's status back to the caller unchanged. */
+	return err;
+}
+
 bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev)
 {
 	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
 	void *pp_handle = adev->powerplay.pp_handle;
-	bool baco_cap;
-	int ret = 0;
+	bool ret;
 
 	if (!pp_funcs || !pp_funcs->get_asic_baco_capability)
 		return false;
@@ -204,12 +221,11 @@ bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev)
 
 	mutex_lock(&adev->pm.mutex);
 
-	ret = pp_funcs->get_asic_baco_capability(pp_handle,
-						 &baco_cap);
+	ret = pp_funcs->get_asic_baco_capability(pp_handle);
 
 	mutex_unlock(&adev->pm.mutex);
 
-	return ret ? false : baco_cap;
+	return ret;
 }
 
 int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev)
@@ -600,6 +616,16 @@ void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable)
 			  enable ? "enable" : "disable", ret);
 }
 
+void amdgpu_dpm_enable_vpe(struct amdgpu_device *adev, bool enable)
+{
+	/* The powergating request passed to the SMU is the inverse of @enable. */
+	int err = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VPE, !enable);
+
+	if (err)
+		DRM_ERROR("Dpm %s vpe failed, ret = %d.\n",
+			  enable ? "enable" : "disable", err);
+}
+
 int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version)
 {
 	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
@@ -1301,6 +1327,23 @@ int amdgpu_dpm_get_gpu_metrics(struct amdgpu_device *adev, void **table)
 	return ret;
 }
 
+ssize_t amdgpu_dpm_get_pm_metrics(struct amdgpu_device *adev, void *pm_metrics,
+				  size_t size)
+{
+	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+	ssize_t ret;
+
+	/* No powerplay backend, or one without pm metrics: report unsupported. */
+	if (!pp_funcs || !pp_funcs->get_pm_metrics)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&adev->pm.mutex);
+	ret = pp_funcs->get_pm_metrics(adev->powerplay.pp_handle, pm_metrics, size);
+	mutex_unlock(&adev->pm.mutex);
+
+	return ret;
+}
+
 int amdgpu_dpm_get_fan_control_mode(struct amdgpu_device *adev,
 				    uint32_t *fan_mode)
 {
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index ca2ece24e1e0..087d57850304 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -1799,6 +1799,44 @@ static ssize_t amdgpu_set_apu_thermal_cap(struct device *dev,
 	return count;
 }
 
+static int amdgpu_pm_metrics_attr_update(struct amdgpu_device *adev,
+					 struct amdgpu_device_attr *attr, uint32_t mask,
+					 enum amdgpu_device_attr_states *states)
+{
+	/* Probe with an empty buffer; only -EOPNOTSUPP marks it unsupported. */
+	if (amdgpu_dpm_get_pm_metrics(adev, NULL, 0) != -EOPNOTSUPP)
+		return 0;
+	*states = ATTR_STATE_UNSUPPORTED;
+	return 0;
+}
+
+static ssize_t amdgpu_get_pm_metrics(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = drm_to_adev(ddev);
+	ssize_t len;
+	int err;
+
+	/* Refuse the read during a GPU reset or a non-runtime suspend. */
+	if (amdgpu_in_reset(adev) || (adev->in_suspend && !adev->in_runpm))
+		return -EPERM;
+
+	/* Take a runtime-PM reference; drop it again if the resume failed. */
+	err = pm_runtime_get_sync(ddev->dev);
+	if (err < 0) {
+		pm_runtime_put_autosuspend(ddev->dev);
+		return err;
+	}
+
+	len = amdgpu_dpm_get_pm_metrics(adev, buf, PAGE_SIZE);
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
+	return len;
+}
+
 /**
  * DOC: gpu_metrics
  *
@@ -2096,6 +2134,8 @@ static struct amdgpu_device_attr amdgpu_device_attrs[] = {
 	AMDGPU_DEVICE_ATTR_RW(smartshift_bias,				ATTR_FLAG_BASIC,
 			      .attr_update = ss_bias_attr_update),
 	AMDGPU_DEVICE_ATTR_RW(xgmi_plpd_policy,				ATTR_FLAG_BASIC),
+	AMDGPU_DEVICE_ATTR_RO(pm_metrics,				ATTR_FLAG_BASIC,
+			      .attr_update = amdgpu_pm_metrics_attr_update),
 };
 
 static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_attr *attr,
@@ -2128,7 +2168,9 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_
 		if (amdgpu_dpm_is_overdrive_supported(adev))
 			*states = ATTR_STATE_SUPPORTED;
 	} else if (DEVICE_ATTR_IS(mem_busy_percent)) {
-		if (adev->flags & AMD_IS_APU || gc_ver == IP_VERSION(9, 0, 1))
+		if ((adev->flags & AMD_IS_APU &&
+		     gc_ver != IP_VERSION(9, 4, 3)) ||
+		    gc_ver == IP_VERSION(9, 0, 1))
 			*states = ATTR_STATE_UNSUPPORTED;
 	} else if (DEVICE_ATTR_IS(pcie_bw)) {
 		/* PCIe Perf counters won't work on APU nodes */
@@ -2198,10 +2240,10 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_
 	} else if (DEVICE_ATTR_IS(xgmi_plpd_policy)) {
 		if (amdgpu_dpm_get_xgmi_plpd_mode(adev, NULL) == XGMI_PLPD_NONE)
 			*states = ATTR_STATE_UNSUPPORTED;
-	} else if (DEVICE_ATTR_IS(pp_dpm_mclk_od)) {
+	} else if (DEVICE_ATTR_IS(pp_mclk_od)) {
 		if (amdgpu_dpm_get_mclk_od(adev) == -EOPNOTSUPP)
 			*states = ATTR_STATE_UNSUPPORTED;
-	} else if (DEVICE_ATTR_IS(pp_dpm_sclk_od)) {
+	} else if (DEVICE_ATTR_IS(pp_sclk_od)) {
 		if (amdgpu_dpm_get_sclk_od(adev) == -EOPNOTSUPP)
 			*states = ATTR_STATE_UNSUPPORTED;
 	} else if (DEVICE_ATTR_IS(apu_thermal_cap)) {
@@ -4307,11 +4349,19 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
 	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&value, &size))
 		seq_printf(m, "\t%u mV (VDDNB)\n", value);
 	size = sizeof(uint32_t);
-	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_AVG_POWER, (void *)&query, &size))
-		seq_printf(m, "\t%u.%02u W (average GPU)\n", query >> 8, query & 0xff);
+	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_AVG_POWER, (void *)&query, &size)) {
+		if (adev->flags & AMD_IS_APU)
+			seq_printf(m, "\t%u.%02u W (average SoC including CPU)\n", query >> 8, query & 0xff);
+		else
+			seq_printf(m, "\t%u.%02u W (average SoC)\n", query >> 8, query & 0xff);
+	}
 	size = sizeof(uint32_t);
-	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_INPUT_POWER, (void *)&query, &size))
-		seq_printf(m, "\t%u.%02u W (current GPU)\n", query >> 8, query & 0xff);
+	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_INPUT_POWER, (void *)&query, &size)) {
+		if (adev->flags & AMD_IS_APU)
+			seq_printf(m, "\t%u.%02u W (current SoC including CPU)\n", query >> 8, query & 0xff);
+		else
+			seq_printf(m, "\t%u.%02u W (current SoC)\n", query >> 8, query & 0xff);
+	}
 	size = sizeof(value);
 	seq_printf(m, "\n");
 
@@ -4337,9 +4387,9 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
 		/* VCN clocks */
 		if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCN_POWER_STATE, (void *)&value, &size)) {
 			if (!value) {
-				seq_printf(m, "VCN: Disabled\n");
+				seq_printf(m, "VCN: Powered down\n");
 			} else {
-				seq_printf(m, "VCN: Enabled\n");
+				seq_printf(m, "VCN: Powered up\n");
 				if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_DCLK, (void *)&value, &size))
 					seq_printf(m, "\t%u MHz (DCLK)\n", value/100);
 				if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_VCLK, (void *)&value, &size))
@@ -4351,9 +4401,9 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
 		/* UVD clocks */
 		if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_POWER, (void *)&value, &size)) {
 			if (!value) {
-				seq_printf(m, "UVD: Disabled\n");
+				seq_printf(m, "UVD: Powered down\n");
 			} else {
-				seq_printf(m, "UVD: Enabled\n");
+				seq_printf(m, "UVD: Powered up\n");
 				if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_DCLK, (void *)&value, &size))
 					seq_printf(m, "\t%u MHz (DCLK)\n", value/100);
 				if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_VCLK, (void *)&value, &size))
@@ -4365,9 +4415,9 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
 		/* VCE clocks */
 		if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_POWER, (void *)&value, &size)) {
 			if (!value) {
-				seq_printf(m, "VCE: Disabled\n");
+				seq_printf(m, "VCE: Powered down\n");
 			} else {
-				seq_printf(m, "VCE: Enabled\n");
+				seq_printf(m, "VCE: Powered up\n");
 				if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_ECCLK, (void *)&value, &size))
 					seq_printf(m, "\t%u MHz (ECCLK)\n", value/100);
 			}
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
index feccd2a7120d..3047ffe7f244 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
@@ -415,6 +415,8 @@ int amdgpu_dpm_mode1_reset(struct amdgpu_device *adev);
 int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev,
 			     enum pp_mp1_state mp1_state);
 
+int amdgpu_dpm_notify_rlc_state(struct amdgpu_device *adev, bool en);
+
 int amdgpu_dpm_set_gfx_power_up_by_imu(struct amdgpu_device *adev);
 
 int amdgpu_dpm_baco_exit(struct amdgpu_device *adev);
@@ -443,6 +445,7 @@ void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev);
 void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable);
 void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable);
 void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable);
+void amdgpu_dpm_enable_vpe(struct amdgpu_device *adev, bool enable);
 int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version);
 int amdgpu_dpm_handle_passthrough_sbr(struct amdgpu_device *adev, bool enable);
 int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device *adev, uint32_t size);
@@ -511,6 +514,18 @@ int amdgpu_dpm_get_power_profile_mode(struct amdgpu_device *adev,
 int amdgpu_dpm_set_power_profile_mode(struct amdgpu_device *adev,
 				      long *input, uint32_t size);
 int amdgpu_dpm_get_gpu_metrics(struct amdgpu_device *adev, void **table);
+
+/**
+ * amdgpu_dpm_get_pm_metrics - Get one snapshot of power management metrics from PMFW
+ * @adev: amdgpu device pointer
+ * @pm_metrics: buffer the sample is copied to; expected to be allocated by the caller
+ * @size: size of the allocated buffer; max size expected for a metrics sample is 4096 bytes
+ *
+ * Return: Actual size of the metrics sample
+ */
+ssize_t amdgpu_dpm_get_pm_metrics(struct amdgpu_device *adev, void *pm_metrics,
+				  size_t size);
+
 int amdgpu_dpm_get_fan_control_mode(struct amdgpu_device *adev,
 				    uint32_t *fan_mode);
 int amdgpu_dpm_set_fan_speed_pwm(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
index 5d28c951a319..5cb4725c773f 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
@@ -2735,10 +2735,8 @@ static int kv_parse_power_table(struct amdgpu_device *adev)
 		non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *)
 			&non_clock_info_array->nonClockInfo[non_clock_array_index];
 		ps = kzalloc(sizeof(struct kv_ps), GFP_KERNEL);
-		if (ps == NULL) {
-			kfree(adev->pm.dpm.ps);
+		if (ps == NULL)
 			return -ENOMEM;
-		}
 		adev->pm.dpm.ps[i].ps_priv = ps;
 		k = 0;
 		idx = (u8 *)&power_state->v2.clockInfoIndex[0];
diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
index 81fb4e5dd804..60377747bab4 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
@@ -272,10 +272,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				 le16_to_cpu(power_info->pplib4.usVddcDependencyOnSCLKOffset));
 			ret = amdgpu_parse_clk_voltage_dep_table(&adev->pm.dpm.dyn_state.vddc_dependency_on_sclk,
 								 dep_table);
-			if (ret) {
-				amdgpu_free_extended_power_table(adev);
+			if (ret)
 				return ret;
-			}
 		}
 		if (power_info->pplib4.usVddciDependencyOnMCLKOffset) {
 			dep_table = (ATOM_PPLIB_Clock_Voltage_Dependency_Table *)
@@ -283,10 +281,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				 le16_to_cpu(power_info->pplib4.usVddciDependencyOnMCLKOffset));
 			ret = amdgpu_parse_clk_voltage_dep_table(&adev->pm.dpm.dyn_state.vddci_dependency_on_mclk,
 								 dep_table);
-			if (ret) {
-				amdgpu_free_extended_power_table(adev);
+			if (ret)
 				return ret;
-			}
 		}
 		if (power_info->pplib4.usVddcDependencyOnMCLKOffset) {
 			dep_table = (ATOM_PPLIB_Clock_Voltage_Dependency_Table *)
@@ -294,10 +290,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				 le16_to_cpu(power_info->pplib4.usVddcDependencyOnMCLKOffset));
 			ret = amdgpu_parse_clk_voltage_dep_table(&adev->pm.dpm.dyn_state.vddc_dependency_on_mclk,
 								 dep_table);
-			if (ret) {
-				amdgpu_free_extended_power_table(adev);
+			if (ret)
 				return ret;
-			}
 		}
 		if (power_info->pplib4.usMvddDependencyOnMCLKOffset) {
 			dep_table = (ATOM_PPLIB_Clock_Voltage_Dependency_Table *)
@@ -305,10 +299,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				 le16_to_cpu(power_info->pplib4.usMvddDependencyOnMCLKOffset));
 			ret = amdgpu_parse_clk_voltage_dep_table(&adev->pm.dpm.dyn_state.mvdd_dependency_on_mclk,
 								 dep_table);
-			if (ret) {
-				amdgpu_free_extended_power_table(adev);
+			if (ret)
 				return ret;
-			}
 		}
 		if (power_info->pplib4.usMaxClockVoltageOnDCOffset) {
 			ATOM_PPLIB_Clock_Voltage_Limit_Table *clk_v =
@@ -339,10 +331,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				kcalloc(psl->ucNumEntries,
 					sizeof(struct amdgpu_phase_shedding_limits_entry),
 					GFP_KERNEL);
-			if (!adev->pm.dpm.dyn_state.phase_shedding_limits_table.entries) {
-				amdgpu_free_extended_power_table(adev);
+			if (!adev->pm.dpm.dyn_state.phase_shedding_limits_table.entries)
 				return -ENOMEM;
-			}
 
 			entry = &psl->entries[0];
 			for (i = 0; i < psl->ucNumEntries; i++) {
@@ -383,10 +373,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 			ATOM_PPLIB_CAC_Leakage_Record *entry;
 			u32 size = cac_table->ucNumEntries * sizeof(struct amdgpu_cac_leakage_table);
 			adev->pm.dpm.dyn_state.cac_leakage_table.entries = kzalloc(size, GFP_KERNEL);
-			if (!adev->pm.dpm.dyn_state.cac_leakage_table.entries) {
-				amdgpu_free_extended_power_table(adev);
+			if (!adev->pm.dpm.dyn_state.cac_leakage_table.entries)
 				return -ENOMEM;
-			}
 			entry = &cac_table->entries[0];
 			for (i = 0; i < cac_table->ucNumEntries; i++) {
 				if (adev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_EVV) {
@@ -438,10 +426,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				sizeof(struct amdgpu_vce_clock_voltage_dependency_entry);
 			adev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries =
 				kzalloc(size, GFP_KERNEL);
-			if (!adev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries) {
-				amdgpu_free_extended_power_table(adev);
+			if (!adev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries)
 				return -ENOMEM;
-			}
 			adev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.count =
 				limits->numEntries;
 			entry = &limits->entries[0];
@@ -493,10 +479,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				sizeof(struct amdgpu_uvd_clock_voltage_dependency_entry);
 			adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries =
 				kzalloc(size, GFP_KERNEL);
-			if (!adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries) {
-				amdgpu_free_extended_power_table(adev);
+			if (!adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries)
 				return -ENOMEM;
-			}
 			adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.count =
 				limits->numEntries;
 			entry = &limits->entries[0];
@@ -525,10 +509,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				sizeof(struct amdgpu_clock_voltage_dependency_entry);
 			adev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries =
 				kzalloc(size, GFP_KERNEL);
-			if (!adev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries) {
-				amdgpu_free_extended_power_table(adev);
+			if (!adev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries)
 				return -ENOMEM;
-			}
 			adev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.count =
 				limits->numEntries;
 			entry = &limits->entries[0];
@@ -548,10 +530,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				 le16_to_cpu(ext_hdr->usPPMTableOffset));
 			adev->pm.dpm.dyn_state.ppm_table =
 				kzalloc(sizeof(struct amdgpu_ppm_table), GFP_KERNEL);
-			if (!adev->pm.dpm.dyn_state.ppm_table) {
-				amdgpu_free_extended_power_table(adev);
+			if (!adev->pm.dpm.dyn_state.ppm_table)
 				return -ENOMEM;
-			}
 			adev->pm.dpm.dyn_state.ppm_table->ppm_design = ppm->ucPpmDesign;
 			adev->pm.dpm.dyn_state.ppm_table->cpu_core_number =
 				le16_to_cpu(ppm->usCpuCoreNumber);
@@ -583,10 +563,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				sizeof(struct amdgpu_clock_voltage_dependency_entry);
 			adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries =
 				kzalloc(size, GFP_KERNEL);
-			if (!adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries) {
-				amdgpu_free_extended_power_table(adev);
+			if (!adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries)
 				return -ENOMEM;
-			}
 			adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.count =
 				limits->numEntries;
 			entry = &limits->entries[0];
@@ -606,10 +584,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 			ATOM_PowerTune_Table *pt;
 			adev->pm.dpm.dyn_state.cac_tdp_table =
 				kzalloc(sizeof(struct amdgpu_cac_tdp_table), GFP_KERNEL);
-			if (!adev->pm.dpm.dyn_state.cac_tdp_table) {
-				amdgpu_free_extended_power_table(adev);
+			if (!adev->pm.dpm.dyn_state.cac_tdp_table)
 				return -ENOMEM;
-			}
 			if (rev > 0) {
 				ATOM_PPLIB_POWERTUNE_Table_V1 *ppt = (ATOM_PPLIB_POWERTUNE_Table_V1 *)
 					(mode_info->atom_context->bios + data_offset +
@@ -645,10 +621,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 			ret = amdgpu_parse_clk_voltage_dep_table(
 					&adev->pm.dpm.dyn_state.vddgfx_dependency_on_sclk,
 					dep_table);
-			if (ret) {
-				kfree(adev->pm.dpm.dyn_state.vddgfx_dependency_on_sclk.entries);
+			if (ret)
 				return ret;
-			}
 		}
 	}
 
diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
index fc8e4ac6c8e7..df4f20293c16 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
@@ -7379,10 +7379,9 @@ static int si_dpm_init(struct amdgpu_device *adev)
 		kcalloc(4,
 			sizeof(struct amdgpu_clock_voltage_dependency_entry),
 			GFP_KERNEL);
-	if (!adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries) {
-		amdgpu_free_extended_power_table(adev);
+	if (!adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries)
 		return -ENOMEM;
-	}
+
 	adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.count = 4;
 	adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[0].clk = 0;
 	adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[0].v = 0;
diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
index 914c15387157..aed0e2cefbf9 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
@@ -1371,21 +1371,18 @@ static int pp_set_active_display_count(void *handle, uint32_t count)
 	return phm_set_active_display_count(hwmgr, count);
 }
 
-static int pp_get_asic_baco_capability(void *handle, bool *cap)
+static bool pp_get_asic_baco_capability(void *handle)
 {
 	struct pp_hwmgr *hwmgr = handle;
 
-	*cap = false;
 	if (!hwmgr)
-		return -EINVAL;
+		return false;
 
 	if (!(hwmgr->not_vf && amdgpu_dpm) ||
 		!hwmgr->hwmgr_func->get_asic_baco_capability)
-		return 0;
+		return false;
 
-	hwmgr->hwmgr_func->get_asic_baco_capability(hwmgr, cap);
-
-	return 0;
+	return hwmgr->hwmgr_func->get_asic_baco_capability(hwmgr);
 }
 
 static int pp_get_asic_baco_state(void *handle, int *state)
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c
index f2a55c1413f5..17882f8dfdd3 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c
@@ -200,7 +200,7 @@ static int get_platform_power_management_table(
 		struct pp_hwmgr *hwmgr,
 		ATOM_Tonga_PPM_Table *atom_ppm_table)
 {
-	struct phm_ppm_table *ptr = kzalloc(sizeof(ATOM_Tonga_PPM_Table), GFP_KERNEL);
+	struct phm_ppm_table *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
 	struct phm_ppt_v1_information *pp_table_information =
 		(struct phm_ppt_v1_information *)(hwmgr->pptable);
 
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c
index 044cda005aed..e8a9471c1898 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c
@@ -33,21 +33,20 @@
 #include "smu/smu_7_1_2_d.h"
 #include "smu/smu_7_1_2_sh_mask.h"
 
-int smu7_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap)
+bool smu7_baco_get_capability(struct pp_hwmgr *hwmgr)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
 	uint32_t reg;
 
-	*cap = false;
 	if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_BACO))
 		return 0;
 
 	reg = RREG32(mmCC_BIF_BX_FUSESTRAP0);
 
 	if (reg & CC_BIF_BX_FUSESTRAP0__STRAP_BIF_PX_CAPABLE_MASK)
-		*cap = true;
+		return true;
 
-	return 0;
+	return false;
 }
 
 int smu7_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state)
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h
index be0d98abb536..73a773f4ce2e 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h
@@ -25,7 +25,7 @@
 #include "hwmgr.h"
 #include "common_baco.h"
 
-extern int smu7_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap);
+extern bool smu7_baco_get_capability(struct pp_hwmgr *hwmgr);
 extern int smu7_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state);
 extern int smu7_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state);
 
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
index 11372fcc59c8..aa91730e4eaf 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
@@ -2974,6 +2974,8 @@ static int smu7_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 		result = smu7_get_evv_voltages(hwmgr);
 		if (result) {
 			pr_info("Get EVV Voltage Failed.  Abort Driver loading!\n");
+			kfree(hwmgr->backend);
+			hwmgr->backend = NULL;
 			return -EINVAL;
 		}
 	} else {
@@ -3019,8 +3021,10 @@ static int smu7_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	}
 
 	result = smu7_update_edc_leakage_table(hwmgr);
-	if (result)
+	if (result) {
+		smu7_hwmgr_backend_fini(hwmgr);
 		return result;
+	}
 
 	return 0;
 }
@@ -3995,6 +3999,7 @@ static int smu7_read_sensor(struct pp_hwmgr *hwmgr, int idx,
 	uint32_t sclk, mclk, activity_percent;
 	uint32_t offset, val_vid;
 	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	struct amdgpu_device *adev = hwmgr->adev;
 
 	/* size must be at least 4 bytes for all sensors */
 	if (*size < 4)
@@ -4038,7 +4043,21 @@ static int smu7_read_sensor(struct pp_hwmgr *hwmgr, int idx,
 		*size = 4;
 		return 0;
 	case AMDGPU_PP_SENSOR_GPU_INPUT_POWER:
-		return smu7_get_gpu_power(hwmgr, (uint32_t *)value);
+		if ((adev->asic_type != CHIP_HAWAII) &&
+		    (adev->asic_type != CHIP_BONAIRE) &&
+		    (adev->asic_type != CHIP_FIJI) &&
+		    (adev->asic_type != CHIP_TONGA))
+			return smu7_get_gpu_power(hwmgr, (uint32_t *)value);
+		else
+			return -EOPNOTSUPP;
+	case AMDGPU_PP_SENSOR_GPU_AVG_POWER:
+		if ((adev->asic_type != CHIP_HAWAII) &&
+		    (adev->asic_type != CHIP_BONAIRE) &&
+		    (adev->asic_type != CHIP_FIJI) &&
+		    (adev->asic_type != CHIP_TONGA))
+			return -EOPNOTSUPP;
+		else
+			return smu7_get_gpu_power(hwmgr, (uint32_t *)value);
 	case AMDGPU_PP_SENSOR_VDDGFX:
 		if ((data->vr_config & VRCONF_VDDGFX_MASK) ==
 		    (VR_SVI2_PLANE_2 << VRCONF_VDDGFX_SHIFT))
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c
index de0a37f7c632..c66ef9741535 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c
@@ -28,14 +28,13 @@
 #include "vega10_inc.h"
 #include "smu9_baco.h"
 
-int smu9_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap)
+bool smu9_baco_get_capability(struct pp_hwmgr *hwmgr)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
 	uint32_t reg, data;
 
-	*cap = false;
 	if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_BACO))
-		return 0;
+		return false;
 
 	WREG32(0x12074, 0xFFF0003B);
 	data = RREG32(0x12075);
@@ -44,10 +43,10 @@ int smu9_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap)
 		reg = RREG32_SOC15(NBIF, 0, mmRCC_BIF_STRAP0);
 
 		if (reg & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK)
-			*cap = true;
+			return true;
 	}
 
-	return 0;
+	return false;
 }
 
 int smu9_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state)
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h
index 84e90f801ac3..9ff7c2ea1b58 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h
@@ -25,7 +25,7 @@
 #include "hwmgr.h"
 #include "common_baco.h"
 
-extern int smu9_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap);
+extern bool smu9_baco_get_capability(struct pp_hwmgr *hwmgr);
 extern int smu9_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state);
 
 #endif
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c
index 994c0d374bfa..dad4c80aee58 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c
@@ -36,23 +36,22 @@ static const struct soc15_baco_cmd_entry clean_baco_tbl[] = {
 	{CMD_WRITE, SOC15_REG_ENTRY(NBIF, 0, mmBIOS_SCRATCH_7), 0, 0, 0, 0},
 };
 
-int vega20_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap)
+bool vega20_baco_get_capability(struct pp_hwmgr *hwmgr)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
 	uint32_t reg;
 
-	*cap = false;
 	if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_BACO))
-		return 0;
+		return false;
 
 	if (((RREG32(0x17569) & 0x20000000) >> 29) == 0x1) {
 		reg = RREG32_SOC15(NBIF, 0, mmRCC_BIF_STRAP0);
 
 		if (reg & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK)
-			*cap = true;
+			return true;
 	}
 
-	return 0;
+	return false;
 }
 
 int vega20_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state)
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h
index f06471e712dc..bdad9c915631 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h
@@ -25,7 +25,7 @@
 #include "hwmgr.h"
 #include "common_baco.h"
 
-extern int vega20_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap);
+extern bool vega20_baco_get_capability(struct pp_hwmgr *hwmgr);
 extern int vega20_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state);
 extern int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state);
 extern int vega20_baco_apply_vdci_flush_workaround(struct pp_hwmgr *hwmgr);
diff --git a/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h
index 81650727a5de..6f536159df4d 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h
@@ -351,7 +351,7 @@ struct pp_hwmgr_func {
 	int (*set_hard_min_fclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock);
 	int (*set_hard_min_gfxclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock);
 	int (*set_soft_max_gfxclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock);
-	int (*get_asic_baco_capability)(struct pp_hwmgr *hwmgr, bool *cap);
+	bool (*get_asic_baco_capability)(struct pp_hwmgr *hwmgr);
 	int (*get_asic_baco_state)(struct pp_hwmgr *hwmgr, enum BACO_STATE *state);
 	int (*set_asic_baco_state)(struct pp_hwmgr *hwmgr, enum BACO_STATE state);
 	int (*get_ppfeature_status)(struct pp_hwmgr *hwmgr, char *buf);
diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c
index 9e4228232f02..ad1fd3150d03 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c
@@ -2298,6 +2298,7 @@ static uint32_t ci_get_mac_definition(uint32_t value)
 	case SMU_MAX_ENTRIES_SMIO:
 		return SMU7_MAX_ENTRIES_SMIO;
 	case SMU_MAX_LEVELS_VDDC:
+	case SMU_MAX_LEVELS_VDDGFX:
 		return SMU7_MAX_LEVELS_VDDC;
 	case SMU_MAX_LEVELS_VDDCI:
 		return SMU7_MAX_LEVELS_VDDCI;
diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c
index 97d9802fe673..17d2f5bff4a7 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c
@@ -2263,6 +2263,7 @@ static uint32_t iceland_get_mac_definition(uint32_t value)
 	case SMU_MAX_ENTRIES_SMIO:
 		return SMU71_MAX_ENTRIES_SMIO;
 	case SMU_MAX_LEVELS_VDDC:
+	case SMU_MAX_LEVELS_VDDGFX:
 		return SMU71_MAX_LEVELS_VDDC;
 	case SMU_MAX_LEVELS_VDDCI:
 		return SMU71_MAX_LEVELS_VDDCI;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 1ead323f1c78..0ad947df777a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -24,6 +24,7 @@
 
 #include <linux/firmware.h>
 #include <linux/pci.h>
+#include <linux/power_supply.h>
 #include <linux/reboot.h>
 
 #include "amdgpu.h"
@@ -733,7 +734,7 @@ static int smu_early_init(void *handle)
 	smu->adev = adev;
 	smu->pm_enabled = !!amdgpu_dpm;
 	smu->is_apu = false;
-	smu->smu_baco.state = SMU_BACO_STATE_NONE;
+	smu->smu_baco.state = SMU_BACO_STATE_EXIT;
 	smu->smu_baco.platform_support = false;
 	smu->user_dpm_profile.fan_mode = -1;
 
@@ -817,16 +818,8 @@ static int smu_late_init(void *handle)
 	 * handle the switch automatically. Driver involvement
 	 * is unnecessary.
 	 */
-	if (!smu->dc_controlled_by_gpio) {
-		ret = smu_set_power_source(smu,
-					   adev->pm.ac_power ? SMU_POWER_SOURCE_AC :
-					   SMU_POWER_SOURCE_DC);
-		if (ret) {
-			dev_err(adev->dev, "Failed to switch to %s mode!\n",
-				adev->pm.ac_power ? "AC" : "DC");
-			return ret;
-		}
-	}
+	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
+	smu_set_ac_dc(smu);
 
 	if ((amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 1)) ||
 	    (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 3)))
@@ -1322,6 +1315,187 @@ static int smu_get_thermal_temperature_range(struct smu_context *smu)
 	return ret;
 }
 
+/**
+ * smu_wbrf_handle_exclusion_ranges - consume the wbrf exclusion ranges
+ *
+ * @smu: smu_context pointer
+ *
+ * Retrieve the wbrf exclusion ranges and send them to PMFW for proper handling.
+ * Returns 0 on success, error on failure.
+ */
+static int smu_wbrf_handle_exclusion_ranges(struct smu_context *smu)
+{
+	struct wbrf_ranges_in_out wbrf_exclusion = {0};
+	struct freq_band_range *wifi_bands = wbrf_exclusion.band_list;
+	struct amdgpu_device *adev = smu->adev;
+	uint32_t num_of_wbrf_ranges = MAX_NUM_OF_WBRF_RANGES;
+	uint64_t start, end;
+	int ret, i, j;
+
+	ret = amd_wbrf_retrieve_freq_band(adev->dev, &wbrf_exclusion);
+	if (ret) {
+		dev_err(adev->dev, "Failed to retrieve exclusion ranges!\n");
+		return ret;
+	}
+
+	/*
+	 * The exclusion ranges array we got might be filled with holes and duplicate
+	 * entries. For example:
+	 * {(2400, 2500), (0, 0), (6882, 6962), (2400, 2500), (0, 0), (6117, 6189), (0, 0)...}
+	 * We need to do some cleanup to eliminate those holes and duplicate entries.
+	 * Expected output: {(2400, 2500), (6117, 6189), (6882, 6962), (0, 0)...}
+	 */
+	for (i = 0; i < num_of_wbrf_ranges; i++) {
+		start = wifi_bands[i].start;
+		end = wifi_bands[i].end;
+
+		/* get the last valid entry to fill the intermediate hole */
+		if (!start && !end) {
+			for (j = num_of_wbrf_ranges - 1; j > i; j--)
+				if (wifi_bands[j].start && wifi_bands[j].end)
+					break;
+
+			/* no valid entry left */
+			if (j <= i)
+				break;
+
+			start = wifi_bands[i].start = wifi_bands[j].start;
+			end = wifi_bands[i].end = wifi_bands[j].end;
+			wifi_bands[j].start = 0;
+			wifi_bands[j].end = 0;
+			num_of_wbrf_ranges = j;
+		}
+
+		/* eliminate duplicate entries */
+		for (j = i + 1; j < num_of_wbrf_ranges; j++) {
+			if ((wifi_bands[j].start == start) && (wifi_bands[j].end == end)) {
+				wifi_bands[j].start = 0;
+				wifi_bands[j].end = 0;
+			}
+		}
+	}
+
+	/* Send the sorted wifi_bands to PMFW */
+	ret = smu_set_wbrf_exclusion_ranges(smu, wifi_bands);
+	/* Try to set the wifi_bands again */
+	if (unlikely(ret == -EBUSY)) {
+		mdelay(5);
+		ret = smu_set_wbrf_exclusion_ranges(smu, wifi_bands);
+	}
+
+	return ret;
+}
+
+/**
+ * smu_wbrf_event_handler - handle notify events
+ *
+ * @nb: notifier block
+ * @action: event type
+ * @_arg: event data
+ *
+ * Calls relevant amdgpu function in response to wbrf event
+ * notification from kernel.
+ */
+static int smu_wbrf_event_handler(struct notifier_block *nb,
+				  unsigned long action, void *_arg)
+{
+	struct smu_context *smu = container_of(nb, struct smu_context, wbrf_notifier);
+
+	switch (action) {
+	case WBRF_CHANGED:
+		schedule_delayed_work(&smu->wbrf_delayed_work,
+				      msecs_to_jiffies(SMU_WBRF_EVENT_HANDLING_PACE));
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+
+	return NOTIFY_OK;
+}
+
+/**
+ * smu_wbrf_delayed_work_handler - callback on delayed work timer expired
+ *
+ * @work: struct work_struct pointer
+ *
+ * Flood is over and driver will consume the latest exclusion ranges.
+ */
+static void smu_wbrf_delayed_work_handler(struct work_struct *work)
+{
+	struct smu_context *smu = container_of(work, struct smu_context, wbrf_delayed_work.work);
+
+	smu_wbrf_handle_exclusion_ranges(smu);
+}
+
+/**
+ * smu_wbrf_support_check - check wbrf support
+ *
+ * @smu: smu_context pointer
+ *
+ * Verifies the ACPI interface whether wbrf is supported.
+ */
+static void smu_wbrf_support_check(struct smu_context *smu)
+{
+	struct amdgpu_device *adev = smu->adev;
+
+	smu->wbrf_supported = smu_is_asic_wbrf_supported(smu) && amdgpu_wbrf &&
+							acpi_amd_wbrf_supported_consumer(adev->dev);
+
+	if (smu->wbrf_supported)
+		dev_info(adev->dev, "RF interference mitigation is supported\n");
+}
+
+/**
+ * smu_wbrf_init - init driver wbrf support
+ *
+ * @smu: smu_context pointer
+ *
+ * Verifies the AMD ACPI interfaces and registers with the wbrf
+ * notifier chain if wbrf feature is supported.
+ * Returns 0 on success, error on failure.
+ */
+static int smu_wbrf_init(struct smu_context *smu)
+{
+	int ret;
+
+	if (!smu->wbrf_supported)
+		return 0;
+
+	INIT_DELAYED_WORK(&smu->wbrf_delayed_work, smu_wbrf_delayed_work_handler);
+
+	smu->wbrf_notifier.notifier_call = smu_wbrf_event_handler;
+	ret = amd_wbrf_register_notifier(&smu->wbrf_notifier);
+	if (ret)
+		return ret;
+
+	/*
+	 * Some wifiband exclusion ranges may already be there
+	 * before our driver is loaded. Schedule the work to make
+	 * sure our driver is aware of those exclusion ranges.
+	 */
+	schedule_delayed_work(&smu->wbrf_delayed_work,
+			      msecs_to_jiffies(SMU_WBRF_EVENT_HANDLING_PACE));
+
+	return 0;
+}
+
+/**
+ * smu_wbrf_fini - tear down driver wbrf support
+ *
+ * @smu: smu_context pointer
+ *
+ * Unregisters with the wbrf notifier chain.
+ */
+static void smu_wbrf_fini(struct smu_context *smu)
+{
+	if (!smu->wbrf_supported)
+		return;
+
+	amd_wbrf_unregister_notifier(&smu->wbrf_notifier);
+
+	cancel_delayed_work_sync(&smu->wbrf_delayed_work);
+}
+
 static int smu_smc_hw_setup(struct smu_context *smu)
 {
 	struct smu_feature *feature = &smu->smu_feature;
@@ -1414,6 +1588,15 @@ static int smu_smc_hw_setup(struct smu_context *smu)
 	if (ret)
 		return ret;
 
+	/* Enable UclkShadow when wbrf is supported */
+	if (smu->wbrf_supported) {
+		ret = smu_enable_uclk_shadow(smu, true);
+		if (ret) {
+			dev_err(adev->dev, "Failed to enable UclkShadow feature to support wbrf!\n");
+			return ret;
+		}
+	}
+
 	/*
 	 * With SCPM enabled, these actions(and relevant messages) are
 	 * not needed and permitted.
@@ -1512,6 +1695,15 @@ static int smu_smc_hw_setup(struct smu_context *smu)
 	 */
 	ret = smu_set_min_dcef_deep_sleep(smu,
 					  smu->smu_table.boot_values.dcefclk / 100);
+	if (ret) {
+		dev_err(adev->dev, "Error setting min deepsleep dcefclk\n");
+		return ret;
+	}
+
+	/* Init wbrf support. Properly setup the notifier */
+	ret = smu_wbrf_init(smu);
+	if (ret)
+		dev_err(adev->dev, "Error during wbrf init call\n");
 
 	return ret;
 }
@@ -1567,6 +1759,13 @@ static int smu_hw_init(void *handle)
 		return ret;
 	}
 
+	/*
+	 * Check whether wbrf is supported. This needs to be done
+	 * before SMU setup starts since part of SMU configuration
+	 * relies on this.
+	 */
+	smu_wbrf_support_check(smu);
+
 	if (smu->is_apu) {
 		ret = smu_set_gfx_imu_enable(smu);
 		if (ret)
@@ -1710,6 +1909,16 @@ static int smu_disable_dpms(struct smu_context *smu)
 		}
 	}
 
+	/* Notify SMU that RLC is going to be off, to stop RLC and SMU interaction.
+	 * Otherwise SMU will hang while interacting with RLC if RLC is halted.
+	 * This is a WA for the Vangogh asic which fixes the SMU hang issue.
+	 */
+	ret = smu_notify_rlc_state(smu, false);
+	if (ret) {
+		dev_err(adev->dev, "Fail to notify rlc status!\n");
+		return ret;
+	}
+
 	if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2) &&
 	    !((adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs) &&
 	    !amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs->stop)
@@ -1723,6 +1932,8 @@ static int smu_smc_hw_cleanup(struct smu_context *smu)
 	struct amdgpu_device *adev = smu->adev;
 	int ret = 0;
 
+	smu_wbrf_fini(smu);
+
 	cancel_work_sync(&smu->throttling_logging_work);
 	cancel_work_sync(&smu->interrupt_work);
 
@@ -1743,31 +1954,10 @@ static int smu_smc_hw_cleanup(struct smu_context *smu)
 	return 0;
 }
 
-static int smu_reset_mp1_state(struct smu_context *smu)
-{
-	struct amdgpu_device *adev = smu->adev;
-	int ret = 0;
-
-	if ((!adev->in_runpm) && (!adev->in_suspend) &&
-		(!amdgpu_in_reset(adev)))
-		switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
-		case IP_VERSION(13, 0, 0):
-		case IP_VERSION(13, 0, 7):
-		case IP_VERSION(13, 0, 10):
-			ret = smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD);
-			break;
-		default:
-			break;
-		}
-
-	return ret;
-}
-
 static int smu_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct smu_context *smu = adev->powerplay.pp_handle;
-	int ret;
 
 	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
 		return 0;
@@ -1785,15 +1975,7 @@ static int smu_hw_fini(void *handle)
 
 	adev->pm.dpm_enabled = false;
 
-	ret = smu_smc_hw_cleanup(smu);
-	if (ret)
-		return ret;
-
-	ret = smu_reset_mp1_state(smu);
-	if (ret)
-		return ret;
-
-	return 0;
+	return smu_smc_hw_cleanup(smu);
 }
 
 static void smu_late_fini(void *handle)
@@ -2492,6 +2674,7 @@ int smu_get_power_limit(void *handle,
 		case SMU_PPT_LIMIT_CURRENT:
 			switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
 			case IP_VERSION(13, 0, 2):
+			case IP_VERSION(13, 0, 6):
 			case IP_VERSION(11, 0, 7):
 			case IP_VERSION(11, 0, 11):
 			case IP_VERSION(11, 0, 12):
@@ -3005,19 +3188,17 @@ static int smu_set_xgmi_pstate(void *handle,
 	return ret;
 }
 
-static int smu_get_baco_capability(void *handle, bool *cap)
+static bool smu_get_baco_capability(void *handle)
 {
 	struct smu_context *smu = handle;
 
-	*cap = false;
-
 	if (!smu->pm_enabled)
-		return 0;
+		return false;
 
-	if (smu->ppt_funcs && smu->ppt_funcs->baco_is_support)
-		*cap = smu->ppt_funcs->baco_is_support(smu);
+	if (!smu->ppt_funcs || !smu->ppt_funcs->baco_is_support)
+		return false;
 
-	return 0;
+	return smu->ppt_funcs->baco_is_support(smu);
 }
 
 static int smu_baco_set_state(void *handle, int state)
@@ -3191,6 +3372,20 @@ static ssize_t smu_sys_get_gpu_metrics(void *handle, void **table)
 	return smu->ppt_funcs->get_gpu_metrics(smu, table);
 }
 
+static ssize_t smu_sys_get_pm_metrics(void *handle, void *pm_metrics,
+				      size_t size)
+{
+	struct smu_context *smu = handle;
+
+	if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled)
+		return -EOPNOTSUPP;
+
+	if (!smu->ppt_funcs->get_pm_metrics)
+		return -EOPNOTSUPP;
+
+	return smu->ppt_funcs->get_pm_metrics(smu, pm_metrics, size);
+}
+
 static int smu_enable_mgpu_fan_boost(void *handle)
 {
 	struct smu_context *smu = handle;
@@ -3332,6 +3527,7 @@ static const struct amd_pm_funcs swsmu_pm_funcs = {
 	.set_df_cstate                    = smu_set_df_cstate,
 	.set_xgmi_pstate                  = smu_set_xgmi_pstate,
 	.get_gpu_metrics                  = smu_sys_get_gpu_metrics,
+	.get_pm_metrics                   = smu_sys_get_pm_metrics,
 	.set_watermarks_for_clock_ranges     = smu_set_watermarks_for_clock_ranges,
 	.display_disable_memory_clock_switch = smu_display_disable_memory_clock_switch,
 	.get_max_sustainable_clocks_by_dc    = smu_get_max_sustainable_clocks_by_dc,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 8def291b18bc..66e84defd0b6 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -22,6 +22,9 @@
 #ifndef __AMDGPU_SMU_H__
 #define __AMDGPU_SMU_H__
 
+#include <linux/acpi_amd_wbrf.h>
+#include <linux/units.h>
+
 #include "amdgpu.h"
 #include "kgd_pp_interface.h"
 #include "dm_pp_interface.h"
@@ -253,6 +256,7 @@ struct smu_table {
 	uint64_t mc_address;
 	void *cpu_addr;
 	struct amdgpu_bo *bo;
+	uint32_t version;
 };
 
 enum smu_perf_level_designation {
@@ -317,6 +321,7 @@ enum smu_table_id {
 	SMU_TABLE_PACE,
 	SMU_TABLE_ECCINFO,
 	SMU_TABLE_COMBO_PPTABLE,
+	SMU_TABLE_WIFIBAND,
 	SMU_TABLE_COUNT,
 };
 
@@ -419,7 +424,6 @@ enum smu_reset_mode {
 enum smu_baco_state {
 	SMU_BACO_STATE_ENTER = 0,
 	SMU_BACO_STATE_EXIT,
-	SMU_BACO_STATE_NONE,
 };
 
 struct smu_baco_context {
@@ -470,6 +474,12 @@ struct stb_context {
 
 #define WORKLOAD_POLICY_MAX 7
 
+/*
+ * Configure wbrf event handling pace as there can be only one
+ * event processed every SMU_WBRF_EVENT_HANDLING_PACE ms.
+ */
+#define SMU_WBRF_EVENT_HANDLING_PACE	10
+
 struct smu_context {
 	struct amdgpu_device            *adev;
 	struct amdgpu_irq_src		irq_source;
@@ -569,6 +579,11 @@ struct smu_context {
 	struct delayed_work		swctf_delayed_work;
 
 	enum pp_xgmi_plpd_mode plpd_mode;
+
+	/* data structures for wbrf feature support */
+	bool				wbrf_supported;
+	struct notifier_block		wbrf_notifier;
+	struct delayed_work		wbrf_delayed_work;
 };
 
 struct i2c_adapter;
@@ -1253,6 +1268,15 @@ struct pptable_funcs {
 	ssize_t (*get_gpu_metrics)(struct smu_context *smu, void **table);
 
 	/**
+	 * @get_pm_metrics: Get one snapshot of power management metrics from
+	 * PMFW.
+	 *
+	 * Return: Size of the metrics sample
+	 */
+	ssize_t (*get_pm_metrics)(struct smu_context *smu, void *pm_metrics,
+				  size_t size);
+
+	/**
 	 * @enable_mgpu_fan_boost: Enable multi-GPU fan boost.
 	 */
 	int (*enable_mgpu_fan_boost)(struct smu_context *smu);
@@ -1360,6 +1384,27 @@ struct pptable_funcs {
 	 *                       management.
 	 */
 	int (*dpm_set_umsch_mm_enable)(struct smu_context *smu, bool enable);
+
+	/**
+	 * @notify_rlc_state: Notify RLC power state to SMU.
+	 */
+	int (*notify_rlc_state)(struct smu_context *smu, bool en);
+
+	/**
+	 * @is_asic_wbrf_supported: check whether PMFW supports the wbrf feature
+	 */
+	bool (*is_asic_wbrf_supported)(struct smu_context *smu);
+
+	/**
+	 * @enable_uclk_shadow: Enable the uclk shadow feature on wbrf supported ASICs
+	 */
+	int (*enable_uclk_shadow)(struct smu_context *smu, bool enable);
+
+	/**
+	 * @set_wbrf_exclusion_ranges: notify SMU of the occupied wifi bands
+	 */
+	int (*set_wbrf_exclusion_ranges)(struct smu_context *smu,
+					struct freq_band_range *exclusion_ranges);
 };
 
 typedef enum {
@@ -1403,6 +1448,16 @@ typedef enum {
 	METRICS_PCIE_WIDTH,
 	METRICS_CURR_FANPWM,
 	METRICS_CURR_SOCKETPOWER,
+	METRICS_AVERAGE_VPECLK,
+	METRICS_AVERAGE_IPUCLK,
+	METRICS_AVERAGE_MPIPUCLK,
+	METRICS_THROTTLER_RESIDENCY_PROCHOT,
+	METRICS_THROTTLER_RESIDENCY_SPL,
+	METRICS_THROTTLER_RESIDENCY_FPPT,
+	METRICS_THROTTLER_RESIDENCY_SPPT,
+	METRICS_THROTTLER_RESIDENCY_THM_CORE,
+	METRICS_THROTTLER_RESIDENCY_THM_GFX,
+	METRICS_THROTTLER_RESIDENCY_THM_SOC,
 } MetricsMember_t;
 
 enum smu_cmn2asic_mapping_type {
@@ -1476,6 +1531,17 @@ enum smu_baco_seq {
 			 __dst_size);					   \
 })
 
+typedef struct {
+	uint16_t     LowFreq;
+	uint16_t     HighFreq;
+} WifiOneBand_t;
+
+typedef struct {
+	uint32_t		WifiBandEntryNum;
+	WifiOneBand_t	WifiBandEntry[11];
+	uint32_t		MmHubPadding[8];
+} WifiBandEntryTable_t;
+
 #if !defined(SWSMU_CODE_LAYER_L2) && !defined(SWSMU_CODE_LAYER_L3) && !defined(SWSMU_CODE_LAYER_L4)
 int smu_get_power_limit(void *handle,
 			uint32_t *limit,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
index 9dd1ed5b8940..b114d14fc053 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
@@ -1615,7 +1615,8 @@ typedef struct {
 #define TABLE_I2C_COMMANDS            9
 #define TABLE_DRIVER_INFO             10
 #define TABLE_ECCINFO                 11
-#define TABLE_COUNT                   12
+#define TABLE_WIFIBAND                12
+#define TABLE_COUNT                   13
 
 //IH Interupt ID
 #define IH_INTERRUPT_ID_TO_DRIVER                   0xFE
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
index 62b7c0daff68..8b1496f8ce58 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
@@ -1605,7 +1605,8 @@ typedef struct {
 #define TABLE_I2C_COMMANDS            9
 #define TABLE_DRIVER_INFO             10
 #define TABLE_ECCINFO                 11
-#define TABLE_COUNT                   12
+#define TABLE_WIFIBAND                12
+#define TABLE_COUNT                   13
 
 //IH Interupt ID
 #define IH_INTERRUPT_ID_TO_DRIVER                   0xFE
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h
index 22f88842a7fd..5bb7a63c0602 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h
@@ -24,11 +24,6 @@
 #ifndef SMU14_DRIVER_IF_V14_0_0_H
 #define SMU14_DRIVER_IF_V14_0_0_H
 
-// *** IMPORTANT ***
-// SMU TEAM: Always increment the interface version if
-// any structure is changed in this file
-#define PMFW_DRIVER_IF_VERSION 6
-
 typedef struct {
   int32_t value;
   uint32_t numFractionalBits;
@@ -150,37 +145,50 @@ typedef struct {
 } DpmClocks_t;
 
 typedef struct {
-  uint16_t CoreFrequency[16];        //Target core frequency [MHz]
-  uint16_t CorePower[16];            //CAC calculated core power [mW]
-  uint16_t CoreTemperature[16];      //TSEN measured core temperature [centi-C]
-  uint16_t GfxTemperature;           //TSEN measured GFX temperature [centi-C]
-  uint16_t SocTemperature;           //TSEN measured SOC temperature [centi-C]
-  uint16_t StapmOpnLimit;            //Maximum IRM defined STAPM power limit [mW]
-  uint16_t StapmCurrentLimit;        //Time filtered STAPM power limit [mW]
-  uint16_t InfrastructureCpuMaxFreq; //CCLK frequency limit enforced on classic cores [MHz]
-  uint16_t InfrastructureGfxMaxFreq; //GFXCLK frequency limit enforced on GFX [MHz]
-  uint16_t SkinTemp;                 //Maximum skin temperature reported by APU and HS2 chassis sensors [centi-C]
-  uint16_t GfxclkFrequency;          //Time filtered target GFXCLK frequency [MHz]
-  uint16_t FclkFrequency;            //Time filtered target FCLK frequency [MHz]
-  uint16_t GfxActivity;              //Time filtered GFX busy % [0-100]
-  uint16_t SocclkFrequency;          //Time filtered target SOCCLK frequency [MHz]
-  uint16_t VclkFrequency;            //Time filtered target VCLK frequency [MHz]
-  uint16_t VcnActivity;              //Time filtered VCN busy % [0-100]
-  uint16_t VpeclkFrequency;          //Time filtered target VPECLK frequency [MHz]
-  uint16_t IpuclkFrequency;          //Time filtered target IPUCLK frequency [MHz]
-  uint16_t IpuBusy[8];               //Time filtered IPU per-column busy % [0-100]
-  uint16_t DRAMReads;                //Time filtered DRAM read bandwidth [MB/sec]
-  uint16_t DRAMWrites;               //Time filtered DRAM write bandwidth [MB/sec]
-  uint16_t CoreC0Residency[16];      //Time filtered per-core C0 residency % [0-100]
-  uint16_t IpuPower;                 //Time filtered IPU power [mW]
-  uint32_t ApuPower;                 //Time filtered APU power [mW]
-  uint32_t GfxPower;                 //Time filtered GFX power [mW]
-  uint32_t dGpuPower;                //Time filtered dGPU power [mW]
-  uint32_t SocketPower;              //Time filtered power used for PPT/STAPM [APU+dGPU] [mW]
-  uint32_t AllCorePower;             //Time filtered sum of core power across all cores in the socket [mW]
-  uint32_t FilterAlphaValue;         //Metrics table alpha filter time constant [us]
-  uint32_t MetricsCounter;           //Counter that is incremented on every metrics table update [PM_TIMER cycles]
-  uint32_t spare[16];
+  uint16_t CoreFrequency[16];          //Target core frequency [MHz]
+  uint16_t CorePower[16];              //CAC calculated core power [mW]
+  uint16_t CoreTemperature[16];        //TSEN measured core temperature [centi-C]
+  uint16_t GfxTemperature;             //TSEN measured GFX temperature [centi-C]
+  uint16_t SocTemperature;             //TSEN measured SOC temperature [centi-C]
+  uint16_t StapmOpnLimit;              //Maximum IRM defined STAPM power limit [mW]
+  uint16_t StapmCurrentLimit;          //Time filtered STAPM power limit [mW]
+  uint16_t InfrastructureCpuMaxFreq;   //CCLK frequency limit enforced on classic cores [MHz]
+  uint16_t InfrastructureGfxMaxFreq;   //GFXCLK frequency limit enforced on GFX [MHz]
+  uint16_t SkinTemp;                   //Maximum skin temperature reported by APU and HS2 chassis sensors [centi-C]
+  uint16_t GfxclkFrequency;            //Time filtered target GFXCLK frequency [MHz]
+  uint16_t FclkFrequency;              //Time filtered target FCLK frequency [MHz]
+  uint16_t GfxActivity;                //Time filtered GFX busy % [0-100]
+  uint16_t SocclkFrequency;            //Time filtered target SOCCLK frequency [MHz]
+  uint16_t VclkFrequency;              //Time filtered target VCLK frequency [MHz]
+  uint16_t VcnActivity;                //Time filtered VCN busy % [0-100]
+  uint16_t VpeclkFrequency;            //Time filtered target VPECLK frequency [MHz]
+  uint16_t IpuclkFrequency;            //Time filtered target IPUCLK frequency [MHz]
+  uint16_t IpuBusy[8];                 //Time filtered IPU per-column busy % [0-100]
+  uint16_t DRAMReads;                  //Time filtered DRAM read bandwidth [MB/sec]
+  uint16_t DRAMWrites;                 //Time filtered DRAM write bandwidth [MB/sec]
+  uint16_t CoreC0Residency[16];        //Time filtered per-core C0 residency % [0-100]
+  uint16_t IpuPower;                   //Time filtered IPU power [mW]
+  uint32_t ApuPower;                   //Time filtered APU power [mW]
+  uint32_t GfxPower;                   //Time filtered GFX power [mW]
+  uint32_t dGpuPower;                  //Time filtered dGPU power [mW]
+  uint32_t SocketPower;                //Time filtered power used for PPT/STAPM [APU+dGPU] [mW]
+  uint32_t AllCorePower;               //Time filtered sum of core power across all cores in the socket [mW]
+  uint32_t FilterAlphaValue;           //Metrics table alpha filter time constant [us]
+  uint32_t MetricsCounter;             //Counter that is incremented on every metrics table update [PM_TIMER cycles]
+  uint16_t MemclkFrequency;            //Time filtered target MEMCLK frequency [MHz]
+  uint16_t MpipuclkFrequency;          //Time filtered target MPIPUCLK frequency [MHz]
+  uint16_t IpuReads;                   //Time filtered IPU read bandwidth [MB/sec]
+  uint16_t IpuWrites;                  //Time filtered IPU write bandwidth [MB/sec]
+  uint32_t ThrottleResidency_PROCHOT;  //Counter that is incremented on every metrics table update when PROCHOT was engaged [PM_TIMER cycles]
+  uint32_t ThrottleResidency_SPL;      //Counter that is incremented on every metrics table update when SPL was engaged [PM_TIMER cycles]
+  uint32_t ThrottleResidency_FPPT;     //Counter that is incremented on every metrics table update when fast PPT was engaged [PM_TIMER cycles]
+  uint32_t ThrottleResidency_SPPT;     //Counter that is incremented on every metrics table update when slow PPT was engaged [PM_TIMER cycles]
+  uint32_t ThrottleResidency_THM_CORE; //Counter that is incremented on every metrics table update when CORE thermal throttling was engaged [PM_TIMER cycles]
+  uint32_t ThrottleResidency_THM_GFX;  //Counter that is incremented on every metrics table update when GFX thermal throttling was engaged [PM_TIMER cycles]
+  uint32_t ThrottleResidency_THM_SOC;  //Counter that is incremented on every metrics table update when SOC thermal throttling was engaged [PM_TIMER cycles]
+  uint16_t Psys;                       //Time filtered Psys power [mW]
+  uint16_t spare1;
+  uint32_t spare[6];
 } SmuMetrics_t;
 
 //ISP tile definitions
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h
index e2ee855c7748..e862d323caab 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h
@@ -138,10 +138,9 @@
 #define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4A
 #define PPSMC_MSG_SetPriorityDeltaGain           0x4B
 #define PPSMC_MSG_AllowIHHostInterrupt           0x4C
-
 #define PPSMC_MSG_DALNotPresent                  0x4E
-
-#define PPSMC_Message_Count                      0x4F
+#define PPSMC_MSG_EnableUCLKShadow               0x51
+#define PPSMC_Message_Count                      0x52
 
 //Debug Dump Message
 #define DEBUGSMC_MSG_TestMessage                    0x1
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
index fef2d290f3f2..7b812b9994d7 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
@@ -123,7 +123,7 @@ typedef enum {
   VOLTAGE_GUARDBAND_COUNT
 } GFX_GUARDBAND_e;
 
-#define SMU_METRICS_TABLE_VERSION 0x9
+#define SMU_METRICS_TABLE_VERSION 0xB
 
 typedef struct __attribute__((packed, aligned(4))) {
   uint32_t AccumulationCounter;
@@ -219,7 +219,103 @@ typedef struct __attribute__((packed, aligned(4))) {
   uint32_t PCIenReplayARolloverCountAcc;  // The Pcie counter itself is accumulated
   uint32_t PCIeNAKSentCountAcc;           // The Pcie counter itself is accumulated
   uint32_t PCIeNAKReceivedCountAcc;       // The Pcie counter itself is accumulated
-} MetricsTable_t;
+
+  // VCN/JPEG ACTIVITY
+  uint32_t VcnBusy[4];
+  uint32_t JpegBusy[32];
+} MetricsTableX_t;
+
+typedef struct __attribute__((packed, aligned(4))) {
+  uint32_t AccumulationCounter;
+
+  //TEMPERATURE
+  uint32_t MaxSocketTemperature;
+  uint32_t MaxVrTemperature;
+  uint32_t MaxHbmTemperature;
+  uint64_t MaxSocketTemperatureAcc;
+  uint64_t MaxVrTemperatureAcc;
+  uint64_t MaxHbmTemperatureAcc;
+
+  //POWER
+  uint32_t SocketPowerLimit;
+  uint32_t MaxSocketPowerLimit;
+  uint32_t SocketPower;
+
+  //ENERGY
+  uint64_t Timestamp;
+  uint64_t SocketEnergyAcc;
+  uint64_t CcdEnergyAcc;
+  uint64_t XcdEnergyAcc;
+  uint64_t AidEnergyAcc;
+  uint64_t HbmEnergyAcc;
+
+  //FREQUENCY
+  uint32_t CclkFrequencyLimit;
+  uint32_t GfxclkFrequencyLimit;
+  uint32_t FclkFrequency;
+  uint32_t UclkFrequency;
+  uint32_t SocclkFrequency[4];
+  uint32_t VclkFrequency[4];
+  uint32_t DclkFrequency[4];
+  uint32_t LclkFrequency[4];
+  uint64_t GfxclkFrequencyAcc[8];
+  uint64_t CclkFrequencyAcc[96];
+
+  //FREQUENCY RANGE
+  uint32_t MaxCclkFrequency;
+  uint32_t MinCclkFrequency;
+  uint32_t MaxGfxclkFrequency;
+  uint32_t MinGfxclkFrequency;
+  uint32_t FclkFrequencyTable[4];
+  uint32_t UclkFrequencyTable[4];
+  uint32_t SocclkFrequencyTable[4];
+  uint32_t VclkFrequencyTable[4];
+  uint32_t DclkFrequencyTable[4];
+  uint32_t LclkFrequencyTable[4];
+  uint32_t MaxLclkDpmRange;
+  uint32_t MinLclkDpmRange;
+
+  //XGMI
+  uint32_t XgmiWidth;
+  uint32_t XgmiBitrate;
+  uint64_t XgmiReadBandwidthAcc[8];
+  uint64_t XgmiWriteBandwidthAcc[8];
+
+  //ACTIVITY
+  uint32_t SocketC0Residency;
+  uint32_t SocketGfxBusy;
+  uint32_t DramBandwidthUtilization;
+  uint64_t SocketC0ResidencyAcc;
+  uint64_t SocketGfxBusyAcc;
+  uint64_t DramBandwidthAcc;
+  uint32_t MaxDramBandwidth;
+  uint64_t DramBandwidthUtilizationAcc;
+  uint64_t PcieBandwidthAcc[4];
+
+  //THROTTLERS
+  uint32_t ProchotResidencyAcc;
+  uint32_t PptResidencyAcc;
+  uint32_t SocketThmResidencyAcc;
+  uint32_t VrThmResidencyAcc;
+  uint32_t HbmThmResidencyAcc;
+  uint32_t GfxLockXCDMak;
+
+  // New Items at end to maintain driver compatibility
+  uint32_t GfxclkFrequency[8];
+
+  //PSNs
+  uint64_t PublicSerialNumber_AID[4];
+  uint64_t PublicSerialNumber_XCD[8];
+  uint64_t PublicSerialNumber_CCD[12];
+
+  //XGMI Data transfer size
+  uint64_t XgmiReadDataSizeAcc[8];//in KByte
+  uint64_t XgmiWriteDataSizeAcc[8];//in KByte
+
+  // VCN/JPEG ACTIVITY
+  uint32_t VcnBusy[4];
+  uint32_t JpegBusy[32];
+} MetricsTableA_t;
 
 #define SMU_VF_METRICS_TABLE_VERSION 0x3
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h
index 6aaefca9b595..a6bf9cdd130e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h
@@ -134,6 +134,7 @@
 #define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4A
 #define PPSMC_MSG_SetPriorityDeltaGain           0x4B
 #define PPSMC_MSG_AllowIHHostInterrupt           0x4C
-#define PPSMC_Message_Count                      0x4D
+#define PPSMC_MSG_EnableUCLKShadow               0x51
+#define PPSMC_Message_Count                      0x52
 
 #endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
index 9dd47d91093e..953a767613b1 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
@@ -259,7 +259,9 @@
 	__SMU_DUMMY_MAP(PowerUpUmsch),	\
 	__SMU_DUMMY_MAP(PowerDownUmsch),	\
 	__SMU_DUMMY_MAP(SetSoftMaxVpe),	\
-	__SMU_DUMMY_MAP(SetSoftMinVpe),
+	__SMU_DUMMY_MAP(SetSoftMinVpe), \
+	__SMU_DUMMY_MAP(GetMetricsVersion), \
+	__SMU_DUMMY_MAP(EnableUCLKShadow),
 
 #undef __SMU_DUMMY_MAP
 #define __SMU_DUMMY_MAP(type)	SMU_MSG_##type
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index 95cb919718ae..fbd57fa1a004 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -210,15 +210,8 @@ int smu_v13_0_set_azalia_d3_pme(struct smu_context *smu);
 int smu_v13_0_get_max_sustainable_clocks_by_dc(struct smu_context *smu,
 					       struct pp_smu_nv_clock_table *max_clocks);
 
-int smu_v13_0_baco_set_armd3_sequence(struct smu_context *smu,
-				      enum smu_baco_seq baco_seq);
-
 bool smu_v13_0_baco_is_support(struct smu_context *smu);
 
-enum smu_baco_state smu_v13_0_baco_get_state(struct smu_context *smu);
-
-int smu_v13_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state);
-
 int smu_v13_0_baco_enter(struct smu_context *smu);
 int smu_v13_0_baco_exit(struct smu_context *smu);
 
@@ -301,5 +294,9 @@ int smu_v13_0_update_pcie_parameters(struct smu_context *smu,
 
 int smu_v13_0_disable_pmfw_state(struct smu_context *smu);
 
+int smu_v13_0_enable_uclk_shadow(struct smu_context *smu, bool enable);
+
+int smu_v13_0_set_wbrf_exclusion_ranges(struct smu_context *smu,
+						 struct freq_band_range *exclusion_ranges);
 #endif
 #endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h
index a5b569976f19..3f7463c1c1a9 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h
@@ -26,8 +26,8 @@
 #include "amdgpu_smu.h"
 
 #define SMU14_DRIVER_IF_VERSION_INV 0xFFFFFFFF
+#define SMU14_DRIVER_IF_VERSION_SMU_V14_0_0 0x7
 #define SMU14_DRIVER_IF_VERSION_SMU_V14_0_2 0x1
-#define SMU14_DRIVER_IF_VERSION_SMU_V14_0_0 0x6
 
 #define FEATURE_MASK(feature) (1ULL << feature)
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
index 2cb6b68222ba..4cd43bbec910 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
@@ -2407,8 +2407,6 @@ static const struct pptable_funcs arcturus_ppt_funcs = {
 	.set_azalia_d3_pme = smu_v11_0_set_azalia_d3_pme,
 	.get_max_sustainable_clocks_by_dc = smu_v11_0_get_max_sustainable_clocks_by_dc,
 	.baco_is_support = smu_v11_0_baco_is_support,
-	.baco_get_state = smu_v11_0_baco_get_state,
-	.baco_set_state = smu_v11_0_baco_set_state,
 	.baco_enter = smu_v11_0_baco_enter,
 	.baco_exit = smu_v11_0_baco_exit,
 	.get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index a38233cc5b7f..8d1d29ffb0f1 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -3537,8 +3537,6 @@ static const struct pptable_funcs navi10_ppt_funcs = {
 	.set_azalia_d3_pme = smu_v11_0_set_azalia_d3_pme,
 	.get_max_sustainable_clocks_by_dc = smu_v11_0_get_max_sustainable_clocks_by_dc,
 	.baco_is_support = smu_v11_0_baco_is_support,
-	.baco_get_state = smu_v11_0_baco_get_state,
-	.baco_set_state = smu_v11_0_baco_set_state,
 	.baco_enter = navi10_baco_enter,
 	.baco_exit = navi10_baco_exit,
 	.get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 1de9f8b5cc5f..21fc033528fa 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -4428,8 +4428,6 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = {
 	.set_azalia_d3_pme = smu_v11_0_set_azalia_d3_pme,
 	.get_max_sustainable_clocks_by_dc = smu_v11_0_get_max_sustainable_clocks_by_dc,
 	.baco_is_support = smu_v11_0_baco_is_support,
-	.baco_get_state = smu_v11_0_baco_get_state,
-	.baco_set_state = smu_v11_0_baco_set_state,
 	.baco_enter = sienna_cichlid_baco_enter,
 	.baco_exit = sienna_cichlid_baco_exit,
 	.mode1_reset_is_support = sienna_cichlid_is_mode1_reset_supported,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
index 5a314d0316c1..c7bfa68bf00f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
@@ -1442,10 +1442,12 @@ static int smu_v11_0_irq_process(struct amdgpu_device *adev,
 			case 0x3:
 				dev_dbg(adev->dev, "Switched to AC mode!\n");
 				schedule_work(&smu->interrupt_work);
+				adev->pm.ac_power = true;
 				break;
 			case 0x4:
 				dev_dbg(adev->dev, "Switched to DC mode!\n");
 				schedule_work(&smu->interrupt_work);
+				adev->pm.ac_power = false;
 				break;
 			case 0x7:
 				/*
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
index 762b31455a0b..2ff6deedef95 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
@@ -2193,8 +2193,7 @@ static int vangogh_get_dpm_clock_table(struct smu_context *smu, struct dpm_clock
 	return 0;
 }
 
-
-static int vangogh_system_features_control(struct smu_context *smu, bool en)
+static int vangogh_notify_rlc_state(struct smu_context *smu, bool en)
 {
 	struct amdgpu_device *adev = smu->adev;
 	int ret = 0;
@@ -2523,7 +2522,7 @@ static const struct pptable_funcs vangogh_ppt_funcs = {
 	.print_clk_levels = vangogh_common_print_clk_levels,
 	.set_default_dpm_table = vangogh_set_default_dpm_tables,
 	.set_fine_grain_gfx_freq_parameters = vangogh_set_fine_grain_gfx_freq_parameters,
-	.system_features_control = vangogh_system_features_control,
+	.notify_rlc_state = vangogh_notify_rlc_state,
 	.feature_is_enabled = smu_cmn_feature_is_enabled,
 	.set_power_profile_mode = vangogh_set_power_profile_mode,
 	.get_power_profile_mode = vangogh_get_power_profile_mode,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
index 1a6675d70a4b..dd9bcbd630a1 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
@@ -257,8 +257,11 @@ static int aldebaran_tables_init(struct smu_context *smu)
 	}
 
 	smu_table->ecc_table = kzalloc(tables[SMU_TABLE_ECCINFO].size, GFP_KERNEL);
-	if (!smu_table->ecc_table)
+	if (!smu_table->ecc_table) {
+		kfree(smu_table->metrics_table);
+		kfree(smu_table->gpu_metrics_table);
 		return -ENOMEM;
+	}
 
 	return 0;
 }
@@ -1527,7 +1530,6 @@ static int aldebaran_i2c_control_init(struct smu_context *smu)
 	smu_i2c->port = 0;
 	mutex_init(&smu_i2c->mutex);
 	control->owner = THIS_MODULE;
-	control->class = I2C_CLASS_SPD;
 	control->dev.parent = &adev->pdev->dev;
 	control->algo = &aldebaran_i2c_algo;
 	snprintf(control->name, sizeof(control->name), "AMDGPU SMU 0");
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index cf1b84060bc3..c486182ff275 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -1379,10 +1379,12 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev,
 			case 0x3:
 				dev_dbg(adev->dev, "Switched to AC mode!\n");
 				smu_v13_0_ack_ac_dc_interrupt(smu);
+				adev->pm.ac_power = true;
 				break;
 			case 0x4:
 				dev_dbg(adev->dev, "Switched to DC mode!\n");
 				smu_v13_0_ack_ac_dc_interrupt(smu);
+				adev->pm.ac_power = false;
 				break;
 			case 0x7:
 				/*
@@ -2199,7 +2201,7 @@ int smu_v13_0_gfx_ulv_control(struct smu_context *smu,
 	return ret;
 }
 
-int smu_v13_0_baco_set_armd3_sequence(struct smu_context *smu,
+static int smu_v13_0_baco_set_armd3_sequence(struct smu_context *smu,
 				      enum smu_baco_seq baco_seq)
 {
 	struct smu_baco_context *smu_baco = &smu->smu_baco;
@@ -2221,33 +2223,14 @@ int smu_v13_0_baco_set_armd3_sequence(struct smu_context *smu,
 	return 0;
 }
 
-bool smu_v13_0_baco_is_support(struct smu_context *smu)
-{
-	struct smu_baco_context *smu_baco = &smu->smu_baco;
-
-	if (amdgpu_sriov_vf(smu->adev) ||
-	    !smu_baco->platform_support)
-		return false;
-
-	/* return true if ASIC is in BACO state already */
-	if (smu_v13_0_baco_get_state(smu) == SMU_BACO_STATE_ENTER)
-		return true;
-
-	if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_BACO_BIT) &&
-	    !smu_cmn_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT))
-		return false;
-
-	return true;
-}
-
-enum smu_baco_state smu_v13_0_baco_get_state(struct smu_context *smu)
+static enum smu_baco_state smu_v13_0_baco_get_state(struct smu_context *smu)
 {
 	struct smu_baco_context *smu_baco = &smu->smu_baco;
 
 	return smu_baco->state;
 }
 
-int smu_v13_0_baco_set_state(struct smu_context *smu,
+static int smu_v13_0_baco_set_state(struct smu_context *smu,
 			     enum smu_baco_state state)
 {
 	struct smu_baco_context *smu_baco = &smu->smu_baco;
@@ -2281,24 +2264,60 @@ int smu_v13_0_baco_set_state(struct smu_context *smu,
 	return ret;
 }
 
-int smu_v13_0_baco_enter(struct smu_context *smu)
+bool smu_v13_0_baco_is_support(struct smu_context *smu)
 {
-	int ret = 0;
+	struct smu_baco_context *smu_baco = &smu->smu_baco;
 
-	ret = smu_v13_0_baco_set_state(smu,
-				       SMU_BACO_STATE_ENTER);
-	if (ret)
-		return ret;
+	if (amdgpu_sriov_vf(smu->adev) || !smu_baco->platform_support)
+		return false;
+
+	/* return true if ASIC is in BACO state already */
+	if (smu_v13_0_baco_get_state(smu) == SMU_BACO_STATE_ENTER)
+		return true;
 
-	msleep(10);
+	if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_BACO_BIT) &&
+	    !smu_cmn_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT))
+		return false;
 
-	return ret;
+	return true;
+}
+
+int smu_v13_0_baco_enter(struct smu_context *smu)
+{
+	struct smu_baco_context *smu_baco = &smu->smu_baco;
+	struct amdgpu_device *adev = smu->adev;
+	int ret;
+
+	if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) {
+		return smu_v13_0_baco_set_armd3_sequence(smu,
+				(smu_baco->maco_support && amdgpu_runtime_pm != 1) ?
+					BACO_SEQ_BAMACO : BACO_SEQ_BACO);
+	} else {
+		ret = smu_v13_0_baco_set_state(smu, SMU_BACO_STATE_ENTER);
+		if (!ret)
+			usleep_range(10000, 11000);
+
+		return ret;
+	}
 }
 
 int smu_v13_0_baco_exit(struct smu_context *smu)
 {
-	return smu_v13_0_baco_set_state(smu,
-					SMU_BACO_STATE_EXIT);
+	struct amdgpu_device *adev = smu->adev;
+	int ret;
+
+	if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) {
+		/* Wait for PMFW handling for the Dstate change */
+		usleep_range(10000, 11000);
+		ret = smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS);
+	} else {
+		ret = smu_v13_0_baco_set_state(smu, SMU_BACO_STATE_EXIT);
+	}
+
+	if (!ret)
+		adev->gfx.is_poweron = false;
+
+	return ret;
 }
 
 int smu_v13_0_set_gfx_power_up_by_imu(struct smu_context *smu)
@@ -2490,3 +2509,51 @@ int smu_v13_0_disable_pmfw_state(struct smu_context *smu)
 
 	return ret == 0 ? 0 : -EINVAL;
 }
+
+int smu_v13_0_enable_uclk_shadow(struct smu_context *smu, bool enable)
+{
+	return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_EnableUCLKShadow, enable, NULL);
+}
+
+int smu_v13_0_set_wbrf_exclusion_ranges(struct smu_context *smu,
+						 struct freq_band_range *exclusion_ranges)
+{
+	WifiBandEntryTable_t wifi_bands;
+	int valid_entries = 0;
+	int ret, i;
+
+	memset(&wifi_bands, 0, sizeof(wifi_bands));
+	for (i = 0; i < ARRAY_SIZE(wifi_bands.WifiBandEntry); i++) {
+		if (!exclusion_ranges[i].start && !exclusion_ranges[i].end)
+			break;
+
+		/* PMFW expects the inputs to be in MHz units */
+		wifi_bands.WifiBandEntry[valid_entries].LowFreq =
+			DIV_ROUND_DOWN_ULL(exclusion_ranges[i].start, HZ_PER_MHZ);
+		wifi_bands.WifiBandEntry[valid_entries++].HighFreq =
+			DIV_ROUND_UP_ULL(exclusion_ranges[i].end, HZ_PER_MHZ);
+	}
+	wifi_bands.WifiBandEntryNum = valid_entries;
+
+	/*
+	 * As confirmed with the PMFW team, WifiBandEntryNum = 0
+	 * is a valid setting.
+	 *
+	 * Consider the scenario below:
+	 * - At first the wifi device adds an exclusion range e.g. (2400,2500) to
+	 *   BIOS and our driver gets notified. We will set WifiBandEntryNum = 1
+	 *   and pass the WifiBandEntry (2400, 2500) to PMFW.
+	 *
+	 * - Later the wifi device removes the wifiband list added above and
+	 *   our driver gets notified again. At this time, driver will set
+	 *   WifiBandEntryNum = 0 and pass an empty WifiBandEntry list to PMFW.
+	 *
+	 * - PMFW may still need to do some uclk shadow update(e.g. switching
+	 *   from shadow clock back to primary clock) on receiving this.
+	 */
+	ret = smu_cmn_update_table(smu, SMU_TABLE_WIFIBAND, 0, &wifi_bands, true);
+	if (ret)
+		dev_warn(smu->adev->dev, "Failed to set wifiband!");
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 82c4e1f1c6f0..a9954ffc02c5 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -169,6 +169,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] =
 	MSG_MAP(AllowIHHostInterrupt,		PPSMC_MSG_AllowIHHostInterrupt,       0),
 	MSG_MAP(ReenableAcDcInterrupt,		PPSMC_MSG_ReenableAcDcInterrupt,       0),
 	MSG_MAP(DALNotPresent,		PPSMC_MSG_DALNotPresent,       0),
+	MSG_MAP(EnableUCLKShadow,		PPSMC_MSG_EnableUCLKShadow,            0),
 };
 
 static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = {
@@ -253,6 +254,7 @@ static struct cmn2asic_mapping smu_v13_0_0_table_map[SMU_TABLE_COUNT] = {
 	TAB_MAP(I2C_COMMANDS),
 	TAB_MAP(ECCINFO),
 	TAB_MAP(OVERDRIVE),
+	TAB_MAP(WIFIBAND),
 };
 
 static struct cmn2asic_mapping smu_v13_0_0_pwr_src_map[SMU_POWER_SOURCE_COUNT] = {
@@ -498,6 +500,9 @@ static int smu_v13_0_0_tables_init(struct smu_context *smu)
 			PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
 	SMU_TABLE_INIT(tables, SMU_TABLE_ECCINFO, sizeof(EccInfoTable_t),
 			PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
+	SMU_TABLE_INIT(tables, SMU_TABLE_WIFIBAND,
+		       sizeof(WifiBandEntryTable_t), PAGE_SIZE,
+		       AMDGPU_GEM_DOMAIN_VRAM);
 
 	smu_table->metrics_table = kzalloc(sizeof(SmuMetricsExternal_t), GFP_KERNEL);
 	if (!smu_table->metrics_table)
@@ -2352,6 +2357,7 @@ static int smu_v13_0_0_get_power_limit(struct smu_context *smu,
 	PPTable_t *pptable = table_context->driver_pptable;
 	SkuTable_t *skutable = &pptable->SkuTable;
 	uint32_t power_limit, od_percent_upper, od_percent_lower;
+	uint32_t msg_limit = skutable->MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC];
 
 	if (smu_v13_0_get_current_power_limit(smu, &power_limit))
 		power_limit = smu->adev->pm.ac_power ?
@@ -2375,7 +2381,7 @@ static int smu_v13_0_0_get_power_limit(struct smu_context *smu,
 					od_percent_upper, od_percent_lower, power_limit);
 
 	if (max_power_limit) {
-		*max_power_limit = power_limit * (100 + od_percent_upper);
+		*max_power_limit = msg_limit * (100 + od_percent_upper);
 		*max_power_limit /= 100;
 	}
 
@@ -2540,16 +2546,19 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu,
 
 	workload_mask = 1 << workload_type;
 
-	/* Add optimizations for SMU13.0.0.  Reuse the power saving profile */
-	if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE &&
-	    (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0)) &&
-	    ((smu->adev->pm.fw_version == 0x004e6601) ||
-	     (smu->adev->pm.fw_version >= 0x004e7400))) {
-		workload_type = smu_cmn_to_asic_specific_index(smu,
-							       CMN2ASIC_MAPPING_WORKLOAD,
-							       PP_SMC_POWER_PROFILE_POWERSAVING);
-		if (workload_type >= 0)
-			workload_mask |= 1 << workload_type;
+	/* Add optimizations for SMU13.0.0/10.  Reuse the power saving profile */
+	if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE) {
+		if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) &&
+			((smu->adev->pm.fw_version == 0x004e6601) ||
+			(smu->adev->pm.fw_version >= 0x004e7300))) ||
+			(amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
+			 smu->adev->pm.fw_version >= 0x00504500)) {
+			workload_type = smu_cmn_to_asic_specific_index(smu,
+														   CMN2ASIC_MAPPING_WORKLOAD,
+														   PP_SMC_POWER_PROFILE_POWERSAVING);
+			if (workload_type >= 0)
+				workload_mask |= 1 << workload_type;
+		}
 	}
 
 	return smu_cmn_send_smc_msg_with_param(smu,
@@ -2558,38 +2567,6 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu,
 					       NULL);
 }
 
-static int smu_v13_0_0_baco_enter(struct smu_context *smu)
-{
-	struct smu_baco_context *smu_baco = &smu->smu_baco;
-	struct amdgpu_device *adev = smu->adev;
-
-	if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev))
-		return smu_v13_0_baco_set_armd3_sequence(smu,
-				(smu_baco->maco_support && amdgpu_runtime_pm != 1) ?
-					BACO_SEQ_BAMACO : BACO_SEQ_BACO);
-	else
-		return smu_v13_0_baco_enter(smu);
-}
-
-static int smu_v13_0_0_baco_exit(struct smu_context *smu)
-{
-	struct amdgpu_device *adev = smu->adev;
-	int ret;
-
-	if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) {
-		/* Wait for PMFW handling for the Dstate change */
-		usleep_range(10000, 11000);
-		ret = smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS);
-	} else {
-		ret = smu_v13_0_baco_exit(smu);
-	}
-
-	if (!ret)
-		adev->gfx.is_poweron = false;
-
-	return ret;
-}
-
 static bool smu_v13_0_0_is_mode1_reset_supported(struct smu_context *smu)
 {
 	struct amdgpu_device *adev = smu->adev;
@@ -2720,7 +2697,6 @@ static int smu_v13_0_0_i2c_control_init(struct smu_context *smu)
 		smu_i2c->port = i;
 		mutex_init(&smu_i2c->mutex);
 		control->owner = THIS_MODULE;
-		control->class = I2C_CLASS_SPD;
 		control->dev.parent = &adev->pdev->dev;
 		control->algo = &smu_v13_0_0_i2c_algo;
 		snprintf(control->name, sizeof(control->name), "AMDGPU SMU %d", i);
@@ -2772,13 +2748,7 @@ static int smu_v13_0_0_set_mp1_state(struct smu_context *smu,
 
 	switch (mp1_state) {
 	case PP_MP1_STATE_UNLOAD:
-		ret = smu_cmn_send_smc_msg_with_param(smu,
-											  SMU_MSG_PrepareMp1ForUnload,
-											  0x55, NULL);
-
-		if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT)
-			ret = smu_v13_0_disable_pmfw_state(smu);
-
+		ret = smu_cmn_set_mp1_state(smu, mp1_state);
 		break;
 	default:
 		/* Ignore others */
@@ -2970,6 +2940,69 @@ static ssize_t smu_v13_0_0_get_ecc_info(struct smu_context *smu,
 	return ret;
 }
 
+/* Report whether the SMC firmware on this MP1 IP version is new enough for WBRF. */
+static bool smu_v13_0_0_wbrf_support_check(struct smu_context *smu)
+{
+	uint32_t mp1_version = amdgpu_ip_version(smu->adev, MP1_HWIP, 0);
+
+	/* Each supported IP has its own minimum firmware version. */
+	if (mp1_version == IP_VERSION(13, 0, 0))
+		return smu->smc_fw_version >= 0x004e6300;
+	if (mp1_version == IP_VERSION(13, 0, 10))
+		return smu->smc_fw_version >= 0x00503300;
+
+	return false;
+}
+
+/*
+ * Limits above the PPT0/AC message limit are applied through the overdrive Ppt.
+ */
+static int smu_v13_0_0_set_power_limit(struct smu_context *smu,
+				       enum smu_ppt_limit_type limit_type,
+				       uint32_t limit)
+{
+	struct smu_table_context *table_context = &smu->smu_table;
+	OverDriveTableExternal_t *od_table =
+		(OverDriveTableExternal_t *)table_context->overdrive_table;
+	PPTable_t *pptable = table_context->driver_pptable;
+	uint32_t msg_limit = pptable->SkuTable.MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC];
+	int ret;
+
+	if (limit_type != SMU_DEFAULT_PPT_LIMIT ||
+	    (limit > msg_limit && !smu->od_enabled))
+		return -EINVAL;
+
+	if (limit <= msg_limit) {
+		/* The current limit sits above the message limit: reset the OD Ppt first. */
+		if (smu->current_power_limit > msg_limit) {
+			od_table->OverDriveTable.Ppt = 0;
+			od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_PPT_BIT;
+			ret = smu_v13_0_0_upload_overdrive_table(smu, od_table);
+			if (ret) {
+				dev_err(smu->adev->dev, "Failed to upload overdrive table!\n");
+				return ret;
+			}
+		}
+		return smu_v13_0_set_power_limit(smu, limit_type, limit);
+	}
+
+	ret = smu_v13_0_set_power_limit(smu, limit_type, msg_limit);
+	if (ret)
+		return ret;
+
+	od_table->OverDriveTable.Ppt = (limit * 100) / msg_limit - 100;
+	od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_PPT_BIT;
+	ret = smu_v13_0_0_upload_overdrive_table(smu, od_table);
+	if (ret) {
+		dev_err(smu->adev->dev, "Failed to upload overdrive table!\n");
+		return ret;
+	}
+
+	smu->current_power_limit = limit;
+
+	return 0;
+}
+
 static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
 	.get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask,
 	.set_default_dpm_table = smu_v13_0_0_set_default_dpm_table,
@@ -3024,7 +3057,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
 	.set_fan_control_mode = smu_v13_0_set_fan_control_mode,
 	.enable_mgpu_fan_boost = smu_v13_0_0_enable_mgpu_fan_boost,
 	.get_power_limit = smu_v13_0_0_get_power_limit,
-	.set_power_limit = smu_v13_0_set_power_limit,
+	.set_power_limit = smu_v13_0_0_set_power_limit,
 	.set_power_source = smu_v13_0_set_power_source,
 	.get_power_profile_mode = smu_v13_0_0_get_power_profile_mode,
 	.set_power_profile_mode = smu_v13_0_0_set_power_profile_mode,
@@ -3035,10 +3068,8 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
 	.deep_sleep_control = smu_v13_0_deep_sleep_control,
 	.gfx_ulv_control = smu_v13_0_gfx_ulv_control,
 	.baco_is_support = smu_v13_0_baco_is_support,
-	.baco_get_state = smu_v13_0_baco_get_state,
-	.baco_set_state = smu_v13_0_baco_set_state,
-	.baco_enter = smu_v13_0_0_baco_enter,
-	.baco_exit = smu_v13_0_0_baco_exit,
+	.baco_enter = smu_v13_0_baco_enter,
+	.baco_exit = smu_v13_0_baco_exit,
 	.mode1_reset_is_support = smu_v13_0_0_is_mode1_reset_supported,
 	.mode1_reset = smu_v13_0_0_mode1_reset,
 	.mode2_reset = smu_v13_0_0_mode2_reset,
@@ -3050,6 +3081,9 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
 	.gpo_control = smu_v13_0_gpo_control,
 	.get_ecc_info = smu_v13_0_0_get_ecc_info,
 	.notify_display_change = smu_v13_0_notify_display_change,
+	.is_asic_wbrf_supported = smu_v13_0_0_wbrf_support_check,
+	.enable_uclk_shadow = smu_v13_0_enable_uclk_shadow,
+	.set_wbrf_exclusion_ranges = smu_v13_0_set_wbrf_exclusion_ranges,
 };
 
 void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 0e5a77c3c2e2..7e1941cf1796 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -120,6 +120,7 @@ struct mca_ras_info {
 #define P2S_TABLE_ID_A 0x50325341
 #define P2S_TABLE_ID_X 0x50325358
 
+// clang-format off
 static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COUNT] = {
 	MSG_MAP(TestMessage,			     PPSMC_MSG_TestMessage,			0),
 	MSG_MAP(GetSmuVersion,			     PPSMC_MSG_GetSmuVersion,			1),
@@ -128,6 +129,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU
 	MSG_MAP(DisableAllSmuFeatures,		     PPSMC_MSG_DisableAllSmuFeatures,		0),
 	MSG_MAP(RequestI2cTransaction,		     PPSMC_MSG_RequestI2cTransaction,		0),
 	MSG_MAP(GetMetricsTable,		     PPSMC_MSG_GetMetricsTable,			1),
+	MSG_MAP(GetMetricsVersion,		     PPSMC_MSG_GetMetricsVersion,		1),
 	MSG_MAP(GetEnabledSmuFeaturesHigh,	     PPSMC_MSG_GetEnabledSmuFeaturesHigh,	1),
 	MSG_MAP(GetEnabledSmuFeaturesLow,	     PPSMC_MSG_GetEnabledSmuFeaturesLow,	1),
 	MSG_MAP(SetDriverDramAddrHigh,		     PPSMC_MSG_SetDriverDramAddrHigh,		1),
@@ -158,8 +160,8 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU
 	MSG_MAP(GfxDriverResetRecovery,		     PPSMC_MSG_GfxDriverResetRecovery,		0),
 	MSG_MAP(GetMinGfxclkFrequency,               PPSMC_MSG_GetMinGfxDpmFreq,                1),
 	MSG_MAP(GetMaxGfxclkFrequency,               PPSMC_MSG_GetMaxGfxDpmFreq,                1),
-	MSG_MAP(SetSoftMinGfxclk,                    PPSMC_MSG_SetSoftMinGfxClk,                0),
-	MSG_MAP(SetSoftMaxGfxClk,                    PPSMC_MSG_SetSoftMaxGfxClk,                0),
+	MSG_MAP(SetSoftMinGfxclk,                    PPSMC_MSG_SetSoftMinGfxClk,                1),
+	MSG_MAP(SetSoftMaxGfxClk,                    PPSMC_MSG_SetSoftMaxGfxClk,                1),
 	MSG_MAP(PrepareMp1ForUnload,                 PPSMC_MSG_PrepareForDriverUnload,          0),
 	MSG_MAP(GetCTFLimit,                         PPSMC_MSG_GetCTFLimit,                     0),
 	MSG_MAP(GetThermalLimit,                     PPSMC_MSG_ReadThrottlerLimit,              0),
@@ -171,6 +173,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU
 	MSG_MAP(SelectPLPDMode,                      PPSMC_MSG_SelectPLPDMode,                  0),
 };
 
+// clang-format on
 static const struct cmn2asic_mapping smu_v13_0_6_clk_map[SMU_CLK_COUNT] = {
 	CLK_MAP(SOCCLK, PPCLK_SOCCLK),
 	CLK_MAP(FCLK, PPCLK_FCLK),
@@ -245,6 +248,8 @@ struct PPTable_t {
 #define SMUQ10_TO_UINT(x) ((x) >> 10)
 #define SMUQ10_FRAC(x) ((x) & 0x3ff)
 #define SMUQ10_ROUND(x) ((SMUQ10_TO_UINT(x)) + ((SMUQ10_FRAC(x)) >= 0x200))
+#define GET_METRIC_FIELD(field) ((adev->flags & AMD_IS_APU) ?\
+		(metrics_a->field) : (metrics_x->field))
 
 struct smu_v13_0_6_dpm_map {
 	enum smu_clk_type clk_type;
@@ -327,7 +332,8 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu)
 		SMU_TABLE_INIT(tables, SMU_TABLE_PMSTATUSLOG, SMU13_TOOL_SIZE,
 			       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
 
-	SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(MetricsTable_t),
+	SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS,
+		       max(sizeof(MetricsTableX_t), sizeof(MetricsTableA_t)),
 		       PAGE_SIZE,
 		       AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT);
 
@@ -335,12 +341,13 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu)
 		       PAGE_SIZE,
 		       AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT);
 
-	smu_table->metrics_table = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL);
+	smu_table->metrics_table = kzalloc(max(sizeof(MetricsTableX_t),
+		       sizeof(MetricsTableA_t)), GFP_KERNEL);
 	if (!smu_table->metrics_table)
 		return -ENOMEM;
 	smu_table->metrics_time = 0;
 
-	smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_4);
+	smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_5);
 	smu_table->gpu_metrics_table =
 		kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
 	if (!smu_table->gpu_metrics_table) {
@@ -428,13 +435,51 @@ static int smu_v13_0_6_get_metrics_table(struct smu_context *smu,
 	return 0;
 }
 
+/* Write a header plus an uncached PMFW metrics table to @metrics; returns the byte count. */
+static ssize_t smu_v13_0_6_get_pm_metrics(struct smu_context *smu,
+					  void *metrics, size_t max_size)
+{
+	struct smu_table_context *smu_tbl_ctxt = &smu->smu_table;
+	uint32_t table_version = smu_tbl_ctxt->tables[SMU_TABLE_SMU_METRICS].version;
+	uint32_t table_size = smu_tbl_ctxt->tables[SMU_TABLE_SMU_METRICS].size;
+	struct amdgpu_pm_metrics *pm_metrics = metrics;
+	uint32_t pmfw_version;
+	int ret;
+
+	if (!pm_metrics || !max_size)
+		return -EINVAL;
+
+	if (max_size < (table_size + sizeof(pm_metrics->common_header)))
+		return -EOVERFLOW;
+
+	/* Don't use cached metrics data */
+	ret = smu_v13_0_6_get_metrics_table(smu, pm_metrics->data, true);
+	if (ret)
+		return ret;
+	/* Bail out on a failed version query rather than publish an unset pmfw_version */
+	ret = smu_cmn_get_smc_version(smu, NULL, &pmfw_version);
+	if (ret)
+		return ret;
+
+	memset(&pm_metrics->common_header, 0, sizeof(pm_metrics->common_header));
+	pm_metrics->common_header.mp1_ip_discovery_version = IP_VERSION(13, 0, 6);
+	pm_metrics->common_header.pmfw_version = pmfw_version;
+	pm_metrics->common_header.pmmetrics_version = table_version;
+	pm_metrics->common_header.structure_size = sizeof(pm_metrics->common_header) + table_size;
+
+	return pm_metrics->common_header.structure_size;
+}
+
 static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu)
 {
 	struct smu_table_context *smu_table = &smu->smu_table;
-	MetricsTable_t *metrics = (MetricsTable_t *)smu_table->metrics_table;
+	MetricsTableX_t *metrics_x = (MetricsTableX_t *)smu_table->metrics_table;
+	MetricsTableA_t *metrics_a = (MetricsTableA_t *)smu_table->metrics_table;
 	struct PPTable_t *pptable =
 		(struct PPTable_t *)smu_table->driver_pptable;
+	struct amdgpu_device *adev = smu->adev;
 	int ret, i, retry = 100;
+	uint32_t table_version;
 
 	/* Store one-time values in driver PPTable */
 	if (!pptable->Init) {
@@ -444,7 +489,7 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu)
 				return ret;
 
 			/* Ensure that metrics have been updated */
-			if (metrics->AccumulationCounter)
+			if (GET_METRIC_FIELD(AccumulationCounter))
 				break;
 
 			usleep_range(1000, 1100);
@@ -453,30 +498,37 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu)
 		if (!retry)
 			return -ETIME;
 
+		ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetMetricsVersion,
+					   &table_version);
+		if (ret)
+			return ret;
+		smu_table->tables[SMU_TABLE_SMU_METRICS].version =
+			table_version;
+
 		pptable->MaxSocketPowerLimit =
-			SMUQ10_ROUND(metrics->MaxSocketPowerLimit);
+			SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketPowerLimit));
 		pptable->MaxGfxclkFrequency =
-			SMUQ10_ROUND(metrics->MaxGfxclkFrequency);
+			SMUQ10_ROUND(GET_METRIC_FIELD(MaxGfxclkFrequency));
 		pptable->MinGfxclkFrequency =
-			SMUQ10_ROUND(metrics->MinGfxclkFrequency);
+			SMUQ10_ROUND(GET_METRIC_FIELD(MinGfxclkFrequency));
 
 		for (i = 0; i < 4; ++i) {
 			pptable->FclkFrequencyTable[i] =
-				SMUQ10_ROUND(metrics->FclkFrequencyTable[i]);
+				SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequencyTable)[i]);
 			pptable->UclkFrequencyTable[i] =
-				SMUQ10_ROUND(metrics->UclkFrequencyTable[i]);
+				SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequencyTable)[i]);
 			pptable->SocclkFrequencyTable[i] = SMUQ10_ROUND(
-				metrics->SocclkFrequencyTable[i]);
+				GET_METRIC_FIELD(SocclkFrequencyTable)[i]);
 			pptable->VclkFrequencyTable[i] =
-				SMUQ10_ROUND(metrics->VclkFrequencyTable[i]);
+				SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequencyTable)[i]);
 			pptable->DclkFrequencyTable[i] =
-				SMUQ10_ROUND(metrics->DclkFrequencyTable[i]);
+				SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequencyTable)[i]);
 			pptable->LclkFrequencyTable[i] =
-				SMUQ10_ROUND(metrics->LclkFrequencyTable[i]);
+				SMUQ10_ROUND(GET_METRIC_FIELD(LclkFrequencyTable)[i]);
 		}
 
 		/* use AID0 serial number by default */
-		pptable->PublicSerialNumber_AID = metrics->PublicSerialNumber_AID[0];
+		pptable->PublicSerialNumber_AID = GET_METRIC_FIELD(PublicSerialNumber_AID)[0];
 
 		pptable->Init = true;
 	}
@@ -778,7 +830,8 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu,
 					    uint32_t *value)
 {
 	struct smu_table_context *smu_table = &smu->smu_table;
-	MetricsTable_t *metrics = (MetricsTable_t *)smu_table->metrics_table;
+	MetricsTableX_t *metrics_x = (MetricsTableX_t *)smu_table->metrics_table;
+	MetricsTableA_t *metrics_a = (MetricsTableA_t *)smu_table->metrics_table;
 	struct amdgpu_device *adev = smu->adev;
 	int ret = 0;
 	int xcc_id;
@@ -793,50 +846,50 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu,
 	case METRICS_AVERAGE_GFXCLK:
 		if (smu->smc_fw_version >= 0x552F00) {
 			xcc_id = GET_INST(GC, 0);
-			*value = SMUQ10_ROUND(metrics->GfxclkFrequency[xcc_id]);
+			*value = SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency)[xcc_id]);
 		} else {
 			*value = 0;
 		}
 		break;
 	case METRICS_CURR_SOCCLK:
 	case METRICS_AVERAGE_SOCCLK:
-		*value = SMUQ10_ROUND(metrics->SocclkFrequency[0]);
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency)[0]);
 		break;
 	case METRICS_CURR_UCLK:
 	case METRICS_AVERAGE_UCLK:
-		*value = SMUQ10_ROUND(metrics->UclkFrequency);
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency));
 		break;
 	case METRICS_CURR_VCLK:
-		*value = SMUQ10_ROUND(metrics->VclkFrequency[0]);
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency)[0]);
 		break;
 	case METRICS_CURR_DCLK:
-		*value = SMUQ10_ROUND(metrics->DclkFrequency[0]);
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency)[0]);
 		break;
 	case METRICS_CURR_FCLK:
-		*value = SMUQ10_ROUND(metrics->FclkFrequency);
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequency));
 		break;
 	case METRICS_AVERAGE_GFXACTIVITY:
-		*value = SMUQ10_ROUND(metrics->SocketGfxBusy);
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy));
 		break;
 	case METRICS_AVERAGE_MEMACTIVITY:
-		*value = SMUQ10_ROUND(metrics->DramBandwidthUtilization);
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization));
 		break;
 	case METRICS_CURR_SOCKETPOWER:
-		*value = SMUQ10_ROUND(metrics->SocketPower) << 8;
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower)) << 8;
 		break;
 	case METRICS_TEMPERATURE_HOTSPOT:
-		*value = SMUQ10_ROUND(metrics->MaxSocketTemperature) *
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature)) *
 			 SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
 		break;
 	case METRICS_TEMPERATURE_MEM:
-		*value = SMUQ10_ROUND(metrics->MaxHbmTemperature) *
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature)) *
 			 SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
 		break;
 	/* This is the max of all VRs and not just SOC VR.
 	 * No need to define another data type for the same.
 	 */
 	case METRICS_TEMPERATURE_VRSOC:
-		*value = SMUQ10_ROUND(metrics->MaxVrTemperature) *
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature)) *
 			 SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
 		break;
 	default:
@@ -917,7 +970,9 @@ static int smu_v13_0_6_print_clks(struct smu_context *smu, char *buf, int size,
 			if (i < (clocks.num_levels - 1))
 				clk2 = clocks.data[i + 1].clocks_in_khz / 1000;
 
-			if (curr_clk >= clk1 && curr_clk < clk2) {
+			if (curr_clk == clk1) {
+				level = i;
+			} else if (curr_clk >= clk1 && curr_clk < clk2) {
 				level = (curr_clk - clk1) <= (clk2 - curr_clk) ?
 						i :
 						i + 1;
@@ -1470,7 +1525,6 @@ static int smu_v13_0_6_mca_set_debug_mode(struct smu_context *smu, bool enable)
 	if (smu->smc_fw_version < 0x554800)
 		return 0;
 
-	amdgpu_ras_set_mca_debug_mode(smu->adev, enable);
 	return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ClearMcaOnRead,
 					       enable ? 0 : ClearMcaOnRead_UE_FLAG_MASK | ClearMcaOnRead_CE_POLL_MASK,
 					       NULL);
@@ -1884,7 +1938,6 @@ static int smu_v13_0_6_i2c_control_init(struct smu_context *smu)
 		smu_i2c->port = i;
 		mutex_init(&smu_i2c->mutex);
 		control->owner = THIS_MODULE;
-		control->class = I2C_CLASS_SPD;
 		control->dev.parent = &adev->pdev->dev;
 		control->algo = &smu_v13_0_6_i2c_algo;
 		snprintf(control->name, sizeof(control->name), "AMDGPU SMU %d", i);
@@ -2022,67 +2075,70 @@ static int smu_v13_0_6_get_current_pcie_link_speed(struct smu_context *smu)
 static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table)
 {
 	struct smu_table_context *smu_table = &smu->smu_table;
-	struct gpu_metrics_v1_4 *gpu_metrics =
-		(struct gpu_metrics_v1_4 *)smu_table->gpu_metrics_table;
+	struct gpu_metrics_v1_5 *gpu_metrics =
+		(struct gpu_metrics_v1_5 *)smu_table->gpu_metrics_table;
 	struct amdgpu_device *adev = smu->adev;
-	int ret = 0, xcc_id, inst, i;
-	MetricsTable_t *metrics;
+	int ret = 0, xcc_id, inst, i, j;
+	MetricsTableX_t *metrics_x;
+	MetricsTableA_t *metrics_a;
 	u16 link_width_level;
 
-	metrics = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL);
-	ret = smu_v13_0_6_get_metrics_table(smu, metrics, true);
+	metrics_x = kzalloc(max(sizeof(MetricsTableX_t), sizeof(MetricsTableA_t)), GFP_KERNEL);
+	ret = metrics_x ? smu_v13_0_6_get_metrics_table(smu, metrics_x, true) : -ENOMEM;
 	if (ret) {
-		kfree(metrics);
+		kfree(metrics_x);
 		return ret;
 	}
 
-	smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 4);
+	metrics_a = (MetricsTableA_t *)metrics_x;
+
+	smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 5);
 
 	gpu_metrics->temperature_hotspot =
-		SMUQ10_ROUND(metrics->MaxSocketTemperature);
+		SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature));
 	/* Individual HBM stack temperature is not reported */
 	gpu_metrics->temperature_mem =
-		SMUQ10_ROUND(metrics->MaxHbmTemperature);
+		SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature));
 	/* Reports max temperature of all voltage rails */
 	gpu_metrics->temperature_vrsoc =
-		SMUQ10_ROUND(metrics->MaxVrTemperature);
+		SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature));
 
 	gpu_metrics->average_gfx_activity =
-		SMUQ10_ROUND(metrics->SocketGfxBusy);
+		SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy));
 	gpu_metrics->average_umc_activity =
-		SMUQ10_ROUND(metrics->DramBandwidthUtilization);
+		SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization));
 
 	gpu_metrics->curr_socket_power =
-		SMUQ10_ROUND(metrics->SocketPower);
+		SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower));
 	/* Energy counter reported in 15.259uJ (2^-16) units */
-	gpu_metrics->energy_accumulator = metrics->SocketEnergyAcc;
+	gpu_metrics->energy_accumulator = GET_METRIC_FIELD(SocketEnergyAcc);
 
 	for (i = 0; i < MAX_GFX_CLKS; i++) {
 		xcc_id = GET_INST(GC, i);
 		if (xcc_id >= 0)
 			gpu_metrics->current_gfxclk[i] =
-				SMUQ10_ROUND(metrics->GfxclkFrequency[xcc_id]);
+				SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency)[xcc_id]);
 
 		if (i < MAX_CLKS) {
 			gpu_metrics->current_socclk[i] =
-				SMUQ10_ROUND(metrics->SocclkFrequency[i]);
+				SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency)[i]);
 			inst = GET_INST(VCN, i);
 			if (inst >= 0) {
 				gpu_metrics->current_vclk0[i] =
-					SMUQ10_ROUND(metrics->VclkFrequency[inst]);
+					SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency)[inst]);
 				gpu_metrics->current_dclk0[i] =
-					SMUQ10_ROUND(metrics->DclkFrequency[inst]);
+					SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency)[inst]);
 			}
 		}
 	}
 
-	gpu_metrics->current_uclk = SMUQ10_ROUND(metrics->UclkFrequency);
+	gpu_metrics->current_uclk = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency));
 
 	/* Throttle status is not reported through metrics now */
 	gpu_metrics->throttle_status = 0;
 
 	/* Clock Lock Status. Each bit corresponds to each GFXCLK instance */
-	gpu_metrics->gfxclk_lock_status = metrics->GfxLockXCDMak >> GET_INST(GC, 0);
+	gpu_metrics->gfxclk_lock_status = GET_METRIC_FIELD(GfxLockXCDMak) >> GET_INST(GC, 0);
 
 	if (!(adev->flags & AMD_IS_APU)) {
 		link_width_level = smu_v13_0_6_get_current_pcie_link_width_level(smu);
@@ -2094,38 +2150,57 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
 		gpu_metrics->pcie_link_speed =
 			smu_v13_0_6_get_current_pcie_link_speed(smu);
 		gpu_metrics->pcie_bandwidth_acc =
-				SMUQ10_ROUND(metrics->PcieBandwidthAcc[0]);
+				SMUQ10_ROUND(metrics_x->PcieBandwidthAcc[0]);
 		gpu_metrics->pcie_bandwidth_inst =
-				SMUQ10_ROUND(metrics->PcieBandwidth[0]);
+				SMUQ10_ROUND(metrics_x->PcieBandwidth[0]);
 		gpu_metrics->pcie_l0_to_recov_count_acc =
-				metrics->PCIeL0ToRecoveryCountAcc;
+				metrics_x->PCIeL0ToRecoveryCountAcc;
 		gpu_metrics->pcie_replay_count_acc =
-				metrics->PCIenReplayAAcc;
+				metrics_x->PCIenReplayAAcc;
 		gpu_metrics->pcie_replay_rover_count_acc =
-				metrics->PCIenReplayARolloverCountAcc;
+				metrics_x->PCIenReplayARolloverCountAcc;
+		gpu_metrics->pcie_nak_sent_count_acc =
+				metrics_x->PCIeNAKSentCountAcc;
+		gpu_metrics->pcie_nak_rcvd_count_acc =
+				metrics_x->PCIeNAKReceivedCountAcc;
 	}
 
 	gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
 
 	gpu_metrics->gfx_activity_acc =
-		SMUQ10_ROUND(metrics->SocketGfxBusyAcc);
+		SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusyAcc));
 	gpu_metrics->mem_activity_acc =
-		SMUQ10_ROUND(metrics->DramBandwidthUtilizationAcc);
+		SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilizationAcc));
 
 	for (i = 0; i < NUM_XGMI_LINKS; i++) {
 		gpu_metrics->xgmi_read_data_acc[i] =
-			SMUQ10_ROUND(metrics->XgmiReadDataSizeAcc[i]);
+			SMUQ10_ROUND(GET_METRIC_FIELD(XgmiReadDataSizeAcc)[i]);
 		gpu_metrics->xgmi_write_data_acc[i] =
-			SMUQ10_ROUND(metrics->XgmiWriteDataSizeAcc[i]);
+			SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWriteDataSizeAcc)[i]);
 	}
 
-	gpu_metrics->xgmi_link_width = SMUQ10_ROUND(metrics->XgmiWidth);
-	gpu_metrics->xgmi_link_speed = SMUQ10_ROUND(metrics->XgmiBitrate);
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		inst = GET_INST(JPEG, i);
+		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+			gpu_metrics->jpeg_activity[(i * adev->jpeg.num_jpeg_rings) + j] =
+				SMUQ10_ROUND(GET_METRIC_FIELD(JpegBusy)
+				[(inst * adev->jpeg.num_jpeg_rings) + j]);
+		}
+	}
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		inst = GET_INST(VCN, i);
+		gpu_metrics->vcn_activity[i] =
+			SMUQ10_ROUND(GET_METRIC_FIELD(VcnBusy)[inst]);
+	}
+
+	gpu_metrics->xgmi_link_width = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWidth));
+	gpu_metrics->xgmi_link_speed = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiBitrate));
 
-	gpu_metrics->firmware_timestamp = metrics->Timestamp;
+	gpu_metrics->firmware_timestamp = GET_METRIC_FIELD(Timestamp);
 
 	*table = (void *)gpu_metrics;
-	kfree(metrics);
+	kfree(metrics_x);
 
 	return sizeof(*gpu_metrics);
 }
@@ -2161,17 +2236,18 @@ static int smu_v13_0_6_mode2_reset(struct smu_context *smu)
 			continue;
 		}
 
-		if (ret) {
-			dev_err(adev->dev,
-				"failed to send mode2 message \tparam: 0x%08x error code %d\n",
-				SMU_RESET_MODE_2, ret);
+		if (ret)
 			goto out;
-		}
+
 	} while (ret == -ETIME && timeout);
 
 out:
 	mutex_unlock(&smu->message_lock);
 
+	if (ret)
+		dev_err(adev->dev, "failed to send mode2 reset, error code %d\n",
+			ret);
+
 	return ret;
 }
 
@@ -2300,16 +2376,6 @@ static int smu_v13_0_6_smu_send_hbm_bad_page_num(struct smu_context *smu,
 	return ret;
 }
 
-static int smu_v13_0_6_post_init(struct smu_context *smu)
-{
-	struct amdgpu_device *adev = smu->adev;
-
-	if (!amdgpu_sriov_vf(adev) && adev->ras_enabled)
-		return smu_v13_0_6_mca_set_debug_mode(smu, false);
-
-	return 0;
-}
-
 static int mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable)
 {
 	struct smu_context *smu = adev->powerplay.pp_handle;
@@ -2392,8 +2458,8 @@ static const struct mca_bank_ipid smu_v13_0_6_mca_ipid_table[AMDGPU_MCA_IP_COUNT
 
 static void mca_bank_entry_info_decode(struct mca_bank_entry *entry, struct mca_bank_info *info)
 {
-	uint64_t ipid = entry->regs[MCA_REG_IDX_IPID];
-	uint32_t insthi;
+	u64 ipid = entry->regs[MCA_REG_IDX_IPID];
+	u32 instidhi, instid;
 
 	/* NOTE: All MCA IPID register share the same format,
 	 * so the driver can share the MCMP1 register header file.
@@ -2402,9 +2468,15 @@ static void mca_bank_entry_info_decode(struct mca_bank_entry *entry, struct mca_
 	info->hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID);
 	info->mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType);
 
-	insthi = REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi);
-	info->aid = ((insthi >> 2) & 0x03);
-	info->socket_id = insthi & 0x03;
+	/*
+	 * Unified DieID Format: SAASS. A:AID, S:Socket.
+	 * Unified DieID[4] = InstanceId[0]
+	 * Unified DieID[0:3] = InstanceIdHi[0:3]
+	 */
+	instidhi = REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi);
+	instid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo);
+	info->aid = ((instidhi >> 2) & 0x03);
+	info->socket_id = ((instid & 0x1) << 2) | (instidhi & 0x03);
 }
 
 static int mca_bank_read_reg(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
@@ -2483,9 +2555,9 @@ static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct
 		return 0;
 	}
 
-	if (type == AMDGPU_MCA_ERROR_TYPE_UE && umc_v12_0_is_uncorrectable_error(status0))
+	if (type == AMDGPU_MCA_ERROR_TYPE_UE && umc_v12_0_is_uncorrectable_error(adev, status0))
 		*count = 1;
-	else if (type == AMDGPU_MCA_ERROR_TYPE_CE && umc_v12_0_is_correctable_error(status0))
+	else if (type == AMDGPU_MCA_ERROR_TYPE_CE && umc_v12_0_is_correctable_error(adev, status0))
 		*count = 1;
 
 	return 0;
@@ -2496,13 +2568,15 @@ static int mca_pcs_xgmi_mca_get_err_count(const struct mca_ras_info *mca_ras, st
 					  uint32_t *count)
 {
 	u32 ext_error_code;
+	u32 err_cnt;
 
 	ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(entry->regs[MCA_REG_IDX_STATUS]);
+	err_cnt = MCA_REG__MISC0__ERRCNT(entry->regs[MCA_REG_IDX_MISC0]);
 
 	if (type == AMDGPU_MCA_ERROR_TYPE_UE && ext_error_code == 0)
-		*count = 1;
+		*count = err_cnt;
 	else if (type == AMDGPU_MCA_ERROR_TYPE_CE && ext_error_code == 6)
-		*count = 1;
+		*count = err_cnt;
 
 	return 0;
 }
@@ -2578,6 +2652,7 @@ static bool mca_gfx_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct
 	uint32_t instlo;
 
 	instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo);
+	instlo &= GENMASK(31, 1);
 	switch (instlo) {
 	case 0x36430400: /* SMNAID XCD 0 */
 	case 0x38430400: /* SMNAID XCD 1 */
@@ -2593,13 +2668,21 @@ static bool mca_gfx_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct
 static bool mca_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev,
 				  enum amdgpu_mca_error_type type, struct mca_bank_entry *entry)
 {
+	struct smu_context *smu = adev->powerplay.pp_handle;
 	uint32_t errcode, instlo;
 
 	instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo);
+	instlo &= GENMASK(31, 1);
 	if (instlo != 0x03b30400)
 		return false;
 
-	errcode = REG_GET_FIELD(entry->regs[MCA_REG_IDX_STATUS], MCMP1_STATUST0, ErrorCode);
+	if (!(adev->flags & AMD_IS_APU) && smu->smc_fw_version >= 0x00555600) {
+		errcode = MCA_REG__SYND__ERRORINFORMATION(entry->regs[MCA_REG_IDX_SYND]);
+		errcode &= 0xff;
+	} else {
+		errcode = REG_GET_FIELD(entry->regs[MCA_REG_IDX_STATUS], MCMP1_STATUST0, ErrorCode);
+	}
+
 	return mca_smu_check_error_code(adev, mca_ras, errcode);
 }
 
@@ -2812,6 +2895,13 @@ static int smu_v13_0_6_select_xgmi_plpd_policy(struct smu_context *smu,
 	return ret;
 }
 
+static ssize_t smu_v13_0_6_get_ecc_info(struct smu_context *smu,
+			void *table)
+{
+	/* ECC info is supported by default: return 0 and leave table untouched */
+	return 0;
+}
+
 static const struct pptable_funcs smu_v13_0_6_ppt_funcs = {
 	/* init dpm */
 	.get_allowed_feature_mask = smu_v13_0_6_get_allowed_feature_mask,
@@ -2856,6 +2946,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = {
 	.log_thermal_throttling_event = smu_v13_0_6_log_thermal_throttling_event,
 	.get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
 	.get_gpu_metrics = smu_v13_0_6_get_gpu_metrics,
+	.get_pm_metrics = smu_v13_0_6_get_pm_metrics,
 	.get_thermal_temperature_range = smu_v13_0_6_get_thermal_temperature_range,
 	.mode1_reset_is_support = smu_v13_0_6_is_mode1_reset_supported,
 	.mode2_reset_is_support = smu_v13_0_6_is_mode2_reset_supported,
@@ -2865,7 +2956,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = {
 	.i2c_init = smu_v13_0_6_i2c_control_init,
 	.i2c_fini = smu_v13_0_6_i2c_control_fini,
 	.send_hbm_bad_pages_num = smu_v13_0_6_smu_send_hbm_bad_page_num,
-	.post_init = smu_v13_0_6_post_init,
+	.get_ecc_info = smu_v13_0_6_get_ecc_info,
 };
 
 void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 81eafed76045..0ffdb58af74e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -140,6 +140,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] =
 	MSG_MAP(AllowGpo,			PPSMC_MSG_SetGpoAllow,           0),
 	MSG_MAP(GetPptLimit,			PPSMC_MSG_GetPptLimit,                 0),
 	MSG_MAP(NotifyPowerSource,		PPSMC_MSG_NotifyPowerSource,           0),
+	MSG_MAP(EnableUCLKShadow,		PPSMC_MSG_EnableUCLKShadow,            0),
 };
 
 static struct cmn2asic_mapping smu_v13_0_7_clk_map[SMU_CLK_COUNT] = {
@@ -222,6 +223,7 @@ static struct cmn2asic_mapping smu_v13_0_7_table_map[SMU_TABLE_COUNT] = {
 	TAB_MAP(ACTIVITY_MONITOR_COEFF),
 	[SMU_TABLE_COMBO_PPTABLE] = {1, TABLE_COMBO_PPTABLE},
 	TAB_MAP(OVERDRIVE),
+	TAB_MAP(WIFIBAND),
 };
 
 static struct cmn2asic_mapping smu_v13_0_7_pwr_src_map[SMU_POWER_SOURCE_COUNT] = {
@@ -512,6 +514,9 @@ static int smu_v13_0_7_tables_init(struct smu_context *smu)
 		       AMDGPU_GEM_DOMAIN_VRAM);
 	SMU_TABLE_INIT(tables, SMU_TABLE_COMBO_PPTABLE, MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE,
 			PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
+	SMU_TABLE_INIT(tables, SMU_TABLE_WIFIBAND,
+		       sizeof(WifiBandEntryTable_t), PAGE_SIZE,
+		       AMDGPU_GEM_DOMAIN_VRAM);
 
 	smu_table->metrics_table = kzalloc(sizeof(SmuMetricsExternal_t), GFP_KERNEL);
 	if (!smu_table->metrics_table)
@@ -2316,6 +2321,7 @@ static int smu_v13_0_7_get_power_limit(struct smu_context *smu,
 	PPTable_t *pptable = table_context->driver_pptable;
 	SkuTable_t *skutable = &pptable->SkuTable;
 	uint32_t power_limit, od_percent_upper, od_percent_lower;
+	uint32_t msg_limit = skutable->MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC];
 
 	if (smu_v13_0_get_current_power_limit(smu, &power_limit))
 		power_limit = smu->adev->pm.ac_power ?
@@ -2339,7 +2345,7 @@ static int smu_v13_0_7_get_power_limit(struct smu_context *smu,
 					od_percent_upper, od_percent_lower, power_limit);
 
 	if (max_power_limit) {
-		*max_power_limit = power_limit * (100 + od_percent_upper);
+		*max_power_limit = msg_limit * (100 + od_percent_upper);
 		*max_power_limit /= 100;
 	}
 
@@ -2499,13 +2505,7 @@ static int smu_v13_0_7_set_mp1_state(struct smu_context *smu,
 
 	switch (mp1_state) {
 	case PP_MP1_STATE_UNLOAD:
-		ret = smu_cmn_send_smc_msg_with_param(smu,
-											  SMU_MSG_PrepareMp1ForUnload,
-											  0x55, NULL);
-
-		if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT)
-			ret = smu_v13_0_disable_pmfw_state(smu);
-
+		ret = smu_cmn_set_mp1_state(smu, mp1_state);
 		break;
 	default:
 		/* Ignore others */
@@ -2515,38 +2515,6 @@ static int smu_v13_0_7_set_mp1_state(struct smu_context *smu,
 	return ret;
 }
 
-static int smu_v13_0_7_baco_enter(struct smu_context *smu)
-{
-	struct smu_baco_context *smu_baco = &smu->smu_baco;
-	struct amdgpu_device *adev = smu->adev;
-
-	if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev))
-		return smu_v13_0_baco_set_armd3_sequence(smu,
-				(smu_baco->maco_support && amdgpu_runtime_pm != 1) ?
-					BACO_SEQ_BAMACO : BACO_SEQ_BACO);
-	else
-		return smu_v13_0_baco_enter(smu);
-}
-
-static int smu_v13_0_7_baco_exit(struct smu_context *smu)
-{
-	struct amdgpu_device *adev = smu->adev;
-	int ret;
-
-	if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) {
-		/* Wait for PMFW handling for the Dstate change */
-		usleep_range(10000, 11000);
-		ret = smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS);
-	} else {
-		ret = smu_v13_0_baco_exit(smu);
-	}
-
-	if (!ret)
-		adev->gfx.is_poweron = false;
-
-	return ret;
-}
-
 static bool smu_v13_0_7_is_mode1_reset_supported(struct smu_context *smu)
 {
 	struct amdgpu_device *adev = smu->adev;
@@ -2567,6 +2535,60 @@ static int smu_v13_0_7_set_df_cstate(struct smu_context *smu,
 					       NULL);
 }
 
+static bool smu_v13_0_7_wbrf_support_check(struct smu_context *smu)
+{
+	return smu->smc_fw_version > 0x00524600;
+}
+
+/* Set the default PPT limit; anything above the pptable message limit becomes an OD PPT boost. */
+static int smu_v13_0_7_set_power_limit(struct smu_context *smu,
+				       enum smu_ppt_limit_type limit_type,
+				       uint32_t limit)
+{
+	PPTable_t *pptable = smu->smu_table.driver_pptable;
+	uint32_t msg_limit = pptable->SkuTable.MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC];
+	OverDriveTableExternal_t *od_table =
+		(OverDriveTableExternal_t *)smu->smu_table.overdrive_table;
+	int ret = 0;
+
+	if (limit_type != SMU_DEFAULT_PPT_LIMIT)
+		return -EINVAL;
+
+	if (limit <= msg_limit) {
+		if (smu->current_power_limit > msg_limit) {
+			od_table->OverDriveTable.Ppt = 0;
+			od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_PPT_BIT;
+
+			ret = smu_v13_0_7_upload_overdrive_table(smu, od_table);
+			if (ret) {
+				dev_err(smu->adev->dev, "Failed to upload overdrive table!\n");
+				return ret;
+			}
+		}
+		return smu_v13_0_set_power_limit(smu, limit_type, limit);
+	}
+
+	/* Overdrive is required past msg_limit; a zero msg_limit would divide by zero below. */
+	if (!smu->od_enabled || !msg_limit)
+		return -EINVAL;
+
+	ret = smu_v13_0_set_power_limit(smu, limit_type, msg_limit);
+	if (ret)
+		return ret;
+
+	od_table->OverDriveTable.Ppt = (limit * 100) / msg_limit - 100;
+	od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_PPT_BIT;
+
+	ret = smu_v13_0_7_upload_overdrive_table(smu, od_table);
+	if (ret) {
+		dev_err(smu->adev->dev, "Failed to upload overdrive table!\n");
+		return ret;
+	}
+
+	smu->current_power_limit = limit;
+	return 0;
+}
+
 static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
 	.get_allowed_feature_mask = smu_v13_0_7_get_allowed_feature_mask,
 	.set_default_dpm_table = smu_v13_0_7_set_default_dpm_table,
@@ -2618,7 +2640,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
 	.set_fan_control_mode = smu_v13_0_set_fan_control_mode,
 	.enable_mgpu_fan_boost = smu_v13_0_7_enable_mgpu_fan_boost,
 	.get_power_limit = smu_v13_0_7_get_power_limit,
-	.set_power_limit = smu_v13_0_set_power_limit,
+	.set_power_limit = smu_v13_0_7_set_power_limit,
 	.set_power_source = smu_v13_0_set_power_source,
 	.get_power_profile_mode = smu_v13_0_7_get_power_profile_mode,
 	.set_power_profile_mode = smu_v13_0_7_set_power_profile_mode,
@@ -2626,15 +2648,16 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
 	.get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
 	.set_pp_feature_mask = smu_cmn_set_pp_feature_mask,
 	.baco_is_support = smu_v13_0_baco_is_support,
-	.baco_get_state = smu_v13_0_baco_get_state,
-	.baco_set_state = smu_v13_0_baco_set_state,
-	.baco_enter = smu_v13_0_7_baco_enter,
-	.baco_exit = smu_v13_0_7_baco_exit,
+	.baco_enter = smu_v13_0_baco_enter,
+	.baco_exit = smu_v13_0_baco_exit,
 	.mode1_reset_is_support = smu_v13_0_7_is_mode1_reset_supported,
 	.mode1_reset = smu_v13_0_mode1_reset,
 	.set_mp1_state = smu_v13_0_7_set_mp1_state,
 	.set_df_cstate = smu_v13_0_7_set_df_cstate,
 	.gpo_control = smu_v13_0_gpo_control,
+	.is_asic_wbrf_supported = smu_v13_0_7_wbrf_support_check,
+	.enable_uclk_shadow = smu_v13_0_enable_uclk_shadow,
+	.set_wbrf_exclusion_ranges = smu_v13_0_set_wbrf_exclusion_ranges,
 };
 
 void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
index d8f8ad0e7137..4894f7ee737b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
@@ -224,7 +224,7 @@ int smu_v14_0_check_fw_version(struct smu_context *smu)
 	if (smu->is_apu)
 		adev->pm.fw_version = smu_version;
 
-	switch (adev->ip_versions[MP1_HWIP][0]) {
+	switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
 	case IP_VERSION(14, 0, 2):
 		smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_2;
 		break;
@@ -235,7 +235,7 @@ int smu_v14_0_check_fw_version(struct smu_context *smu)
 		break;
 	default:
 		dev_err(adev->dev, "smu unsupported IP version: 0x%x.\n",
-			adev->ip_versions[MP1_HWIP][0]);
+			amdgpu_ip_version(adev, MP1_HWIP, 0));
 		smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_INV;
 		break;
 	}
@@ -733,7 +733,7 @@ int smu_v14_0_gfx_off_control(struct smu_context *smu, bool enable)
 	int ret = 0;
 	struct amdgpu_device *adev = smu->adev;
 
-	switch (adev->ip_versions[MP1_HWIP][0]) {
+	switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
 	case IP_VERSION(14, 0, 2):
 	case IP_VERSION(14, 0, 0):
 		if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
index 03b38c3a9968..47fdbae4adfc 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
@@ -246,11 +246,20 @@ static int smu_v14_0_0_get_smu_metrics_data(struct smu_context *smu,
 		*value = 0;
 		break;
 	case METRICS_AVERAGE_UCLK:
-		*value = 0;
+		*value = metrics->MemclkFrequency;
 		break;
 	case METRICS_AVERAGE_FCLK:
 		*value = metrics->FclkFrequency;
 		break;
+	case METRICS_AVERAGE_VPECLK:
+		*value = metrics->VpeclkFrequency;
+		break;
+	case METRICS_AVERAGE_IPUCLK:
+		*value = metrics->IpuclkFrequency;
+		break;
+	case METRICS_AVERAGE_MPIPUCLK:
+		*value = metrics->MpipuclkFrequency;
+		break;
 	case METRICS_AVERAGE_GFXACTIVITY:
 		*value = metrics->GfxActivity / 100;
 		break;
@@ -270,8 +279,26 @@ static int smu_v14_0_0_get_smu_metrics_data(struct smu_context *smu,
 		*value = metrics->SocTemperature / 100 *
 		SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
 		break;
-	case METRICS_THROTTLER_STATUS:
-		*value = 0;
+	case METRICS_THROTTLER_RESIDENCY_PROCHOT:
+		*value = metrics->ThrottleResidency_PROCHOT;
+		break;
+	case METRICS_THROTTLER_RESIDENCY_SPL:
+		*value = metrics->ThrottleResidency_SPL;
+		break;
+	case METRICS_THROTTLER_RESIDENCY_FPPT:
+		*value = metrics->ThrottleResidency_FPPT;
+		break;
+	case METRICS_THROTTLER_RESIDENCY_SPPT:
+		*value = metrics->ThrottleResidency_SPPT;
+		break;
+	case METRICS_THROTTLER_RESIDENCY_THM_CORE:
+		*value = metrics->ThrottleResidency_THM_CORE;
+		break;
+	case METRICS_THROTTLER_RESIDENCY_THM_GFX:
+		*value = metrics->ThrottleResidency_THM_GFX;
+		break;
+	case METRICS_THROTTLER_RESIDENCY_THM_SOC:
+		*value = metrics->ThrottleResidency_THM_SOC;
 		break;
 	case METRICS_VOLTAGE_VDDGFX:
 		*value = 0;
@@ -498,6 +525,8 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu,
 		sizeof(uint16_t) * 16);
 	gpu_metrics->average_dram_reads = metrics.DRAMReads;
 	gpu_metrics->average_dram_writes = metrics.DRAMWrites;
+	gpu_metrics->average_ipu_reads = metrics.IpuReads;
+	gpu_metrics->average_ipu_writes = metrics.IpuWrites;
 
 	gpu_metrics->average_socket_power = metrics.SocketPower;
 	gpu_metrics->average_ipu_power = metrics.IpuPower;
@@ -505,6 +534,7 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu,
 	gpu_metrics->average_gfx_power = metrics.GfxPower;
 	gpu_metrics->average_dgpu_power = metrics.dGpuPower;
 	gpu_metrics->average_all_core_power = metrics.AllCorePower;
+	gpu_metrics->average_sys_power = metrics.Psys;
 	memcpy(&gpu_metrics->average_core_power[0],
 		&metrics.CorePower[0],
 		sizeof(uint16_t) * 16);
@@ -515,6 +545,8 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu,
 	gpu_metrics->average_fclk_frequency = metrics.FclkFrequency;
 	gpu_metrics->average_vclk_frequency = metrics.VclkFrequency;
 	gpu_metrics->average_ipuclk_frequency = metrics.IpuclkFrequency;
+	gpu_metrics->average_uclk_frequency = metrics.MemclkFrequency;
+	gpu_metrics->average_mpipu_frequency = metrics.MpipuclkFrequency;
 
 	memcpy(&gpu_metrics->current_coreclk[0],
 		&metrics.CoreFrequency[0],
@@ -522,6 +554,14 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu,
 	gpu_metrics->current_core_maxfreq = metrics.InfrastructureCpuMaxFreq;
 	gpu_metrics->current_gfx_maxfreq = metrics.InfrastructureGfxMaxFreq;
 
+	gpu_metrics->throttle_residency_prochot = metrics.ThrottleResidency_PROCHOT;
+	gpu_metrics->throttle_residency_spl = metrics.ThrottleResidency_SPL;
+	gpu_metrics->throttle_residency_fppt = metrics.ThrottleResidency_FPPT;
+	gpu_metrics->throttle_residency_sppt = metrics.ThrottleResidency_SPPT;
+	gpu_metrics->throttle_residency_thm_core = metrics.ThrottleResidency_THM_CORE;
+	gpu_metrics->throttle_residency_thm_gfx = metrics.ThrottleResidency_THM_GFX;
+	gpu_metrics->throttle_residency_thm_soc = metrics.ThrottleResidency_THM_SOC;
+
 	gpu_metrics->time_filter_alphavalue = metrics.FilterAlphaValue;
 	gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
 
@@ -1045,6 +1085,25 @@ static int smu_v14_0_0_set_umsch_mm_enable(struct smu_context *smu,
 					       0, NULL);
 }
 
+static int smu_14_0_0_get_dpm_table(struct smu_context *smu, struct dpm_clocks *clock_table)
+{
+	DpmClocks_t *clk_table = smu->smu_table.clocks_table;
+	uint8_t idx;
+
+	/* Copy only SOC and VPE clocks (for VPE DPM use); levels beyond the enabled count are 0. */
+	for (idx = 0; idx < NUM_SOCCLK_DPM_LEVELS; idx++) {
+		clock_table->SocClocks[idx].Freq = (idx < clk_table->NumSocClkLevelsEnabled) ? clk_table->SocClocks[idx] : 0;
+		clock_table->SocClocks[idx].Vol = 0;
+	}
+
+	for (idx = 0; idx < NUM_VPE_DPM_LEVELS; idx++) {
+		clock_table->VPEClocks[idx].Freq = (idx < clk_table->VpeClkLevelsEnabled) ? clk_table->VPEClocks[idx] : 0;
+		clock_table->VPEClocks[idx].Vol = 0;
+	}
+
+	return 0;
+}
+
 static const struct pptable_funcs smu_v14_0_0_ppt_funcs = {
 	.check_fw_status = smu_v14_0_check_fw_status,
 	.check_fw_version = smu_v14_0_check_fw_version,
@@ -1075,6 +1134,7 @@ static const struct pptable_funcs smu_v14_0_0_ppt_funcs = {
 	.set_gfx_power_up_by_imu = smu_v14_0_set_gfx_power_up_by_imu,
 	.dpm_set_vpe_enable = smu_v14_0_0_set_vpe_enable,
 	.dpm_set_umsch_mm_enable = smu_v14_0_0_set_umsch_mm_enable,
+	.get_dpm_clock_table = smu_14_0_0_get_dpm_table,
 };
 
 static void smu_v14_0_0_set_smu_mailbox_registers(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 001a5cf09657..00cd615bbcdc 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -989,6 +989,9 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev)
 	case METRICS_VERSION(1, 4):
 		structure_size = sizeof(struct gpu_metrics_v1_4);
 		break;
+	case METRICS_VERSION(1, 5):
+		structure_size = sizeof(struct gpu_metrics_v1_5);
+		break;
 	case METRICS_VERSION(2, 0):
 		structure_size = sizeof(struct gpu_metrics_v2_0);
 		break;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
index 80b3c3efc006..6f4d212607d7 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
@@ -97,6 +97,10 @@
 #define smu_get_default_config_table_settings(smu, config_table)	smu_ppt_funcs(get_default_config_table_settings, -EOPNOTSUPP, smu, config_table)
 #define smu_set_config_table(smu, config_table)				smu_ppt_funcs(set_config_table, -EOPNOTSUPP, smu, config_table)
 #define smu_init_pptable_microcode(smu)					smu_ppt_funcs(init_pptable_microcode, 0, smu)
+#define smu_notify_rlc_state(smu, en)					smu_ppt_funcs(notify_rlc_state, 0, smu, en)
+#define smu_is_asic_wbrf_supported(smu)			smu_ppt_funcs(is_asic_wbrf_supported, false, smu)
+#define smu_enable_uclk_shadow(smu, enable)		smu_ppt_funcs(enable_uclk_shadow, 0, smu, enable)
+#define smu_set_wbrf_exclusion_ranges(smu, freq_band_range)		smu_ppt_funcs(set_wbrf_exclusion_ranges, -EOPNOTSUPP, smu, freq_band_range)
 
 #endif
 #endif
diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c
index dc01c43f6193..d72c22dcf685 100644
--- a/drivers/gpu/drm/arm/malidp_crtc.c
+++ b/drivers/gpu/drm/arm/malidp_crtc.c
@@ -221,7 +221,7 @@ static int malidp_crtc_atomic_check_ctm(struct drm_crtc *crtc,
 
 	/*
 	 * The size of the ctm is checked in
-	 * drm_atomic_replace_property_blob_from_id.
+	 * drm_property_replace_blob_from_id.
 	 */
 	ctm = (struct drm_color_ctm *)state->ctm->data;
 	for (i = 0; i < ARRAY_SIZE(ctm->matrix); ++i) {
diff --git a/drivers/gpu/drm/ast/ast_i2c.c b/drivers/gpu/drm/ast/ast_i2c.c
index 0e845e7acd9b..e5d3f7121de4 100644
--- a/drivers/gpu/drm/ast/ast_i2c.c
+++ b/drivers/gpu/drm/ast/ast_i2c.c
@@ -120,7 +120,6 @@ struct ast_i2c_chan *ast_i2c_create(struct drm_device *dev)
 		return NULL;
 
 	i2c->adapter.owner = THIS_MODULE;
-	i2c->adapter.class = I2C_CLASS_DDC;
 	i2c->adapter.dev.parent = dev->dev;
 	i2c->dev = dev;
 	i2c_set_adapdata(&i2c->adapter, i2c);
diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig
index 19d2dc05c397..efd996f6c138 100644
--- a/drivers/gpu/drm/bridge/Kconfig
+++ b/drivers/gpu/drm/bridge/Kconfig
@@ -330,6 +330,7 @@ config DRM_TOSHIBA_TC358768
 	select REGMAP_I2C
 	select DRM_PANEL
 	select DRM_MIPI_DSI
+	select VIDEOMODE_HELPERS
 	help
 	  Toshiba TC358768AXBG/TC358778XBG DSI bridge chip driver.
 
diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c
index ef31033439bc..29d91493b101 100644
--- a/drivers/gpu/drm/bridge/analogix/anx7625.c
+++ b/drivers/gpu/drm/bridge/analogix/anx7625.c
@@ -1762,6 +1762,7 @@ static ssize_t anx7625_aux_transfer(struct drm_dp_aux *aux,
 	u8 request = msg->request & ~DP_AUX_I2C_MOT;
 	int ret = 0;
 
+	mutex_lock(&ctx->aux_lock);
 	pm_runtime_get_sync(dev);
 	msg->reply = 0;
 	switch (request) {
@@ -1778,6 +1779,7 @@ static ssize_t anx7625_aux_transfer(struct drm_dp_aux *aux,
 					msg->size, msg->buffer);
 	pm_runtime_mark_last_busy(dev);
 	pm_runtime_put_autosuspend(dev);
+	mutex_unlock(&ctx->aux_lock);
 
 	return ret;
 }
@@ -2474,7 +2476,9 @@ static void anx7625_bridge_atomic_disable(struct drm_bridge *bridge,
 	ctx->connector = NULL;
 	anx7625_dp_stop(ctx);
 
-	pm_runtime_put_sync(dev);
+	mutex_lock(&ctx->aux_lock);
+	pm_runtime_put_sync_suspend(dev);
+	mutex_unlock(&ctx->aux_lock);
 }
 
 static enum drm_connector_status
@@ -2668,6 +2672,7 @@ static int anx7625_i2c_probe(struct i2c_client *client)
 
 	mutex_init(&platform->lock);
 	mutex_init(&platform->hdcp_wq_lock);
+	mutex_init(&platform->aux_lock);
 
 	INIT_DELAYED_WORK(&platform->hdcp_work, hdcp_check_work_func);
 	platform->hdcp_workqueue = create_workqueue("hdcp workqueue");
diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.h b/drivers/gpu/drm/bridge/analogix/anx7625.h
index 66ebee7f3d83..39ed35d33836 100644
--- a/drivers/gpu/drm/bridge/analogix/anx7625.h
+++ b/drivers/gpu/drm/bridge/analogix/anx7625.h
@@ -475,6 +475,8 @@ struct anx7625_data {
 	struct workqueue_struct *hdcp_workqueue;
 	/* Lock for hdcp work queue */
 	struct mutex hdcp_wq_lock;
+	/* Lock for aux transfer and disable */
+	struct mutex aux_lock;
 	char edid_block;
 	struct display_timing dt;
 	u8 display_timing_valid;
diff --git a/drivers/gpu/drm/bridge/aux-bridge.c b/drivers/gpu/drm/bridge/aux-bridge.c
index 49d7c2ab1ecc..b29980f95379 100644
--- a/drivers/gpu/drm/bridge/aux-bridge.c
+++ b/drivers/gpu/drm/bridge/aux-bridge.c
@@ -6,6 +6,7 @@
  */
 #include <linux/auxiliary_bus.h>
 #include <linux/module.h>
+#include <linux/of.h>
 
 #include <drm/drm_bridge.h>
 #include <drm/bridge/aux-bridge.h>
@@ -57,7 +58,7 @@ int drm_aux_bridge_register(struct device *parent)
 	adev->id = ret;
 	adev->name = "aux_bridge";
 	adev->dev.parent = parent;
-	adev->dev.of_node = parent->of_node;
+	adev->dev.of_node = of_node_get(parent->of_node);
 	adev->dev.release = drm_aux_bridge_release;
 
 	ret = auxiliary_device_init(adev);
diff --git a/drivers/gpu/drm/bridge/aux-hpd-bridge.c b/drivers/gpu/drm/bridge/aux-hpd-bridge.c
index 1999a053d59b..6886db2d9e00 100644
--- a/drivers/gpu/drm/bridge/aux-hpd-bridge.c
+++ b/drivers/gpu/drm/bridge/aux-hpd-bridge.c
@@ -25,20 +25,18 @@ static void drm_aux_hpd_bridge_release(struct device *dev)
 	ida_free(&drm_aux_hpd_bridge_ida, adev->id);
 
 	of_node_put(adev->dev.platform_data);
+	of_node_put(adev->dev.of_node);
 
 	kfree(adev);
 }
 
-static void drm_aux_hpd_bridge_unregister_adev(void *_adev)
+static void drm_aux_hpd_bridge_free_adev(void *_adev)
 {
-	struct auxiliary_device *adev = _adev;
-
-	auxiliary_device_delete(adev);
-	auxiliary_device_uninit(adev);
+	auxiliary_device_uninit(_adev);
 }
 
 /**
- * drm_dp_hpd_bridge_register - Create a simple HPD DisplayPort bridge
+ * devm_drm_dp_hpd_bridge_alloc - allocate a HPD DisplayPort bridge
  * @parent: device instance providing this bridge
  * @np: device node pointer corresponding to this bridge instance
  *
@@ -46,11 +44,9 @@ static void drm_aux_hpd_bridge_unregister_adev(void *_adev)
  * DRM_MODE_CONNECTOR_DisplayPort, which terminates the bridge chain and is
  * able to send the HPD events.
  *
- * Return: device instance that will handle created bridge or an error code
- * encoded into the pointer.
+ * Return: bridge auxiliary device pointer or an error pointer
  */
-struct device *drm_dp_hpd_bridge_register(struct device *parent,
-					  struct device_node *np)
+struct auxiliary_device *devm_drm_dp_hpd_bridge_alloc(struct device *parent, struct device_node *np)
 {
 	struct auxiliary_device *adev;
 	int ret;
@@ -68,24 +64,68 @@ struct device *drm_dp_hpd_bridge_register(struct device *parent,
 	adev->id = ret;
 	adev->name = "dp_hpd_bridge";
 	adev->dev.parent = parent;
-	adev->dev.of_node = parent->of_node;
+	adev->dev.of_node = of_node_get(parent->of_node);
 	adev->dev.release = drm_aux_hpd_bridge_release;
-	adev->dev.platform_data = np;
+	adev->dev.platform_data = of_node_get(np);
 
 	ret = auxiliary_device_init(adev);
 	if (ret) {
+		of_node_put(adev->dev.platform_data);
+		of_node_put(adev->dev.of_node);
 		ida_free(&drm_aux_hpd_bridge_ida, adev->id);
 		kfree(adev);
 		return ERR_PTR(ret);
 	}
 
-	ret = auxiliary_device_add(adev);
-	if (ret) {
-		auxiliary_device_uninit(adev);
+	ret = devm_add_action_or_reset(parent, drm_aux_hpd_bridge_free_adev, adev);
+	if (ret)
 		return ERR_PTR(ret);
-	}
 
-	ret = devm_add_action_or_reset(parent, drm_aux_hpd_bridge_unregister_adev, adev);
+	return adev;
+}
+EXPORT_SYMBOL_GPL(devm_drm_dp_hpd_bridge_alloc);
+
+static void drm_aux_hpd_bridge_del_adev(void *_adev)
+{
+	auxiliary_device_delete(_adev);
+}
+
+/**
+ * devm_drm_dp_hpd_bridge_add - register an HPD DisplayPort bridge
+ * @dev: struct device to tie registration lifetime to
+ * @adev: bridge auxiliary device to be registered
+ *
+ * Return: zero on success or a negative errno
+ */
+int devm_drm_dp_hpd_bridge_add(struct device *dev, struct auxiliary_device *adev)
+{
+	int ret;
+
+	ret = auxiliary_device_add(adev);
+	if (ret)
+		return ret;
+
+	return devm_add_action_or_reset(dev, drm_aux_hpd_bridge_del_adev, adev);
+}
+EXPORT_SYMBOL_GPL(devm_drm_dp_hpd_bridge_add);
+
+/**
+ * drm_dp_hpd_bridge_register - allocate and register an HPD DisplayPort bridge
+ * @parent: device instance providing this bridge
+ * @np: device node pointer corresponding to this bridge instance
+ *
+ * Return: device instance that will handle created bridge or an error pointer
+ */
+struct device *drm_dp_hpd_bridge_register(struct device *parent, struct device_node *np)
+{
+	struct auxiliary_device *adev;
+	int ret;
+
+	adev = devm_drm_dp_hpd_bridge_alloc(parent, np);
+	if (IS_ERR(adev))
+		return ERR_CAST(adev);
+
+	ret = devm_drm_dp_hpd_bridge_add(parent, adev);
 	if (ret)
 		return ERR_PTR(ret);
 
diff --git a/drivers/gpu/drm/bridge/panel.c b/drivers/gpu/drm/bridge/panel.c
index e48823a4f1ed..7f41525f7a6e 100644
--- a/drivers/gpu/drm/bridge/panel.c
+++ b/drivers/gpu/drm/bridge/panel.c
@@ -4,8 +4,6 @@
  * Copyright (C) 2017 Broadcom
  */
 
-#include <linux/device.h>
-
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_bridge.h>
 #include <drm/drm_connector.h>
@@ -21,7 +19,6 @@ struct panel_bridge {
 	struct drm_bridge bridge;
 	struct drm_connector connector;
 	struct drm_panel *panel;
-	struct device_link *link;
 	u32 connector_type;
 };
 
@@ -63,24 +60,13 @@ static int panel_bridge_attach(struct drm_bridge *bridge,
 {
 	struct panel_bridge *panel_bridge = drm_bridge_to_panel_bridge(bridge);
 	struct drm_connector *connector = &panel_bridge->connector;
-	struct drm_panel *panel = panel_bridge->panel;
-	struct drm_device *drm_dev = bridge->dev;
 	int ret;
 
-	panel_bridge->link = device_link_add(drm_dev->dev, panel->dev,
-					     DL_FLAG_STATELESS);
-	if (!panel_bridge->link) {
-		DRM_ERROR("Failed to add device link between %s and %s\n",
-			  dev_name(drm_dev->dev), dev_name(panel->dev));
-		return -EINVAL;
-	}
-
 	if (flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR)
 		return 0;
 
 	if (!bridge->encoder) {
 		DRM_ERROR("Missing encoder\n");
-		device_link_del(panel_bridge->link);
 		return -ENODEV;
 	}
 
@@ -92,7 +78,6 @@ static int panel_bridge_attach(struct drm_bridge *bridge,
 				 panel_bridge->connector_type);
 	if (ret) {
 		DRM_ERROR("Failed to initialize connector\n");
-		device_link_del(panel_bridge->link);
 		return ret;
 	}
 
@@ -115,8 +100,6 @@ static void panel_bridge_detach(struct drm_bridge *bridge)
 	struct panel_bridge *panel_bridge = drm_bridge_to_panel_bridge(bridge);
 	struct drm_connector *connector = &panel_bridge->connector;
 
-	device_link_del(panel_bridge->link);
-
 	/*
 	 * Cleanup the connector if we know it was initialized.
 	 *
diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c b/drivers/gpu/drm/bridge/parade-ps8640.c
index 8161b1a1a4b1..14d4dcf239da 100644
--- a/drivers/gpu/drm/bridge/parade-ps8640.c
+++ b/drivers/gpu/drm/bridge/parade-ps8640.c
@@ -107,6 +107,7 @@ struct ps8640 {
 	struct device_link *link;
 	bool pre_enabled;
 	bool need_post_hpd_delay;
+	struct mutex aux_lock;
 };
 
 static const struct regmap_config ps8640_regmap_config[] = {
@@ -210,7 +211,7 @@ static ssize_t ps8640_aux_transfer_msg(struct drm_dp_aux *aux,
 	struct ps8640 *ps_bridge = aux_to_ps8640(aux);
 	struct regmap *map = ps_bridge->regmap[PAGE0_DP_CNTL];
 	struct device *dev = &ps_bridge->page[PAGE0_DP_CNTL]->dev;
-	unsigned int len = msg->size;
+	size_t len = msg->size;
 	unsigned int data;
 	unsigned int base;
 	int ret;
@@ -330,11 +331,12 @@ static ssize_t ps8640_aux_transfer_msg(struct drm_dp_aux *aux,
 				return ret;
 			}
 
-			buf[i] = data;
+			if (i < msg->size)
+				buf[i] = data;
 		}
 	}
 
-	return len;
+	return min(len, msg->size);
 }
 
 static ssize_t ps8640_aux_transfer(struct drm_dp_aux *aux,
@@ -344,11 +346,20 @@ static ssize_t ps8640_aux_transfer(struct drm_dp_aux *aux,
 	struct device *dev = &ps_bridge->page[PAGE0_DP_CNTL]->dev;
 	int ret;
 
+	mutex_lock(&ps_bridge->aux_lock);
 	pm_runtime_get_sync(dev);
+	ret = _ps8640_wait_hpd_asserted(ps_bridge, 200 * 1000);
+	if (ret) {
+		pm_runtime_put_sync_suspend(dev);
+		goto exit;
+	}
 	ret = ps8640_aux_transfer_msg(aux, msg);
 	pm_runtime_mark_last_busy(dev);
 	pm_runtime_put_autosuspend(dev);
 
+exit:
+	mutex_unlock(&ps_bridge->aux_lock);
+
 	return ret;
 }
 
@@ -469,7 +480,18 @@ static void ps8640_atomic_post_disable(struct drm_bridge *bridge,
 	ps_bridge->pre_enabled = false;
 
 	ps8640_bridge_vdo_control(ps_bridge, DISABLE);
+
+	/*
+	 * The bridge seems to expect everything to be power cycled at the
+	 * disable process, so grab a lock here to make sure
+	 * ps8640_aux_transfer() is not holding a runtime PM reference and
+	 * preventing the bridge from suspend.
+	 */
+	mutex_lock(&ps_bridge->aux_lock);
+
 	pm_runtime_put_sync_suspend(&ps_bridge->page[PAGE0_DP_CNTL]->dev);
+
+	mutex_unlock(&ps_bridge->aux_lock);
 }
 
 static int ps8640_bridge_attach(struct drm_bridge *bridge,
@@ -618,6 +640,8 @@ static int ps8640_probe(struct i2c_client *client)
 	if (!ps_bridge)
 		return -ENOMEM;
 
+	mutex_init(&ps_bridge->aux_lock);
+
 	ps_bridge->supplies[0].supply = "vdd12";
 	ps_bridge->supplies[1].supply = "vdd33";
 	ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(ps_bridge->supplies),
diff --git a/drivers/gpu/drm/bridge/samsung-dsim.c b/drivers/gpu/drm/bridge/samsung-dsim.c
index be5914caa17d..63a1a0c88be4 100644
--- a/drivers/gpu/drm/bridge/samsung-dsim.c
+++ b/drivers/gpu/drm/bridge/samsung-dsim.c
@@ -969,10 +969,6 @@ static int samsung_dsim_init_link(struct samsung_dsim *dsi)
 	reg = samsung_dsim_read(dsi, DSIM_ESCMODE_REG);
 	reg &= ~DSIM_STOP_STATE_CNT_MASK;
 	reg |= DSIM_STOP_STATE_CNT(driver_data->reg_values[STOP_STATE_CNT]);
-
-	if (!samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type))
-		reg |= DSIM_FORCE_STOP_STATE;
-
 	samsung_dsim_write(dsi, DSIM_ESCMODE_REG, reg);
 
 	reg = DSIM_BTA_TIMEOUT(0xff) | DSIM_LPDR_TIMEOUT(0xffff);
@@ -1431,18 +1427,6 @@ static void samsung_dsim_disable_irq(struct samsung_dsim *dsi)
 	disable_irq(dsi->irq);
 }
 
-static void samsung_dsim_set_stop_state(struct samsung_dsim *dsi, bool enable)
-{
-	u32 reg = samsung_dsim_read(dsi, DSIM_ESCMODE_REG);
-
-	if (enable)
-		reg |= DSIM_FORCE_STOP_STATE;
-	else
-		reg &= ~DSIM_FORCE_STOP_STATE;
-
-	samsung_dsim_write(dsi, DSIM_ESCMODE_REG, reg);
-}
-
 static int samsung_dsim_init(struct samsung_dsim *dsi)
 {
 	const struct samsung_dsim_driver_data *driver_data = dsi->driver_data;
@@ -1492,9 +1476,6 @@ static void samsung_dsim_atomic_pre_enable(struct drm_bridge *bridge,
 		ret = samsung_dsim_init(dsi);
 		if (ret)
 			return;
-
-		samsung_dsim_set_display_mode(dsi);
-		samsung_dsim_set_display_enable(dsi, true);
 	}
 }
 
@@ -1503,12 +1484,8 @@ static void samsung_dsim_atomic_enable(struct drm_bridge *bridge,
 {
 	struct samsung_dsim *dsi = bridge_to_dsi(bridge);
 
-	if (samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type)) {
-		samsung_dsim_set_display_mode(dsi);
-		samsung_dsim_set_display_enable(dsi, true);
-	} else {
-		samsung_dsim_set_stop_state(dsi, false);
-	}
+	samsung_dsim_set_display_mode(dsi);
+	samsung_dsim_set_display_enable(dsi, true);
 
 	dsi->state |= DSIM_STATE_VIDOUT_AVAILABLE;
 }
@@ -1521,9 +1498,6 @@ static void samsung_dsim_atomic_disable(struct drm_bridge *bridge,
 	if (!(dsi->state & DSIM_STATE_ENABLED))
 		return;
 
-	if (!samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type))
-		samsung_dsim_set_stop_state(dsi, true);
-
 	dsi->state &= ~DSIM_STATE_VIDOUT_AVAILABLE;
 }
 
@@ -1828,8 +1802,6 @@ static ssize_t samsung_dsim_host_transfer(struct mipi_dsi_host *host,
 	if (ret)
 		return ret;
 
-	samsung_dsim_set_stop_state(dsi, false);
-
 	ret = mipi_dsi_create_packet(&xfer.packet, msg);
 	if (ret < 0)
 		return ret;
diff --git a/drivers/gpu/drm/bridge/sii902x.c b/drivers/gpu/drm/bridge/sii902x.c
index 2bdc5b439beb..4560ae9cbce1 100644
--- a/drivers/gpu/drm/bridge/sii902x.c
+++ b/drivers/gpu/drm/bridge/sii902x.c
@@ -1080,6 +1080,26 @@ static int sii902x_init(struct sii902x *sii902x)
 			return ret;
 	}
 
+	ret = sii902x_audio_codec_init(sii902x, dev);
+	if (ret)
+		return ret;
+
+	i2c_set_clientdata(sii902x->i2c, sii902x);
+
+	sii902x->i2cmux = i2c_mux_alloc(sii902x->i2c->adapter, dev,
+					1, 0, I2C_MUX_GATE,
+					sii902x_i2c_bypass_select,
+					sii902x_i2c_bypass_deselect);
+	if (!sii902x->i2cmux) {
+		ret = -ENOMEM;
+		goto err_unreg_audio;
+	}
+
+	sii902x->i2cmux->priv = sii902x;
+	ret = i2c_mux_add_adapter(sii902x->i2cmux, 0, 0, 0);
+	if (ret)
+		goto err_unreg_audio;
+
 	sii902x->bridge.funcs = &sii902x_bridge_funcs;
 	sii902x->bridge.of_node = dev->of_node;
 	sii902x->bridge.timings = &default_sii902x_timings;
@@ -1090,19 +1110,13 @@ static int sii902x_init(struct sii902x *sii902x)
 
 	drm_bridge_add(&sii902x->bridge);
 
-	sii902x_audio_codec_init(sii902x, dev);
-
-	i2c_set_clientdata(sii902x->i2c, sii902x);
+	return 0;
 
-	sii902x->i2cmux = i2c_mux_alloc(sii902x->i2c->adapter, dev,
-					1, 0, I2C_MUX_GATE,
-					sii902x_i2c_bypass_select,
-					sii902x_i2c_bypass_deselect);
-	if (!sii902x->i2cmux)
-		return -ENOMEM;
+err_unreg_audio:
+	if (!PTR_ERR_OR_ZERO(sii902x->audio.pdev))
+		platform_device_unregister(sii902x->audio.pdev);
 
-	sii902x->i2cmux->priv = sii902x;
-	return i2c_mux_add_adapter(sii902x->i2cmux, 0, 0, 0);
+	return ret;
 }
 
 static int sii902x_probe(struct i2c_client *client)
@@ -1170,12 +1184,14 @@ static int sii902x_probe(struct i2c_client *client)
 }
 
 static void sii902x_remove(struct i2c_client *client)
-
 {
 	struct sii902x *sii902x = i2c_get_clientdata(client);
 
-	i2c_mux_del_adapters(sii902x->i2cmux);
 	drm_bridge_remove(&sii902x->bridge);
+	i2c_mux_del_adapters(sii902x->i2cmux);
+
+	if (!PTR_ERR_OR_ZERO(sii902x->audio.pdev))
+		platform_device_unregister(sii902x->audio.pdev);
 }
 
 static const struct of_device_id sii902x_dt_ids[] = {
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
index 52d91a0df85e..aca5bb0866f8 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
@@ -515,7 +515,6 @@ static struct i2c_adapter *dw_hdmi_i2c_adapter(struct dw_hdmi *hdmi)
 	init_completion(&i2c->cmp);
 
 	adap = &i2c->adap;
-	adap->class = I2C_CLASS_DDC;
 	adap->owner = THIS_MODULE;
 	adap->dev.parent = hdmi->dev;
 	adap->algo = &dw_hdmi_algorithm;
diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
index 5b8e1dfc458d..62cc3893dca5 100644
--- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
@@ -527,6 +527,7 @@ static ssize_t ti_sn_aux_transfer(struct drm_dp_aux *aux,
 	u32 request_val = AUX_CMD_REQ(msg->request);
 	u8 *buf = msg->buffer;
 	unsigned int len = msg->size;
+	unsigned int short_len;
 	unsigned int val;
 	int ret;
 	u8 addr_len[SN_AUX_LENGTH_REG + 1 - SN_AUX_ADDR_19_16_REG];
@@ -600,7 +601,8 @@ static ssize_t ti_sn_aux_transfer(struct drm_dp_aux *aux,
 	}
 
 	if (val & AUX_IRQ_STATUS_AUX_SHORT) {
-		ret = regmap_read(pdata->regmap, SN_AUX_LENGTH_REG, &len);
+		ret = regmap_read(pdata->regmap, SN_AUX_LENGTH_REG, &short_len);
+		len = min(len, short_len);
 		if (ret)
 			goto exit;
 	} else if (val & AUX_IRQ_STATUS_NAT_I2C_FAIL) {
@@ -1413,7 +1415,7 @@ static int ti_sn_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 	int ret;
 
 	if (!pdata->pwm_enabled) {
-		ret = pm_runtime_resume_and_get(pdata->dev);
+		ret = pm_runtime_resume_and_get(chip->dev);
 		if (ret < 0)
 			return ret;
 	}
@@ -1429,7 +1431,7 @@ static int ti_sn_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 						 SN_GPIO_MUX_MASK << (2 * SN_PWM_GPIO_IDX),
 						 SN_GPIO_MUX_SPECIAL << (2 * SN_PWM_GPIO_IDX));
 			if (ret) {
-				dev_err(pdata->dev, "failed to mux in PWM function\n");
+				dev_err(chip->dev, "failed to mux in PWM function\n");
 				goto out;
 			}
 		}
@@ -1505,7 +1507,7 @@ static int ti_sn_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 
 		ret = regmap_write(pdata->regmap, SN_PWM_PRE_DIV_REG, pre_div);
 		if (ret) {
-			dev_err(pdata->dev, "failed to update PWM_PRE_DIV\n");
+			dev_err(chip->dev, "failed to update PWM_PRE_DIV\n");
 			goto out;
 		}
 
@@ -1517,7 +1519,7 @@ static int ti_sn_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 		     FIELD_PREP(SN_PWM_INV_MASK, state->polarity == PWM_POLARITY_INVERSED);
 	ret = regmap_write(pdata->regmap, SN_PWM_EN_INV_REG, pwm_en_inv);
 	if (ret) {
-		dev_err(pdata->dev, "failed to update PWM_EN/PWM_INV\n");
+		dev_err(chip->dev, "failed to update PWM_EN/PWM_INV\n");
 		goto out;
 	}
 
@@ -1525,7 +1527,7 @@ static int ti_sn_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 out:
 
 	if (!pdata->pwm_enabled)
-		pm_runtime_put_sync(pdata->dev);
+		pm_runtime_put_sync(chip->dev);
 
 	return ret;
 }
@@ -1585,12 +1587,14 @@ static int ti_sn_pwm_probe(struct auxiliary_device *adev,
 {
 	struct ti_sn65dsi86 *pdata = dev_get_drvdata(adev->dev.parent);
 
-	pdata->pchip.dev = pdata->dev;
+	pdata->pchip.dev = &adev->dev;
 	pdata->pchip.ops = &ti_sn_pwm_ops;
 	pdata->pchip.npwm = 1;
 	pdata->pchip.of_xlate = of_pwm_single_xlate;
 	pdata->pchip.of_pwm_n_cells = 1;
 
+	devm_pm_runtime_enable(&adev->dev);
+
 	return pwmchip_add(&pdata->pchip);
 }
 
@@ -1601,7 +1605,7 @@ static void ti_sn_pwm_remove(struct auxiliary_device *adev)
 	pwmchip_remove(&pdata->pchip);
 
 	if (pdata->pwm_enabled)
-		pm_runtime_put_sync(pdata->dev);
+		pm_runtime_put_sync(&adev->dev);
 }
 
 static const struct auxiliary_device_id ti_sn_pwm_id_table[] = {
diff --git a/drivers/gpu/drm/ci/arm64.config b/drivers/gpu/drm/ci/arm64.config
index b4f653417883..8dbce9919a57 100644
--- a/drivers/gpu/drm/ci/arm64.config
+++ b/drivers/gpu/drm/ci/arm64.config
@@ -186,6 +186,7 @@ CONFIG_HW_RANDOM_MTK=y
 CONFIG_MTK_DEVAPC=y
 CONFIG_PWM_MTK_DISP=y
 CONFIG_MTK_CMDQ=y
+CONFIG_REGULATOR_DA9211=y
 
 # For nouveau.  Note that DRM must be a module so that it's loaded after NFS is up to provide the firmware.
 CONFIG_ARCH_TEGRA=y
diff --git a/drivers/gpu/drm/ci/build.sh b/drivers/gpu/drm/ci/build.sh
index ca2923ed2290..f73f3471e94e 100644
--- a/drivers/gpu/drm/ci/build.sh
+++ b/drivers/gpu/drm/ci/build.sh
@@ -19,7 +19,7 @@ if [[ "$KERNEL_ARCH" = "arm64" ]]; then
     DEVICE_TREES+=" arch/arm64/boot/dts/amlogic/meson-gxl-s805x-libretech-ac.dtb"
     DEVICE_TREES+=" arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dtb"
     DEVICE_TREES+=" arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dtb"
-    DEVICE_TREES+=" arch/arm64/boot/dts/qcom/apq8016-sbc.dtb"
+    DEVICE_TREES+=" arch/arm64/boot/dts/qcom/apq8016-sbc-usb-host.dtb"
     DEVICE_TREES+=" arch/arm64/boot/dts/qcom/apq8096-db820c.dtb"
     DEVICE_TREES+=" arch/arm64/boot/dts/amlogic/meson-g12b-a311d-khadas-vim3.dtb"
     DEVICE_TREES+=" arch/arm64/boot/dts/mediatek/mt8173-elm-hana.dtb"
@@ -78,19 +78,19 @@ else
     fi
 fi
 
-for opt in $ENABLE_KCONFIGS; do
-  echo CONFIG_$opt=y >> drivers/gpu/drm/ci/${KERNEL_ARCH}.config
-done
-for opt in $DISABLE_KCONFIGS; do
-  echo CONFIG_$opt=n >> drivers/gpu/drm/ci/${KERNEL_ARCH}.config
-done
-
 if [[ -n "${MERGE_FRAGMENT}" ]]; then
     ./scripts/kconfig/merge_config.sh ${DEFCONFIG} drivers/gpu/drm/ci/${MERGE_FRAGMENT}
 else
     make `basename ${DEFCONFIG}`
 fi
 
+for opt in $ENABLE_KCONFIGS; do
+    ./scripts/config --enable CONFIG_$opt
+done
+for opt in $DISABLE_KCONFIGS; do
+    ./scripts/config --disable CONFIG_$opt
+done
+
 make ${KERNEL_IMAGE_NAME}
 
 mkdir -p /lava-files/
diff --git a/drivers/gpu/drm/ci/gitlab-ci.yml b/drivers/gpu/drm/ci/gitlab-ci.yml
index aeb9bab1b069..dac92cc2777c 100644
--- a/drivers/gpu/drm/ci/gitlab-ci.yml
+++ b/drivers/gpu/drm/ci/gitlab-ci.yml
@@ -5,7 +5,7 @@ variables:
   UPSTREAM_REPO: git://anongit.freedesktop.org/drm/drm
   TARGET_BRANCH: drm-next
 
-  IGT_VERSION: d1db7333d9c5fbbb05e50b0804123950d9dc1c46
+  IGT_VERSION: d2af13d9f5be5ce23d996e4afd3e45990f5ab977
 
   DEQP_RUNNER_GIT_URL: https://gitlab.freedesktop.org/anholt/deqp-runner.git
   DEQP_RUNNER_GIT_TAG: v0.15.0
diff --git a/drivers/gpu/drm/ci/igt_runner.sh b/drivers/gpu/drm/ci/igt_runner.sh
index 2f815ee3a8a3..f1a08b9b146f 100755
--- a/drivers/gpu/drm/ci/igt_runner.sh
+++ b/drivers/gpu/drm/ci/igt_runner.sh
@@ -15,15 +15,21 @@ cat /sys/kernel/debug/device_component/*
 '
 
 # Dump drm state to confirm that kernel was able to find a connected display:
-# TODO this path might not exist for all drivers.. maybe run modetest instead?
 set +e
 cat /sys/kernel/debug/dri/*/state
 set -e
 
 case "$DRIVER_NAME" in
-    rockchip|mediatek|meson)
+    rockchip|meson)
         export IGT_FORCE_DRIVER="panfrost"
         ;;
+    mediatek)
+        if [ "$GPU_VERSION" = "mt8173" ]; then
+            export IGT_FORCE_DRIVER=${DRIVER_NAME}
+        elif [ "$GPU_VERSION" = "mt8183" ]; then
+            export IGT_FORCE_DRIVER="panfrost"
+        fi
+        ;;
     amdgpu)
         # Cannot use HWCI_KERNEL_MODULES as at that point we don't have the module in /lib
         mv /install/modules/lib/modules/* /lib/modules/.
diff --git a/drivers/gpu/drm/ci/test.yml b/drivers/gpu/drm/ci/test.yml
index f285ed67eb3d..2c9a1838e728 100644
--- a/drivers/gpu/drm/ci/test.yml
+++ b/drivers/gpu/drm/ci/test.yml
@@ -102,15 +102,12 @@ msm:apq8016:
   stage: msm
   variables:
     DRIVER_NAME: msm
-    BM_DTB: https://${PIPELINE_ARTIFACTS_BASE}/arm64/apq8016-sbc.dtb
+    BM_DTB: https://${PIPELINE_ARTIFACTS_BASE}/arm64/apq8016-sbc-usb-host.dtb
     GPU_VERSION: apq8016
     BM_CMDLINE: "ip=dhcp console=ttyMSM0,115200n8 $BM_KERNEL_EXTRA_ARGS root=/dev/nfs rw nfsrootdebug nfsroot=,tcp,nfsvers=4.2 init=/init $BM_KERNELARGS"
     RUNNER_TAG: google-freedreno-db410c
   script:
     - ./install/bare-metal/fastboot.sh
-  rules:
-    # TODO: current issue: it is not fiding the NFS root. Fix and remove this rule.
-    - when: never
 
 msm:apq8096:
   extends:
@@ -280,9 +277,6 @@ mediatek:mt8173:
     DEVICE_TYPE: mt8173-elm-hana
     GPU_VERSION: mt8173
     RUNNER_TAG: mesa-ci-x86-64-lava-mt8173-elm-hana
-  rules:
-    # TODO: current issue: device is hanging. Fix and remove this rule.
-    - when: never
 
 mediatek:mt8183:
   extends:
@@ -335,11 +329,10 @@ virtio_gpu:none:
   script:
     - ln -sf $CI_PROJECT_DIR/install /install
     - mv install/bzImage /lava-files/bzImage
+    - mkdir -p $CI_PROJECT_DIR/results
+    - ln -sf $CI_PROJECT_DIR/results /results
     - install/crosvm-runner.sh install/igt_runner.sh
   needs:
     - debian/x86_64_test-gl
     - testing:x86_64
     - igt:x86_64
-  rules:
-    # TODO: current issue: malloc(): corrupted top size. Fix and remove this rule.
-    - when: never
-\ No newline at end of file
diff --git a/drivers/gpu/drm/ci/xfails/mediatek-mt8173-fails.txt b/drivers/gpu/drm/ci/xfails/mediatek-mt8173-fails.txt
index 671916067dba..ef0cb7c3698c 100644
--- a/drivers/gpu/drm/ci/xfails/mediatek-mt8173-fails.txt
+++ b/drivers/gpu/drm/ci/xfails/mediatek-mt8173-fails.txt
@@ -1,5 +1,4 @@
 kms_3d,Fail
-kms_addfb_basic@addfb25-bad-modifier,Fail
 kms_bw@linear-tiling-1-displays-1920x1080p,Fail
 kms_bw@linear-tiling-1-displays-2560x1440p,Fail
 kms_bw@linear-tiling-1-displays-3840x2160p,Fail
@@ -9,13 +8,19 @@ kms_bw@linear-tiling-2-displays-3840x2160p,Fail
 kms_bw@linear-tiling-3-displays-1920x1080p,Fail
 kms_bw@linear-tiling-3-displays-2560x1440p,Fail
 kms_bw@linear-tiling-3-displays-3840x2160p,Fail
+kms_color@invalid-gamma-lut-sizes,Fail
 kms_color@pipe-A-invalid-gamma-lut-sizes,Fail
 kms_color@pipe-B-invalid-gamma-lut-sizes,Fail
-kms_force_connector_basic@force-connector-state,Fail
+kms_cursor_legacy@cursor-vs-flip-atomic,Fail
+kms_cursor_legacy@cursor-vs-flip-legacy,Fail
+kms_flip@flip-vs-modeset-vs-hang,Fail
+kms_flip@flip-vs-panning-vs-hang,Fail
+kms_flip@flip-vs-suspend,Fail
+kms_flip@flip-vs-suspend-interruptible,Fail
 kms_force_connector_basic@force-edid,Fail
 kms_force_connector_basic@force-load-detect,Fail
 kms_force_connector_basic@prune-stale-modes,Fail
-kms_invalid_mode@int-max-clock,Fail
+kms_hdmi_inject@inject-4k,Fail
 kms_plane_scaling@planes-upscale-20x20,Fail
 kms_plane_scaling@planes-upscale-20x20-downscale-factor-0-25,Fail
 kms_plane_scaling@planes-upscale-20x20-downscale-factor-0-5,Fail
@@ -27,3 +32,5 @@ kms_properties@get_properties-sanity-atomic,Fail
 kms_properties@plane-properties-atomic,Fail
 kms_properties@plane-properties-legacy,Fail
 kms_rmfb@close-fd,Fail
+kms_selftest@drm_format,Timeout
+kms_selftest@drm_format_helper,Timeout
diff --git a/drivers/gpu/drm/ci/xfails/msm-apq8016-fails.txt b/drivers/gpu/drm/ci/xfails/msm-apq8016-fails.txt
index 9981682feab2..d39d254c935e 100644
--- a/drivers/gpu/drm/ci/xfails/msm-apq8016-fails.txt
+++ b/drivers/gpu/drm/ci/xfails/msm-apq8016-fails.txt
@@ -6,10 +6,15 @@ kms_cursor_legacy@all-pipes-single-bo,Fail
 kms_cursor_legacy@all-pipes-single-move,Fail
 kms_cursor_legacy@all-pipes-torture-bo,Fail
 kms_cursor_legacy@all-pipes-torture-move,Fail
+kms_cursor_legacy@forked-bo,Fail
+kms_cursor_legacy@forked-move,Fail
 kms_cursor_legacy@pipe-A-forked-bo,Fail
 kms_cursor_legacy@pipe-A-forked-move,Fail
 kms_cursor_legacy@pipe-A-single-bo,Fail
 kms_cursor_legacy@pipe-A-single-move,Fail
 kms_cursor_legacy@pipe-A-torture-bo,Fail
 kms_cursor_legacy@pipe-A-torture-move,Fail
+kms_force_connector_basic@force-edid,Fail
 kms_hdmi_inject@inject-4k,Fail
+kms_selftest@drm_format,Timeout
+kms_selftest@drm_format_helper,Timeout
diff --git a/drivers/gpu/drm/ci/xfails/virtio_gpu-none-fails.txt b/drivers/gpu/drm/ci/xfails/virtio_gpu-none-fails.txt
index 9586b2339f6f..007f21e56d89 100644
--- a/drivers/gpu/drm/ci/xfails/virtio_gpu-none-fails.txt
+++ b/drivers/gpu/drm/ci/xfails/virtio_gpu-none-fails.txt
@@ -10,6 +10,49 @@ kms_bw@linear-tiling-1-displays-3840x2160p,Fail
 kms_bw@linear-tiling-2-displays-1920x1080p,Fail
 kms_bw@linear-tiling-2-displays-2560x1440p,Fail
 kms_bw@linear-tiling-2-displays-3840x2160p,Fail
+kms_bw@linear-tiling-3-displays-1920x1080p,Fail
+kms_bw@linear-tiling-3-displays-2560x1440p,Fail
+kms_bw@linear-tiling-3-displays-3840x2160p,Fail
+kms_bw@linear-tiling-4-displays-1920x1080p,Fail
+kms_bw@linear-tiling-4-displays-2560x1440p,Fail
+kms_bw@linear-tiling-4-displays-3840x2160p,Fail
+kms_bw@linear-tiling-5-displays-1920x1080p,Fail
+kms_bw@linear-tiling-5-displays-2560x1440p,Fail
+kms_bw@linear-tiling-5-displays-3840x2160p,Fail
+kms_bw@linear-tiling-6-displays-1920x1080p,Fail
+kms_bw@linear-tiling-6-displays-2560x1440p,Fail
+kms_bw@linear-tiling-6-displays-3840x2160p,Fail
+kms_bw@linear-tiling-7-displays-1920x1080p,Fail
+kms_bw@linear-tiling-7-displays-2560x1440p,Fail
+kms_bw@linear-tiling-7-displays-3840x2160p,Fail
+kms_bw@linear-tiling-8-displays-1920x1080p,Fail
+kms_bw@linear-tiling-8-displays-2560x1440p,Fail
+kms_bw@linear-tiling-8-displays-3840x2160p,Fail
+kms_flip@absolute-wf_vblank,Fail
+kms_flip@absolute-wf_vblank-interruptible,Fail
+kms_flip@basic-flip-vs-wf_vblank,Fail
+kms_flip@blocking-absolute-wf_vblank,Fail
+kms_flip@blocking-absolute-wf_vblank-interruptible,Fail
+kms_flip@blocking-wf_vblank,Fail
+kms_flip@busy-flip,Fail
+kms_flip@dpms-vs-vblank-race,Fail
+kms_flip@dpms-vs-vblank-race-interruptible,Fail
+kms_flip@flip-vs-absolute-wf_vblank,Fail
+kms_flip@flip-vs-absolute-wf_vblank-interruptible,Fail
+kms_flip@flip-vs-blocking-wf-vblank,Fail
+kms_flip@flip-vs-expired-vblank,Fail
+kms_flip@flip-vs-expired-vblank-interruptible,Fail
+kms_flip@flip-vs-modeset-vs-hang,Fail
+kms_flip@flip-vs-panning-vs-hang,Fail
+kms_flip@flip-vs-wf_vblank-interruptible,Fail
+kms_flip@modeset-vs-vblank-race,Fail
+kms_flip@modeset-vs-vblank-race-interruptible,Fail
+kms_flip@plain-flip-fb-recreate,Fail
+kms_flip@plain-flip-fb-recreate-interruptible,Fail
+kms_flip@plain-flip-ts-check,Fail
+kms_flip@plain-flip-ts-check-interruptible,Fail
+kms_flip@wf_vblank-ts-check,Fail
+kms_flip@wf_vblank-ts-check-interruptible,Fail
 kms_invalid_mode@int-max-clock,Fail
 kms_plane_scaling@downscale-with-modifier-factor-0-25,Fail
 kms_plane_scaling@downscale-with-rotation-factor-0-25,Fail
@@ -22,6 +65,9 @@ kms_plane_scaling@upscale-with-modifier-factor-0-25,Fail
 kms_plane_scaling@upscale-with-pixel-format-20x20,Fail
 kms_plane_scaling@upscale-with-pixel-format-factor-0-25,Fail
 kms_plane_scaling@upscale-with-rotation-20x20,Fail
+kms_selftest@drm_format,Timeout
+kms_selftest@drm_format_helper,Timeout
+kms_setmode@basic,Fail
 kms_vblank@crtc-id,Fail
 kms_vblank@invalid,Fail
 kms_vblank@pipe-A-accuracy-idle,Fail
diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c
index d72b6f9a352c..b1ca3a1100da 100644
--- a/drivers/gpu/drm/display/drm_dp_helper.c
+++ b/drivers/gpu/drm/display/drm_dp_helper.c
@@ -2102,7 +2102,6 @@ int drm_dp_aux_register(struct drm_dp_aux *aux)
 	if (!aux->ddc.algo)
 		drm_dp_aux_init(aux);
 
-	aux->ddc.class = I2C_CLASS_DDC;
 	aux->ddc.owner = THIS_MODULE;
 	aux->ddc.dev.parent = aux->dev;
 
diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c
index 8ca01a6bf645..f7c6b60629c2 100644
--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
@@ -5491,6 +5491,7 @@ EXPORT_SYMBOL(drm_dp_mst_atomic_enable_dsc);
  *   - 0 if the new state is valid
  *   - %-ENOSPC, if the new state is invalid, because of BW limitation
  *         @failing_port is set to:
+ *
  *         - The non-root port where a BW limit check failed
  *           with all the ports downstream of @failing_port passing
  *           the BW limit check.
@@ -5499,6 +5500,7 @@ EXPORT_SYMBOL(drm_dp_mst_atomic_enable_dsc);
  *         - %NULL if the BW limit check failed at the root port
  *           with all the ports downstream of the root port passing
  *           the BW limit check.
+ *
  *   - %-EINVAL, if the new state is invalid, because the root port has
  *     too many payloads.
  */
@@ -5926,7 +5928,6 @@ static int drm_dp_mst_register_i2c_bus(struct drm_dp_mst_port *port)
 	aux->ddc.algo_data = aux;
 	aux->ddc.retries = 3;
 
-	aux->ddc.class = I2C_CLASS_DDC;
 	aux->ddc.owner = THIS_MODULE;
 	/* FIXME: set the kdev of the port's connector as parent */
 	aux->ddc.dev.parent = parent_dev;
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index c31fc0b48c31..a91737adf8e7 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -733,6 +733,7 @@ static void drm_atomic_plane_print_state(struct drm_printer *p,
 		   drm_get_color_encoding_name(state->color_encoding));
 	drm_printf(p, "\tcolor-range=%s\n",
 		   drm_get_color_range_name(state->color_range));
+	drm_printf(p, "\tcolor_mgmt_changed=%d\n", state->color_mgmt_changed);
 
 	if (plane->funcs->atomic_print_state)
 		plane->funcs->atomic_print_state(p, state);
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index c98a766ca3bd..39ef0a6addeb 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -2014,7 +2014,7 @@ int drm_atomic_helper_commit(struct drm_device *dev,
 			return ret;
 
 		drm_atomic_helper_async_commit(dev, state);
-		drm_atomic_helper_cleanup_planes(dev, state);
+		drm_atomic_helper_unprepare_planes(dev, state);
 
 		return 0;
 	}
@@ -2074,7 +2074,7 @@ int drm_atomic_helper_commit(struct drm_device *dev,
 	return 0;
 
 err:
-	drm_atomic_helper_cleanup_planes(dev, state);
+	drm_atomic_helper_unprepare_planes(dev, state);
 	return ret;
 }
 EXPORT_SYMBOL(drm_atomic_helper_commit);
@@ -2652,6 +2652,39 @@ fail_prepare_fb:
 }
 EXPORT_SYMBOL(drm_atomic_helper_prepare_planes);
 
+/**
+ * drm_atomic_helper_unprepare_planes - release plane resources on aborts
+ * @dev: DRM device
+ * @state: atomic state object holding the new plane states to release
+ *
+ * Undoes the effects of drm_atomic_helper_prepare_planes() when aborting an
+ * atomic commit: every new plane state first gets the optional end_fb_access
+ * helper callback, then the optional cleanup_fb helper callback. For cleaning
+ * up after a successful commit use drm_atomic_helper_cleanup_planes().
+ */
+void drm_atomic_helper_unprepare_planes(struct drm_device *dev,
+					struct drm_atomic_state *state)
+{
+	struct drm_plane *plane;
+	struct drm_plane_state *new_plane_state;
+	int i;
+
+	for_each_new_plane_in_state(state, plane, new_plane_state, i) {
+		const struct drm_plane_helper_funcs *funcs = plane->helper_private;
+
+		if (funcs->end_fb_access)
+			funcs->end_fb_access(plane, new_plane_state);
+	}
+
+	for_each_new_plane_in_state(state, plane, new_plane_state, i) {
+		const struct drm_plane_helper_funcs *funcs = plane->helper_private;
+
+		if (funcs->cleanup_fb)
+			funcs->cleanup_fb(plane, new_plane_state);
+	}
+}
+EXPORT_SYMBOL(drm_atomic_helper_unprepare_planes);
+
 static bool plane_crtc_active(const struct drm_plane_state *state)
 {
 	return state->crtc && state->crtc->state->active;
@@ -2786,6 +2819,17 @@ void drm_atomic_helper_commit_planes(struct drm_device *dev,
 
 		funcs->atomic_flush(crtc, old_state);
 	}
+
+	/*
+	 * Signal end of framebuffer access here before hw_done. After hw_done,
+	 * a later commit might have already released the plane state.
+	 */
+	for_each_old_plane_in_state(old_state, plane, old_plane_state, i) {
+		const struct drm_plane_helper_funcs *funcs = plane->helper_private;
+
+		if (funcs->end_fb_access)
+			funcs->end_fb_access(plane, old_plane_state);
+	}
 }
 EXPORT_SYMBOL(drm_atomic_helper_commit_planes);
 
@@ -2913,40 +2957,22 @@ EXPORT_SYMBOL(drm_atomic_helper_disable_planes_on_crtc);
  * configuration. Hence the old configuration must be perserved in @old_state to
  * be able to call this function.
  *
- * This function must also be called on the new state when the atomic update
- * fails at any point after calling drm_atomic_helper_prepare_planes().
+ * This function must not be called on the new state when the atomic update
+ * fails at any point after calling drm_atomic_helper_prepare_planes(). Use
+ * drm_atomic_helper_unprepare_planes() in this case.
  */
 void drm_atomic_helper_cleanup_planes(struct drm_device *dev,
 				      struct drm_atomic_state *old_state)
 {
 	struct drm_plane *plane;
-	struct drm_plane_state *old_plane_state, *new_plane_state;
+	struct drm_plane_state *old_plane_state;
 	int i;
 
-	for_each_oldnew_plane_in_state(old_state, plane, old_plane_state, new_plane_state, i) {
+	for_each_old_plane_in_state(old_state, plane, old_plane_state, i) {
 		const struct drm_plane_helper_funcs *funcs = plane->helper_private;
 
-		if (funcs->end_fb_access)
-			funcs->end_fb_access(plane, new_plane_state);
-	}
-
-	for_each_oldnew_plane_in_state(old_state, plane, old_plane_state, new_plane_state, i) {
-		const struct drm_plane_helper_funcs *funcs;
-		struct drm_plane_state *plane_state;
-
-		/*
-		 * This might be called before swapping when commit is aborted,
-		 * in which case we have to cleanup the new state.
-		 */
-		if (old_plane_state == plane->state)
-			plane_state = new_plane_state;
-		else
-			plane_state = old_plane_state;
-
-		funcs = plane->helper_private;
-
 		if (funcs->cleanup_fb)
-			funcs->cleanup_fb(plane, plane_state);
+			funcs->cleanup_fb(plane, old_plane_state);
 	}
 }
 EXPORT_SYMBOL(drm_atomic_helper_cleanup_planes);
diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c
index 54975de44a0e..519228eb1095 100644
--- a/drivers/gpu/drm/drm_atomic_state_helper.c
+++ b/drivers/gpu/drm/drm_atomic_state_helper.c
@@ -352,6 +352,7 @@ void __drm_atomic_helper_plane_duplicate_state(struct drm_plane *plane,
 	state->fence = NULL;
 	state->commit = NULL;
 	state->fb_damage_clips = NULL;
+	state->color_mgmt_changed = false;
 }
 EXPORT_SYMBOL(__drm_atomic_helper_plane_duplicate_state);
 
diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c
index aee4a65d4959..29d4940188d4 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -362,48 +362,6 @@ static s32 __user *get_out_fence_for_connector(struct drm_atomic_state *state,
 	return fence_ptr;
 }
 
-static int
-drm_atomic_replace_property_blob_from_id(struct drm_device *dev,
-					 struct drm_property_blob **blob,
-					 uint64_t blob_id,
-					 ssize_t expected_size,
-					 ssize_t expected_elem_size,
-					 bool *replaced)
-{
-	struct drm_property_blob *new_blob = NULL;
-
-	if (blob_id != 0) {
-		new_blob = drm_property_lookup_blob(dev, blob_id);
-		if (new_blob == NULL) {
-			drm_dbg_atomic(dev,
-				       "cannot find blob ID %llu\n", blob_id);
-			return -EINVAL;
-		}
-
-		if (expected_size > 0 &&
-		    new_blob->length != expected_size) {
-			drm_dbg_atomic(dev,
-				       "[BLOB:%d] length %zu different from expected %zu\n",
-				       new_blob->base.id, new_blob->length, expected_size);
-			drm_property_blob_put(new_blob);
-			return -EINVAL;
-		}
-		if (expected_elem_size > 0 &&
-		    new_blob->length % expected_elem_size != 0) {
-			drm_dbg_atomic(dev,
-				       "[BLOB:%d] length %zu not divisible by element size %zu\n",
-				       new_blob->base.id, new_blob->length, expected_elem_size);
-			drm_property_blob_put(new_blob);
-			return -EINVAL;
-		}
-	}
-
-	*replaced |= drm_property_replace_blob(blob, new_blob);
-	drm_property_blob_put(new_blob);
-
-	return 0;
-}
-
 static int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
 		struct drm_crtc_state *state, struct drm_property *property,
 		uint64_t val)
@@ -424,7 +382,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
 	} else if (property == config->prop_vrr_enabled) {
 		state->vrr_enabled = val;
 	} else if (property == config->degamma_lut_property) {
-		ret = drm_atomic_replace_property_blob_from_id(dev,
+		ret = drm_property_replace_blob_from_id(dev,
 					&state->degamma_lut,
 					val,
 					-1, sizeof(struct drm_color_lut),
@@ -432,7 +390,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
 		state->color_mgmt_changed |= replaced;
 		return ret;
 	} else if (property == config->ctm_property) {
-		ret = drm_atomic_replace_property_blob_from_id(dev,
+		ret = drm_property_replace_blob_from_id(dev,
 					&state->ctm,
 					val,
 					sizeof(struct drm_color_ctm), -1,
@@ -440,7 +398,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
 		state->color_mgmt_changed |= replaced;
 		return ret;
 	} else if (property == config->gamma_lut_property) {
-		ret = drm_atomic_replace_property_blob_from_id(dev,
+		ret = drm_property_replace_blob_from_id(dev,
 					&state->gamma_lut,
 					val,
 					-1, sizeof(struct drm_color_lut),
@@ -581,7 +539,7 @@ static int drm_atomic_plane_set_property(struct drm_plane *plane,
 	} else if (property == plane->color_range_property) {
 		state->color_range = val;
 	} else if (property == config->prop_fb_damage_clips) {
-		ret = drm_atomic_replace_property_blob_from_id(dev,
+		ret = drm_property_replace_blob_from_id(dev,
 					&state->fb_damage_clips,
 					val,
 					-1,
@@ -778,7 +736,7 @@ static int drm_atomic_connector_set_property(struct drm_connector *connector,
 		if (state->link_status != DRM_LINK_STATUS_GOOD)
 			state->link_status = val;
 	} else if (property == config->hdr_output_metadata_property) {
-		ret = drm_atomic_replace_property_blob_from_id(dev,
+		ret = drm_property_replace_blob_from_id(dev,
 				&state->hdr_output_metadata,
 				val,
 				sizeof(struct hdr_output_metadata), -1,
diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c
index 252c105d614f..22aa015df387 100644
--- a/drivers/gpu/drm/drm_auth.c
+++ b/drivers/gpu/drm/drm_auth.c
@@ -234,7 +234,7 @@ static int
 drm_master_check_perm(struct drm_device *dev, struct drm_file *file_priv)
 {
 	if (file_priv->was_master &&
-	    rcu_access_pointer(file_priv->pid) == task_pid(current))
+	    rcu_access_pointer(file_priv->pid) == task_tgid(current))
 		return 0;
 
 	if (!capable(CAP_SYS_ADMIN))
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..5ebdd6f8f36e 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -332,6 +332,7 @@ alloc_range_bias(struct drm_buddy *mm,
 		 u64 start, u64 end,
 		 unsigned int order)
 {
+	u64 req_size = mm->chunk_size << order;
 	struct drm_buddy_block *block;
 	struct drm_buddy_block *buddy;
 	LIST_HEAD(dfs);
@@ -367,6 +368,15 @@ alloc_range_bias(struct drm_buddy *mm,
 		if (drm_buddy_block_is_allocated(block))
 			continue;
 
+		if (block_start < start || block_end > end) {
+			u64 adjusted_start = max(block_start, start);
+			u64 adjusted_end = min(block_end, end);
+
+			if (round_down(adjusted_end + 1, req_size) <=
+			    round_up(adjusted_start, req_size))
+				continue;
+		}
+
 		if (contains(start, end, block_start, block_end) &&
 		    order == drm_buddy_block_order(block)) {
 			/*
@@ -538,7 +548,13 @@ static int __alloc_range(struct drm_buddy *mm,
 		list_add(&block->left->tmp_link, dfs);
 	} while (1);
 
+	if (total_allocated < size) {
+		err = -ENOSPC;
+		goto err_free;
+	}
+
 	list_splice_tail(&allocated, blocks);
+
 	return 0;
 
 err_undo:
@@ -755,8 +771,12 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 		return -EINVAL;
 
 	/* Actual range allocation */
-	if (start + size == end)
+	if (start + size == end) {
+		if (!IS_ALIGNED(start | end, min_block_size))
+			return -EINVAL;
+
 		return __drm_buddy_alloc_range(mm, start, size, NULL, blocks);
+	}
 
 	original_size = size;
 	original_min_size = min_block_size;
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index df9bf3c9206e..65f9f66933bb 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -715,8 +715,7 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data,
 	struct drm_mode_set set;
 	uint32_t __user *set_connectors_ptr;
 	struct drm_modeset_acquire_ctx ctx;
-	int ret;
-	int i;
+	int ret, i, num_connectors = 0;
 
 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
 		return -EOPNOTSUPP;
@@ -871,6 +870,7 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data,
 					connector->name);
 
 			connector_set[i] = connector;
+			num_connectors++;
 		}
 	}
 
@@ -879,7 +879,7 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data,
 	set.y = crtc_req->y;
 	set.mode = mode;
 	set.connectors = connector_set;
-	set.num_connectors = crtc_req->count_connectors;
+	set.num_connectors = num_connectors;
 	set.fb = fb;
 
 	if (drm_drv_uses_atomic_modeset(dev))
@@ -892,7 +892,7 @@ out:
 		drm_framebuffer_put(fb);
 
 	if (connector_set) {
-		for (i = 0; i < crtc_req->count_connectors; i++) {
+		for (i = 0; i < num_connectors; i++) {
 			if (connector_set[i])
 				drm_connector_put(connector_set[i]);
 		}
@@ -904,6 +904,7 @@ out:
 	connector_set = NULL;
 	fb = NULL;
 	mode = NULL;
+	num_connectors = 0;
 
 	DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
 
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index cb4031d5dcbb..69c68804023f 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -2311,7 +2311,8 @@ int drm_edid_override_connector_update(struct drm_connector *connector)
 
 	override = drm_edid_override_get(connector);
 	if (override) {
-		num_modes = drm_edid_connector_update(connector, override);
+		if (drm_edid_connector_update(connector, override) == 0)
+			num_modes = drm_edid_connector_add_modes(connector);
 
 		drm_edid_free(override);
 
diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c
index 3cc0ffc28e86..888aadb6a4ac 100644
--- a/drivers/gpu/drm/drm_framebuffer.c
+++ b/drivers/gpu/drm/drm_framebuffer.c
@@ -461,6 +461,7 @@ int drm_mode_rmfb(struct drm_device *dev, u32 fb_id,
 
 		INIT_WORK_ONSTACK(&arg.work, drm_mode_rmfb_work_fn);
 		INIT_LIST_HEAD(&arg.fbs);
+		drm_WARN_ON(dev, !list_empty(&fb->filp_head));
 		list_add_tail(&fb->filp_head, &arg.fbs);
 
 		schedule_work(&arg.work);
@@ -827,6 +828,8 @@ void drm_framebuffer_free(struct kref *kref)
 			container_of(kref, struct drm_framebuffer, base.refcount);
 	struct drm_device *dev = fb->dev;
 
+	drm_WARN_ON(dev, !list_empty(&fb->filp_head));
+
 	/*
 	 * The lookup idr holds a weak reference, which has not necessarily been
 	 * removed at this point. Check for that.
@@ -1119,7 +1122,7 @@ void drm_framebuffer_remove(struct drm_framebuffer *fb)
 
 	dev = fb->dev;
 
-	WARN_ON(!list_empty(&fb->filp_head));
+	drm_WARN_ON(dev, !list_empty(&fb->filp_head));
 
 	/*
 	 * drm ABI mandates that we remove any deleted framebuffers from active
diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c
index dc8edca61764..f9eb56f24bef 100644
--- a/drivers/gpu/drm/drm_gpuvm.c
+++ b/drivers/gpu/drm/drm_gpuvm.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 OR MIT
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
 /*
  * Copyright (c) 2022 Red Hat.
  *
diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index ac9a406250c5..893f52ee4926 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@@ -2617,8 +2617,7 @@ void drm_mode_convert_to_umode(struct drm_mode_modeinfo *out,
 		break;
 	}
 
-	strncpy(out->name, in->name, DRM_DISPLAY_MODE_LEN);
-	out->name[DRM_DISPLAY_MODE_LEN-1] = 0;
+	strscpy_pad(out->name, in->name, sizeof(out->name));
 }
 
 /**
@@ -2659,8 +2658,7 @@ int drm_mode_convert_umode(struct drm_device *dev,
 	 * useful for the kernel->userspace direction anyway.
 	 */
 	out->type = in->type & DRM_MODE_TYPE_ALL;
-	strncpy(out->name, in->name, DRM_DISPLAY_MODE_LEN);
-	out->name[DRM_DISPLAY_MODE_LEN-1] = 0;
+	strscpy_pad(out->name, in->name, sizeof(out->name));
 
 	/* Clearing picture aspect ratio bits from out flags,
 	 * as the aspect-ratio information is not stored in
diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c
index 9e8e4c60983d..672c655c7a8e 100644
--- a/drivers/gpu/drm/drm_plane.c
+++ b/drivers/gpu/drm/drm_plane.c
@@ -1503,6 +1503,7 @@ retry:
 out:
 	if (fb)
 		drm_framebuffer_put(fb);
+	fb = NULL;
 	if (plane->old_fb)
 		drm_framebuffer_put(plane->old_fb);
 	plane->old_fb = NULL;
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 63b709a67471..7352bde299d5 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -278,7 +278,7 @@ void drm_gem_dmabuf_release(struct dma_buf *dma_buf)
 }
 EXPORT_SYMBOL(drm_gem_dmabuf_release);
 
-/*
+/**
  * drm_gem_prime_fd_to_handle - PRIME import function for GEM drivers
  * @dev: drm_device to import into
  * @file_priv: drm file-private structure
@@ -292,9 +292,9 @@ EXPORT_SYMBOL(drm_gem_dmabuf_release);
  *
  * Returns 0 on success or a negative error code on failure.
  */
-static int drm_gem_prime_fd_to_handle(struct drm_device *dev,
-				      struct drm_file *file_priv, int prime_fd,
-				      uint32_t *handle)
+int drm_gem_prime_fd_to_handle(struct drm_device *dev,
+			       struct drm_file *file_priv, int prime_fd,
+			       uint32_t *handle)
 {
 	struct dma_buf *dma_buf;
 	struct drm_gem_object *obj;
@@ -360,6 +360,7 @@ out_put:
 	dma_buf_put(dma_buf);
 	return ret;
 }
+EXPORT_SYMBOL(drm_gem_prime_fd_to_handle);
 
 int drm_prime_fd_to_handle_ioctl(struct drm_device *dev, void *data,
 				 struct drm_file *file_priv)
@@ -408,7 +409,7 @@ static struct dma_buf *export_and_register_object(struct drm_device *dev,
 	return dmabuf;
 }
 
-/*
+/**
  * drm_gem_prime_handle_to_fd - PRIME export function for GEM drivers
  * @dev: dev to export the buffer from
  * @file_priv: drm file-private structure
@@ -421,10 +422,10 @@ static struct dma_buf *export_and_register_object(struct drm_device *dev,
  * The actual exporting from GEM object to a dma-buf is done through the
  * &drm_gem_object_funcs.export callback.
  */
-static int drm_gem_prime_handle_to_fd(struct drm_device *dev,
-				      struct drm_file *file_priv, uint32_t handle,
-				      uint32_t flags,
-				      int *prime_fd)
+int drm_gem_prime_handle_to_fd(struct drm_device *dev,
+			       struct drm_file *file_priv, uint32_t handle,
+			       uint32_t flags,
+			       int *prime_fd)
 {
 	struct drm_gem_object *obj;
 	int ret = 0;
@@ -506,6 +507,7 @@ out_unlock:
 
 	return ret;
 }
+EXPORT_SYMBOL(drm_gem_prime_handle_to_fd);
 
 int drm_prime_handle_to_fd_ioctl(struct drm_device *dev, void *data,
 				 struct drm_file *file_priv)
@@ -818,7 +820,7 @@ struct sg_table *drm_prime_pages_to_sg(struct drm_device *dev,
 	if (max_segment == 0)
 		max_segment = UINT_MAX;
 	err = sg_alloc_table_from_pages_segment(sg, pages, nr_pages, 0,
-						nr_pages << PAGE_SHIFT,
+						(unsigned long)nr_pages << PAGE_SHIFT,
 						max_segment, GFP_KERNEL);
 	if (err) {
 		kfree(sg);
@@ -864,9 +866,9 @@ EXPORT_SYMBOL(drm_prime_get_contiguous_size);
  * @obj: GEM object to export
  * @flags: flags like DRM_CLOEXEC and DRM_RDWR
  *
- * This is the implementation of the &drm_gem_object_funcs.export functions
- * for GEM drivers using the PRIME helpers. It is used as the default for
- * drivers that do not set their own.
+ * This is the implementation of the &drm_gem_object_funcs.export functions for GEM drivers
+ * using the PRIME helpers. It is used as the default in
+ * drm_gem_prime_handle_to_fd().
  */
 struct dma_buf *drm_gem_prime_export(struct drm_gem_object *obj,
 				     int flags)
@@ -962,9 +964,10 @@ EXPORT_SYMBOL(drm_gem_prime_import_dev);
  * @dev: drm_device to import into
  * @dma_buf: dma-buf object to import
  *
- * This is the implementation of the gem_prime_import functions for GEM
- * drivers using the PRIME helpers. It is the default for drivers that do
- * not set their own &drm_driver.gem_prime_import.
+ * This is the implementation of the gem_prime_import functions for GEM drivers
+ * using the PRIME helpers. Drivers can use this as their
+ * &drm_driver.gem_prime_import implementation. It is used as the default
+ * implementation in drm_gem_prime_fd_to_handle().
  *
  * Drivers must arrange to call drm_prime_gem_destroy() from their
  * &drm_gem_object_funcs.free hook when using this function.
diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c
index dfec479830e4..596272149a35 100644
--- a/drivers/gpu/drm/drm_property.c
+++ b/drivers/gpu/drm/drm_property.c
@@ -27,6 +27,7 @@
 #include <drm/drm_drv.h>
 #include <drm/drm_file.h>
 #include <drm/drm_framebuffer.h>
+#include <drm/drm_print.h>
 #include <drm/drm_property.h>
 
 #include "drm_crtc_internal.h"
@@ -751,6 +752,64 @@ bool drm_property_replace_blob(struct drm_property_blob **blob,
 }
 EXPORT_SYMBOL(drm_property_replace_blob);
 
+/**
+ * drm_property_replace_blob_from_id - replace a blob property taking a reference
+ * @dev: DRM device
+ * @blob: a pointer to the member blob to be replaced
+ * @blob_id: the id of the new blob to replace with, or 0 for no new blob
+ * @expected_size: expected size of the blob property, ignored unless > 0
+ * @expected_elem_size: expected element size in the blob, ignored unless > 0
+ * @replaced: OR-ed with true if the blob was in fact replaced
+ *
+ * Look up the new blob from id, take its reference, check expected sizes of
+ * the blob and its element and replace the old blob by the new one. Advertise
+ * through @replaced whether the blob was in fact replaced.
+ *
+ * Return: 0 on success, -EINVAL if the new blob was not found or its length
+ * does not match the expected sizes.
+ */
+int drm_property_replace_blob_from_id(struct drm_device *dev,
+					 struct drm_property_blob **blob,
+					 uint64_t blob_id,
+					 ssize_t expected_size,
+					 ssize_t expected_elem_size,
+					 bool *replaced)
+{
+	struct drm_property_blob *new_blob = NULL;
+
+	if (blob_id != 0) {
+		new_blob = drm_property_lookup_blob(dev, blob_id);
+		if (new_blob == NULL) {
+			drm_dbg_atomic(dev,
+				       "cannot find blob ID %llu\n", blob_id);
+			return -EINVAL;
+		}
+
+		if (expected_size > 0 && new_blob->length != expected_size) {
+			drm_dbg_atomic(dev,
+				       "[BLOB:%d] length %zu different from expected %zd\n",
+				       new_blob->base.id, new_blob->length, expected_size);
+			drm_property_blob_put(new_blob);
+			return -EINVAL;
+		}
+		if (expected_elem_size > 0 &&
+		    new_blob->length % expected_elem_size != 0) {
+			drm_dbg_atomic(dev,
+				       "[BLOB:%d] length %zu not divisible by element size %zd\n",
+				       new_blob->base.id, new_blob->length, expected_elem_size);
+			drm_property_blob_put(new_blob);
+			return -EINVAL;
+		}
+	}
+
+	*replaced |= drm_property_replace_blob(blob, new_blob);
+	/* Drop the lookup reference; new_blob stays NULL when blob_id is 0. */
+	drm_property_blob_put(new_blob);
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_property_replace_blob_from_id);
+
 int drm_mode_getblob_ioctl(struct drm_device *dev,
 			   void *data, struct drm_file *file_priv)
 {
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index cbb65b7ba425..a6c19de46292 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -1040,7 +1040,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
 	uint64_t *points;
 	uint32_t signaled_count, i;
 
-	if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
+	if (flags & (DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT |
+		     DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE))
 		lockdep_assert_none_held_once();
 
 	points = kmalloc_array(count, sizeof(*points), GFP_KERNEL);
@@ -1109,7 +1110,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
 	 * fallthough and try a 0 timeout wait!
 	 */
 
-	if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
+	if (flags & (DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT |
+		     DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE)) {
 		for (i = 0; i < count; ++i)
 			drm_syncobj_fence_add_wait(syncobjs[i], &entries[i]);
 	}
@@ -1403,7 +1405,7 @@ static void syncobj_eventfd_entry_fence_func(struct dma_fence *fence,
 	struct syncobj_eventfd_entry *entry =
 		container_of(cb, struct syncobj_eventfd_entry, fence_cb);
 
-	eventfd_signal(entry->ev_fd_ctx, 1);
+	eventfd_signal(entry->ev_fd_ctx);
 	syncobj_eventfd_entry_free(entry);
 }
 
@@ -1416,23 +1418,34 @@ syncobj_eventfd_entry_func(struct drm_syncobj *syncobj,
 
 	/* This happens inside the syncobj lock */
 	fence = dma_fence_get(rcu_dereference_protected(syncobj->fence, 1));
+	if (!fence)
+		return;
+
 	ret = dma_fence_chain_find_seqno(&fence, entry->point);
-	if (ret != 0 || !fence) {
+	if (ret != 0) {
+		/* The given seqno has not been submitted yet. */
 		dma_fence_put(fence);
 		return;
+	} else if (!fence) {
+		/* If dma_fence_chain_find_seqno returns 0 but sets the fence
+		 * to NULL, it implies that the given seqno is signaled and a
+		 * later seqno has already been submitted. Assign a stub fence
+		 * so that the eventfd still gets signaled below.
+		 */
+		fence = dma_fence_get_stub();
 	}
 
 	list_del_init(&entry->node);
 	entry->fence = fence;
 
 	if (entry->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE) {
-		eventfd_signal(entry->ev_fd_ctx, 1);
+		eventfd_signal(entry->ev_fd_ctx);
 		syncobj_eventfd_entry_free(entry);
 	} else {
 		ret = dma_fence_add_callback(fence, &entry->fence_cb,
 					     syncobj_eventfd_entry_fence_func);
 		if (ret == -ENOENT) {
-			eventfd_signal(entry->ev_fd_ctx, 1);
+			eventfd_signal(entry->ev_fd_ctx);
 			syncobj_eventfd_entry_free(entry);
 		}
 	}
diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index 4d986077738b..0ef7bc8848b0 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -319,9 +319,9 @@ static void decon_win_set_bldmod(struct decon_context *ctx, unsigned int win,
 static void decon_win_set_pixfmt(struct decon_context *ctx, unsigned int win,
 				 struct drm_framebuffer *fb)
 {
-	struct exynos_drm_plane plane = ctx->planes[win];
+	struct exynos_drm_plane *plane = &ctx->planes[win];
 	struct exynos_drm_plane_state *state =
-		to_exynos_plane_state(plane.base.state);
+		to_exynos_plane_state(plane->base.state);
 	unsigned int alpha = state->base.alpha;
 	unsigned int pixel_alpha;
 	unsigned long val;
@@ -862,18 +862,16 @@ err_disable_pm_runtime:
 	return ret;
 }
 
-static int exynos5433_decon_remove(struct platform_device *pdev)
+static void exynos5433_decon_remove(struct platform_device *pdev)
 {
 	pm_runtime_disable(&pdev->dev);
 
 	component_del(&pdev->dev, &decon_component_ops);
-
-	return 0;
 }
 
 struct platform_driver exynos5433_decon_driver = {
 	.probe		= exynos5433_decon_probe,
-	.remove		= exynos5433_decon_remove,
+	.remove_new	= exynos5433_decon_remove,
 	.driver		= {
 		.name	= "exynos5433-decon",
 		.pm	= pm_ptr(&exynos5433_decon_pm_ops),
diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
index 0156a5e94435..0d185c0564b9 100644
--- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
@@ -765,7 +765,7 @@ err_iounmap:
 	return ret;
 }
 
-static int decon_remove(struct platform_device *pdev)
+static void decon_remove(struct platform_device *pdev)
 {
 	struct decon_context *ctx = dev_get_drvdata(&pdev->dev);
 
@@ -774,8 +774,6 @@ static int decon_remove(struct platform_device *pdev)
 	iounmap(ctx->regs);
 
 	component_del(&pdev->dev, &decon_component_ops);
-
-	return 0;
 }
 
 static int exynos7_decon_suspend(struct device *dev)
@@ -840,7 +838,7 @@ static DEFINE_RUNTIME_DEV_PM_OPS(exynos7_decon_pm_ops, exynos7_decon_suspend,
 
 struct platform_driver decon_driver = {
 	.probe		= decon_probe,
-	.remove		= decon_remove,
+	.remove_new	= decon_remove,
 	.driver		= {
 		.name	= "exynos-decon",
 		.pm	= pm_ptr(&exynos7_decon_pm_ops),
diff --git a/drivers/gpu/drm/exynos/exynos_dp.c b/drivers/gpu/drm/exynos/exynos_dp.c
index 3404ec1367fb..ca31bad6c576 100644
--- a/drivers/gpu/drm/exynos/exynos_dp.c
+++ b/drivers/gpu/drm/exynos/exynos_dp.c
@@ -250,14 +250,12 @@ out:
 	return component_add(&pdev->dev, &exynos_dp_ops);
 }
 
-static int exynos_dp_remove(struct platform_device *pdev)
+static void exynos_dp_remove(struct platform_device *pdev)
 {
 	struct exynos_dp_device *dp = platform_get_drvdata(pdev);
 
 	component_del(&pdev->dev, &exynos_dp_ops);
 	analogix_dp_remove(dp->adp);
-
-	return 0;
 }
 
 static int exynos_dp_suspend(struct device *dev)
@@ -285,7 +283,7 @@ MODULE_DEVICE_TABLE(of, exynos_dp_match);
 
 struct platform_driver dp_driver = {
 	.probe		= exynos_dp_probe,
-	.remove		= exynos_dp_remove,
+	.remove_new	= exynos_dp_remove,
 	.driver		= {
 		.name	= "exynos-dp",
 		.owner	= THIS_MODULE,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dma.c b/drivers/gpu/drm/exynos/exynos_drm_dma.c
index a971590b8132..e2c7373f20c6 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dma.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dma.c
@@ -107,18 +107,16 @@ int exynos_drm_register_dma(struct drm_device *drm, struct device *dev,
 		return 0;
 
 	if (!priv->mapping) {
-		void *mapping;
+		void *mapping = NULL;
 
 		if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU))
 			mapping = arm_iommu_create_mapping(&platform_bus_type,
 				EXYNOS_DEV_ADDR_START, EXYNOS_DEV_ADDR_SIZE);
 		else if (IS_ENABLED(CONFIG_IOMMU_DMA))
 			mapping = iommu_get_domain_for_dev(priv->dma_dev);
-		else
-			mapping = ERR_PTR(-ENODEV);
 
-		if (IS_ERR(mapping))
-			return PTR_ERR(mapping);
+		if (!mapping)
+			return -ENODEV;
 		priv->mapping = mapping;
 	}
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dpi.c b/drivers/gpu/drm/exynos/exynos_drm_dpi.c
index 378e5381978f..0dc36df6ada3 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dpi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dpi.c
@@ -101,7 +101,7 @@ static int exynos_dpi_create_connector(struct drm_encoder *encoder)
 
 	ret = drm_connector_init(encoder->dev, connector,
 				 &exynos_dpi_connector_funcs,
-				 DRM_MODE_CONNECTOR_VGA);
+				 DRM_MODE_CONNECTOR_DPI);
 	if (ret) {
 		DRM_DEV_ERROR(ctx->dev,
 			      "failed to initialize connector with drm\n");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index 8399256cb5c9..7c59e1164a48 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -300,6 +300,7 @@ err_mode_config_cleanup:
 	drm_mode_config_cleanup(drm);
 	exynos_drm_cleanup_dma(drm);
 	kfree(private);
+	dev_set_drvdata(dev, NULL);
 err_free_drm:
 	drm_dev_put(drm);
 
@@ -313,6 +314,7 @@ static void exynos_drm_unbind(struct device *dev)
 	drm_dev_unregister(drm);
 
 	drm_kms_helper_poll_fini(drm);
+	drm_atomic_helper_shutdown(drm);
 
 	component_unbind_all(drm->dev, drm);
 	drm_mode_config_cleanup(drm);
@@ -344,15 +346,23 @@ static int exynos_drm_platform_probe(struct platform_device *pdev)
 					       match);
 }
 
-static int exynos_drm_platform_remove(struct platform_device *pdev)
+static void exynos_drm_platform_remove(struct platform_device *pdev)
 {
 	component_master_del(&pdev->dev, &exynos_drm_ops);
-	return 0;
+}
+
+static void exynos_drm_platform_shutdown(struct platform_device *pdev)
+{
+	struct drm_device *drm = platform_get_drvdata(pdev);
+
+	if (drm)
+		drm_atomic_helper_shutdown(drm);
 }
 
 static struct platform_driver exynos_drm_platform_driver = {
 	.probe	= exynos_drm_platform_probe,
-	.remove	= exynos_drm_platform_remove,
+	.remove_new	= exynos_drm_platform_remove,
+	.shutdown = exynos_drm_platform_shutdown,
 	.driver	= {
 		.name	= "exynos-drm",
 		.pm	= &exynos_drm_pm_ops,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
index 8de2714599fc..e81a576de398 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
@@ -1367,7 +1367,7 @@ err_pm_dis:
 	return ret;
 }
 
-static int fimc_remove(struct platform_device *pdev)
+static void fimc_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct fimc_context *ctx = get_fimc_context(dev);
@@ -1377,8 +1377,6 @@ static int fimc_remove(struct platform_device *pdev)
 	pm_runtime_disable(dev);
 
 	fimc_put_clocks(ctx);
-
-	return 0;
 }
 
 static int fimc_runtime_suspend(struct device *dev)
@@ -1410,7 +1408,7 @@ MODULE_DEVICE_TABLE(of, fimc_of_match);
 
 struct platform_driver fimc_driver = {
 	.probe		= fimc_probe,
-	.remove		= fimc_remove,
+	.remove_new	= fimc_remove,
 	.driver		= {
 		.of_match_table = fimc_of_match,
 		.name	= "exynos-drm-fimc",
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index 8dde7b1e9b35..f2145227a1e0 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -480,7 +480,7 @@ static void fimd_commit(struct exynos_drm_crtc *crtc)
 	struct fimd_context *ctx = crtc->ctx;
 	struct drm_display_mode *mode = &crtc->base.state->adjusted_mode;
 	const struct fimd_driver_data *driver_data = ctx->driver_data;
-	void *timing_base = ctx->regs + driver_data->timing_base;
+	void __iomem *timing_base = ctx->regs + driver_data->timing_base;
 	u32 val;
 
 	if (ctx->suspended)
@@ -661,9 +661,9 @@ static void fimd_win_set_bldmod(struct fimd_context *ctx, unsigned int win,
 static void fimd_win_set_pixfmt(struct fimd_context *ctx, unsigned int win,
 				struct drm_framebuffer *fb, int width)
 {
-	struct exynos_drm_plane plane = ctx->planes[win];
+	struct exynos_drm_plane *plane = &ctx->planes[win];
 	struct exynos_drm_plane_state *state =
-		to_exynos_plane_state(plane.base.state);
+		to_exynos_plane_state(plane->base.state);
 	uint32_t pixel_format = fb->format->format;
 	unsigned int alpha = state->base.alpha;
 	u32 val = WINCONx_ENWIN;
@@ -1277,13 +1277,11 @@ err_disable_pm_runtime:
 	return ret;
 }
 
-static int fimd_remove(struct platform_device *pdev)
+static void fimd_remove(struct platform_device *pdev)
 {
 	pm_runtime_disable(&pdev->dev);
 
 	component_del(&pdev->dev, &fimd_component_ops);
-
-	return 0;
 }
 
 static int exynos_fimd_suspend(struct device *dev)
@@ -1325,7 +1323,7 @@ static DEFINE_RUNTIME_DEV_PM_OPS(exynos_fimd_pm_ops, exynos_fimd_suspend,
 
 struct platform_driver fimd_driver = {
 	.probe		= fimd_probe,
-	.remove		= fimd_remove,
+	.remove_new	= fimd_remove,
 	.driver		= {
 		.name	= "exynos4-fb",
 		.owner	= THIS_MODULE,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index 414e585ec7dd..f3138423612e 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -1530,7 +1530,7 @@ err_destroy_slab:
 	return ret;
 }
 
-static int g2d_remove(struct platform_device *pdev)
+static void g2d_remove(struct platform_device *pdev)
 {
 	struct g2d_data *g2d = platform_get_drvdata(pdev);
 
@@ -1545,8 +1545,6 @@ static int g2d_remove(struct platform_device *pdev)
 	g2d_fini_cmdlist(g2d);
 	destroy_workqueue(g2d->g2d_workq);
 	kmem_cache_destroy(g2d->runqueue_slab);
-
-	return 0;
 }
 
 static int g2d_suspend(struct device *dev)
@@ -1609,7 +1607,7 @@ MODULE_DEVICE_TABLE(of, exynos_g2d_match);
 
 struct platform_driver g2d_driver = {
 	.probe		= g2d_probe,
-	.remove		= g2d_remove,
+	.remove_new	= g2d_remove,
 	.driver		= {
 		.name	= "exynos-drm-g2d",
 		.owner	= THIS_MODULE,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
index 35771fb4e85d..180507a47700 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
@@ -1309,15 +1309,13 @@ err_pm_dis:
 	return ret;
 }
 
-static int gsc_remove(struct platform_device *pdev)
+static void gsc_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 
 	component_del(dev, &gsc_component_ops);
 	pm_runtime_dont_use_autosuspend(dev);
 	pm_runtime_disable(dev);
-
-	return 0;
 }
 
 static int __maybe_unused gsc_runtime_suspend(struct device *dev)
@@ -1343,7 +1341,7 @@ static int __maybe_unused gsc_runtime_resume(struct device *dev)
 	for (i = 0; i < ctx->num_clocks; i++) {
 		ret = clk_prepare_enable(ctx->clocks[i]);
 		if (ret) {
-			while (--i > 0)
+			while (--i >= 0)
 				clk_disable_unprepare(ctx->clocks[i]);
 			return ret;
 		}
@@ -1422,7 +1420,7 @@ MODULE_DEVICE_TABLE(of, exynos_drm_gsc_of_match);
 
 struct platform_driver gsc_driver = {
 	.probe		= gsc_probe,
-	.remove		= gsc_remove,
+	.remove_new	= gsc_remove,
 	.driver		= {
 		.name	= "exynos-drm-gsc",
 		.owner	= THIS_MODULE,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_mic.c b/drivers/gpu/drm/exynos/exynos_drm_mic.c
index 17bab5b1663f..e2920960180f 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_mic.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_mic.c
@@ -442,7 +442,7 @@ err:
 	return ret;
 }
 
-static int exynos_mic_remove(struct platform_device *pdev)
+static void exynos_mic_remove(struct platform_device *pdev)
 {
 	struct exynos_mic *mic = platform_get_drvdata(pdev);
 
@@ -450,8 +450,6 @@ static int exynos_mic_remove(struct platform_device *pdev)
 	pm_runtime_disable(&pdev->dev);
 
 	drm_bridge_remove(&mic->bridge);
-
-	return 0;
 }
 
 static const struct of_device_id exynos_mic_of_match[] = {
@@ -462,7 +460,7 @@ MODULE_DEVICE_TABLE(of, exynos_mic_of_match);
 
 struct platform_driver mic_driver = {
 	.probe		= exynos_mic_probe,
-	.remove		= exynos_mic_remove,
+	.remove_new	= exynos_mic_remove,
 	.driver		= {
 		.name	= "exynos-mic",
 		.pm	= pm_ptr(&exynos_mic_pm_ops),
diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
index ffb327c5139e..5f7516655b08 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
@@ -329,15 +329,13 @@ err_component:
 	return ret;
 }
 
-static int rotator_remove(struct platform_device *pdev)
+static void rotator_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 
 	component_del(dev, &rotator_component_ops);
 	pm_runtime_dont_use_autosuspend(dev);
 	pm_runtime_disable(dev);
-
-	return 0;
 }
 
 static int rotator_runtime_suspend(struct device *dev)
@@ -453,7 +451,7 @@ static DEFINE_RUNTIME_DEV_PM_OPS(rotator_pm_ops, rotator_runtime_suspend,
 
 struct platform_driver rotator_driver = {
 	.probe		= rotator_probe,
-	.remove		= rotator_remove,
+	.remove_new	= rotator_remove,
 	.driver		= {
 		.name	= "exynos-rotator",
 		.owner	= THIS_MODULE,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_scaler.c b/drivers/gpu/drm/exynos/exynos_drm_scaler.c
index f2b8b09a6b4e..392f721f13ab 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_scaler.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_scaler.c
@@ -539,15 +539,13 @@ err_ippdrv_register:
 	return ret;
 }
 
-static int scaler_remove(struct platform_device *pdev)
+static void scaler_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 
 	component_del(dev, &scaler_component_ops);
 	pm_runtime_dont_use_autosuspend(dev);
 	pm_runtime_disable(dev);
-
-	return 0;
 }
 
 static int clk_disable_unprepare_wrapper(struct clk *clk)
@@ -721,7 +719,7 @@ MODULE_DEVICE_TABLE(of, exynos_scaler_match);
 
 struct platform_driver scaler_driver = {
 	.probe		= scaler_probe,
-	.remove		= scaler_remove,
+	.remove_new	= scaler_remove,
 	.driver		= {
 		.name	= "exynos-scaler",
 		.owner	= THIS_MODULE,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
index f5e1adfcaa51..00382f28748a 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
@@ -462,7 +462,7 @@ static int vidi_probe(struct platform_device *pdev)
 	return component_add(dev, &vidi_component_ops);
 }
 
-static int vidi_remove(struct platform_device *pdev)
+static void vidi_remove(struct platform_device *pdev)
 {
 	struct vidi_context *ctx = platform_get_drvdata(pdev);
 
@@ -472,13 +472,11 @@ static int vidi_remove(struct platform_device *pdev)
 	}
 
 	component_del(&pdev->dev, &vidi_component_ops);
-
-	return 0;
 }
 
 struct platform_driver vidi_driver = {
 	.probe		= vidi_probe,
-	.remove		= vidi_remove,
+	.remove_new	= vidi_remove,
 	.driver		= {
 		.name	= "exynos-drm-vidi",
 		.owner	= THIS_MODULE,
diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c
index f3aaa4ea3e68..43bed6cbaaea 100644
--- a/drivers/gpu/drm/exynos/exynos_hdmi.c
+++ b/drivers/gpu/drm/exynos/exynos_hdmi.c
@@ -1861,6 +1861,8 @@ static int hdmi_bind(struct device *dev, struct device *master, void *data)
 		return ret;
 
 	crtc = exynos_drm_crtc_get_by_type(drm_dev, EXYNOS_DISPLAY_TYPE_HDMI);
+	if (IS_ERR(crtc))
+		return PTR_ERR(crtc);
 	crtc->pipe_clk = &hdata->phy_clk;
 
 	ret = hdmi_create_connector(encoder);
@@ -2067,7 +2069,7 @@ err_ddc:
 	return ret;
 }
 
-static int hdmi_remove(struct platform_device *pdev)
+static void hdmi_remove(struct platform_device *pdev)
 {
 	struct hdmi_context *hdata = platform_get_drvdata(pdev);
 
@@ -2090,8 +2092,6 @@ static int hdmi_remove(struct platform_device *pdev)
 	put_device(&hdata->ddc_adpt->dev);
 
 	mutex_destroy(&hdata->mutex);
-
-	return 0;
 }
 
 static int __maybe_unused exynos_hdmi_suspend(struct device *dev)
@@ -2123,7 +2123,7 @@ static const struct dev_pm_ops exynos_hdmi_pm_ops = {
 
 struct platform_driver hdmi_driver = {
 	.probe		= hdmi_probe,
-	.remove		= hdmi_remove,
+	.remove_new	= hdmi_remove,
 	.driver		= {
 		.name	= "exynos-hdmi",
 		.owner	= THIS_MODULE,
diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c
index b302392ff0d7..6822333fd0e6 100644
--- a/drivers/gpu/drm/exynos/exynos_mixer.c
+++ b/drivers/gpu/drm/exynos/exynos_mixer.c
@@ -1258,13 +1258,11 @@ static int mixer_probe(struct platform_device *pdev)
 	return ret;
 }
 
-static int mixer_remove(struct platform_device *pdev)
+static void mixer_remove(struct platform_device *pdev)
 {
 	pm_runtime_disable(&pdev->dev);
 
 	component_del(&pdev->dev, &mixer_component_ops);
-
-	return 0;
 }
 
 static int __maybe_unused exynos_mixer_suspend(struct device *dev)
@@ -1338,5 +1336,5 @@ struct platform_driver mixer_driver = {
 		.of_match_table = mixer_match_types,
 	},
 	.probe = mixer_probe,
-	.remove = mixer_remove,
+	.remove_new = mixer_remove,
 };
diff --git a/drivers/gpu/drm/gma500/cdv_intel_dp.c b/drivers/gpu/drm/gma500/cdv_intel_dp.c
index 8992a95076f2..dd1eb7e9877d 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_dp.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_dp.c
@@ -855,7 +855,6 @@ cdv_intel_dp_i2c_init(struct gma_connector *connector,
 
 	memset(&intel_dp->adapter, '\0', sizeof (intel_dp->adapter));
 	intel_dp->adapter.owner = THIS_MODULE;
-	intel_dp->adapter.class = I2C_CLASS_DDC;
 	strncpy (intel_dp->adapter.name, name, sizeof(intel_dp->adapter.name) - 1);
 	intel_dp->adapter.name[sizeof(intel_dp->adapter.name) - 1] = '\0';
 	intel_dp->adapter.algo_data = &intel_dp->algo;
diff --git a/drivers/gpu/drm/gma500/intel_gmbus.c b/drivers/gpu/drm/gma500/intel_gmbus.c
index 09cedabf4776..aa45509859f2 100644
--- a/drivers/gpu/drm/gma500/intel_gmbus.c
+++ b/drivers/gpu/drm/gma500/intel_gmbus.c
@@ -411,7 +411,6 @@ int gma_intel_setup_gmbus(struct drm_device *dev)
 		struct intel_gmbus *bus = &dev_priv->gmbus[i];
 
 		bus->adapter.owner = THIS_MODULE;
-		bus->adapter.class = I2C_CLASS_DDC;
 		snprintf(bus->adapter.name,
 			 sizeof(bus->adapter.name),
 			 "gma500 gmbus %s",
diff --git a/drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c b/drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c
index fc9a34ed58bd..6daa6669ed23 100644
--- a/drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c
+++ b/drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c
@@ -168,7 +168,6 @@ static struct i2c_adapter oaktrail_hdmi_i2c_adapter = {
 	.name		= "oaktrail_hdmi_i2c",
 	.nr		= 3,
 	.owner		= THIS_MODULE,
-	.class		= I2C_CLASS_DDC,
 	.algo		= &oaktrail_hdmi_i2c_algorithm,
 };
 
diff --git a/drivers/gpu/drm/gma500/psb_intel_sdvo.c b/drivers/gpu/drm/gma500/psb_intel_sdvo.c
index d6fd5d726216..e4f914deceba 100644
--- a/drivers/gpu/drm/gma500/psb_intel_sdvo.c
+++ b/drivers/gpu/drm/gma500/psb_intel_sdvo.c
@@ -2426,7 +2426,6 @@ psb_intel_sdvo_init_ddc_proxy(struct psb_intel_sdvo *sdvo,
 			  struct drm_device *dev)
 {
 	sdvo->ddc.owner = THIS_MODULE;
-	sdvo->ddc.class = I2C_CLASS_DDC;
 	snprintf(sdvo->ddc.name, I2C_NAME_SIZE, "SDVO DDC proxy");
 	sdvo->ddc.dev.parent = dev->dev;
 	sdvo->ddc.algo_data = sdvo;
diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c
index 410bd019bb35..e6e48651c15c 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c
+++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c
@@ -81,7 +81,6 @@ int hibmc_ddc_create(struct drm_device *drm_dev,
 		     struct hibmc_connector *connector)
 {
 	connector->adapter.owner = THIS_MODULE;
-	connector->adapter.class = I2C_CLASS_DDC;
 	snprintf(connector->adapter.name, I2C_NAME_SIZE, "HIS i2c bit bus");
 	connector->adapter.dev.parent = drm_dev->dev;
 	i2c_set_adapdata(&connector->adapter, connector);
diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c
index d511d17c5bdf..cff85086f2d6 100644
--- a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c
+++ b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c
@@ -7,7 +7,6 @@
 #include <linux/hyperv.h>
 #include <linux/module.h>
 #include <linux/pci.h>
-#include <linux/screen_info.h>
 
 #include <drm/drm_aperture.h>
 #include <drm/drm_atomic_helper.h>
@@ -73,11 +72,6 @@ static int hyperv_setup_vram(struct hyperv_drm_device *hv,
 	struct drm_device *dev = &hv->dev;
 	int ret;
 
-	if (IS_ENABLED(CONFIG_SYSFB))
-		drm_aperture_remove_conflicting_framebuffers(screen_info.lfb_base,
-							     screen_info.lfb_size,
-							     &hyperv_driver);
-
 	hv->fb_size = (unsigned long)hv->mmio_megabytes * 1024 * 1024;
 
 	ret = vmbus_allocate_mmio(&hv->mem, hdev, 0, -1, hv->fb_size, 0x100000,
@@ -130,6 +124,8 @@ static int hyperv_vmbus_probe(struct hv_device *hdev,
 		goto err_hv_set_drv_data;
 	}
 
+	drm_aperture_remove_framebuffers(&hyperv_driver);
+
 	ret = hyperv_setup_vram(hv, hdev);
 	if (ret)
 		goto err_vmbus_close;
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index b5d6e3352071..3089029abba4 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -140,7 +140,7 @@ config DRM_I915_GVT_KVMGT
 
 	  Note that this driver only supports newer device from Broadwell on.
 	  For further information and setup guide, you can visit:
-	  http://01.org/igvt-g.
+	  https://github.com/intel/gvt-linux/wiki.
 
 	  If in doubt, say "N".
 
diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 2d21930d5501..5b7162076850 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -24,7 +24,9 @@ config DRM_I915_DEBUG
 	select DEBUG_FS
 	select PREEMPT_COUNT
 	select I2C_CHARDEV
+	select REF_TRACKER
 	select STACKDEPOT
+	select STACKTRACE
 	select DRM_DP_AUX_CHARDEV
 	select X86_MSR # used by igt/pm_rpm
 	select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks)
@@ -38,6 +40,7 @@ config DRM_I915_DEBUG
 	select DRM_I915_DEBUG_GEM_ONCE
 	select DRM_I915_DEBUG_MMIO
 	select DRM_I915_DEBUG_RUNTIME_PM
+	select DRM_I915_DEBUG_WAKEREF
 	select DRM_I915_SW_FENCE_DEBUG_OBJECTS
 	select DRM_I915_SELFTEST
 	default n
@@ -231,7 +234,9 @@ config DRM_I915_DEBUG_RUNTIME_PM
 	bool "Enable extra state checking for runtime PM"
 	depends on DRM_I915
 	default n
+	select REF_TRACKER
 	select STACKDEPOT
+	select STACKTRACE
 	help
 	  Choose this option to turn on extra state checking for the
 	  runtime PM functionality. This may introduce overhead during
@@ -240,3 +245,16 @@ config DRM_I915_DEBUG_RUNTIME_PM
 	  Recommended for driver developers only.
 
 	  If in doubt, say "N"
+
+config DRM_I915_DEBUG_WAKEREF
+	bool "Enable extra tracking for wakerefs"
+	depends on DRM_I915
+	select REF_TRACKER
+	select STACKDEPOT
+	select STACKTRACE
+	help
+	  Choose this option to turn on extra state checking and usage
+	  tracking for the wakeref PM functionality. This may introduce
+	  overhead during driver runtime.
+
+	  If in doubt, say "N"
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 65e984242089..c13f14edb508 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -17,7 +17,6 @@ subdir-ccflags-y += $(call cc-option, -Wunused-const-variable)
 subdir-ccflags-y += $(call cc-option, -Wpacked-not-aligned)
 subdir-ccflags-y += $(call cc-option, -Wformat-overflow)
 subdir-ccflags-y += $(call cc-option, -Wformat-truncation)
-subdir-ccflags-y += $(call cc-option, -Wstringop-overflow)
 subdir-ccflags-y += $(call cc-option, -Wstringop-truncation)
 # The following turn off the warnings enabled by -Wextra
 ifeq ($(findstring 2, $(KBUILD_EXTRA_WARN)),)
@@ -280,6 +279,7 @@ i915-y += \
 	display/intel_dsb.o \
 	display/intel_dsb_buffer.o \
 	display/intel_fb.o \
+	display/intel_fb_bo.o \
 	display/intel_fb_pin.o \
 	display/intel_fbc.o \
 	display/intel_fdi.o \
@@ -318,7 +318,8 @@ i915-$(CONFIG_ACPI) += \
 	display/intel_acpi.o \
 	display/intel_opregion.o
 i915-$(CONFIG_DRM_FBDEV_EMULATION) += \
-	display/intel_fbdev.o
+	display/intel_fbdev.o \
+	display/intel_fbdev_fb.o
 i915-$(CONFIG_DEBUG_FS) += \
 	display/intel_display_debugfs.o \
 	display/intel_display_debugfs_params.o \
diff --git a/drivers/gpu/drm/i915/display/i9xx_wm.c b/drivers/gpu/drm/i915/display/i9xx_wm.c
index b37c0d02d500..11ca9572e8b3 100644
--- a/drivers/gpu/drm/i915/display/i9xx_wm.c
+++ b/drivers/gpu/drm/i915/display/i9xx_wm.c
@@ -608,7 +608,7 @@ static bool intel_crtc_active(struct intel_crtc *crtc)
 	 * crtc->state->active once we have proper CRTC states wired up
 	 * for atomic.
 	 */
-	return crtc && crtc->active && crtc->base.primary->state->fb &&
+	return crtc->active && crtc->base.primary->state->fb &&
 		crtc->config->hw.adjusted_mode.crtc_clock;
 }
 
@@ -2477,7 +2477,7 @@ static unsigned int ilk_plane_wm_max(const struct drm_i915_private *dev_priv,
 		 * FIFO size is only half of the self
 		 * refresh FIFO size on ILK/SNB.
 		 */
-		if (DISPLAY_VER(dev_priv) <= 6)
+		if (DISPLAY_VER(dev_priv) < 7)
 			fifo_size /= 2;
 	}
 
@@ -2818,7 +2818,7 @@ static int ilk_compute_pipe_wm(struct intel_atomic_state *state,
 	usable_level = dev_priv->display.wm.num_levels - 1;
 
 	/* ILK/SNB: LP2+ watermarks only w/o sprites */
-	if (DISPLAY_VER(dev_priv) <= 6 && pipe_wm->sprites_enabled)
+	if (DISPLAY_VER(dev_priv) < 7 && pipe_wm->sprites_enabled)
 		usable_level = 1;
 
 	/* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
@@ -2961,7 +2961,7 @@ static void ilk_wm_merge(struct drm_i915_private *dev_priv,
 	int last_enabled_level = num_levels - 1;
 
 	/* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
-	if ((DISPLAY_VER(dev_priv) <= 6 || IS_IVYBRIDGE(dev_priv)) &&
+	if ((DISPLAY_VER(dev_priv) < 7 || IS_IVYBRIDGE(dev_priv)) &&
 	    config->num_pipes_active > 1)
 		last_enabled_level = 0;
 
@@ -3060,7 +3060,7 @@ static void ilk_compute_wm_results(struct drm_i915_private *dev_priv,
 		 * Always set WM_LP_SPRITE_EN when spr_val != 0, even if the
 		 * level is disabled. Doing otherwise could cause underruns.
 		 */
-		if (DISPLAY_VER(dev_priv) <= 6 && r->spr_val) {
+		if (DISPLAY_VER(dev_priv) < 7 && r->spr_val) {
 			drm_WARN_ON(&dev_priv->drm, wm_lp != 1);
 			results->wm_lp_spr[wm_lp - 1] |= WM_LP_SPRITE_ENABLE;
 		}
diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c
index 481fcb650850..eda4a8b88590 100644
--- a/drivers/gpu/drm/i915/display/icl_dsi.c
+++ b/drivers/gpu/drm/i915/display/icl_dsi.c
@@ -1155,6 +1155,7 @@ static void gen11_dsi_powerup_panel(struct intel_encoder *encoder)
 	}
 
 	intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_INIT_OTP);
+	intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON);
 
 	/* ensure all panel commands dispatched before enabling transcoder */
 	wait_for_cmds_dispatched_to_panel(encoder);
@@ -1255,8 +1256,6 @@ static void gen11_dsi_enable(struct intel_atomic_state *state,
 	/* step6d: enable dsi transcoder */
 	gen11_dsi_enable_transcoder(encoder);
 
-	intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON);
-
 	/* step7: enable backlight */
 	intel_backlight_enable(crtc_state, conn_state);
 	intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_ON);
@@ -1440,6 +1439,13 @@ static void gen11_dsi_post_disable(struct intel_atomic_state *state,
 static enum drm_mode_status gen11_dsi_mode_valid(struct drm_connector *connector,
 						 struct drm_display_mode *mode)
 {
+	struct drm_i915_private *i915 = to_i915(connector->dev);
+	enum drm_mode_status status;
+
+	status = intel_cpu_transcoder_mode_valid(i915, mode);
+	if (status != MODE_OK)
+		return status;
+
 	/* FIXME: DSC? */
 	return intel_dsi_mode_valid(connector, mode);
 }
diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c
index 612d4cd9dacb..3f3cd944a1c5 100644
--- a/drivers/gpu/drm/i915/display/intel_backlight.c
+++ b/drivers/gpu/drm/i915/display/intel_backlight.c
@@ -275,7 +275,7 @@ static void ext_pwm_set_backlight(const struct drm_connector_state *conn_state,
 	struct intel_panel *panel = &to_intel_connector(conn_state->connector)->panel;
 
 	pwm_set_relative_duty_cycle(&panel->backlight.pwm_state, level, 100);
-	pwm_apply_state(panel->backlight.pwm, &panel->backlight.pwm_state);
+	pwm_apply_might_sleep(panel->backlight.pwm, &panel->backlight.pwm_state);
 }
 
 static void
@@ -428,7 +428,7 @@ static void ext_pwm_disable_backlight(const struct drm_connector_state *old_conn
 	intel_backlight_set_pwm_level(old_conn_state, level);
 
 	panel->backlight.pwm_state.enabled = false;
-	pwm_apply_state(panel->backlight.pwm, &panel->backlight.pwm_state);
+	pwm_apply_might_sleep(panel->backlight.pwm, &panel->backlight.pwm_state);
 }
 
 void intel_backlight_disable(const struct drm_connector_state *old_conn_state)
@@ -750,7 +750,7 @@ static void ext_pwm_enable_backlight(const struct intel_crtc_state *crtc_state,
 
 	pwm_set_relative_duty_cycle(&panel->backlight.pwm_state, level, 100);
 	panel->backlight.pwm_state.enabled = true;
-	pwm_apply_state(panel->backlight.pwm, &panel->backlight.pwm_state);
+	pwm_apply_might_sleep(panel->backlight.pwm, &panel->backlight.pwm_state);
 }
 
 static void __intel_backlight_enable(const struct intel_crtc_state *crtc_state,
diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
index 2fd72b2fd109..aa169b0055e9 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -2201,6 +2201,9 @@ static u8 map_ddc_pin(struct drm_i915_private *i915, u8 vbt_pin)
 	const u8 *ddc_pin_map;
 	int i, n_entries;
 
+	if (IS_DGFX(i915))
+		return vbt_pin;
+
 	if (INTEL_PCH_TYPE(i915) >= PCH_LNL || HAS_PCH_MTP(i915) ||
 	    IS_ALDERLAKE_P(i915)) {
 		ddc_pin_map = adlp_ddc_pin_map;
@@ -2208,8 +2211,6 @@ static u8 map_ddc_pin(struct drm_i915_private *i915, u8 vbt_pin)
 	} else if (IS_ALDERLAKE_S(i915)) {
 		ddc_pin_map = adls_ddc_pin_map;
 		n_entries = ARRAY_SIZE(adls_ddc_pin_map);
-	} else if (INTEL_PCH_TYPE(i915) >= PCH_DG1) {
-		return vbt_pin;
 	} else if (IS_ROCKETLAKE(i915) && INTEL_PCH_TYPE(i915) == PCH_TGP) {
 		ddc_pin_map = rkl_pch_tgp_ddc_pin_map;
 		n_entries = ARRAY_SIZE(rkl_pch_tgp_ddc_pin_map);
@@ -3474,8 +3475,7 @@ bool intel_bios_get_dsc_params(struct intel_encoder *encoder,
 			if (!devdata->dsc)
 				return false;
 
-			if (crtc_state)
-				fill_dsc(crtc_state, devdata->dsc, dsc_max_bpc);
+			fill_dsc(crtc_state, devdata->dsc, dsc_max_bpc);
 
 			return true;
 		}
diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c
index bef96db62c80..7f2a50b4f494 100644
--- a/drivers/gpu/drm/i915/display/intel_bw.c
+++ b/drivers/gpu/drm/i915/display/intel_bw.c
@@ -87,7 +87,8 @@ static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
 		return ret;
 
 	dclk = val & 0xffff;
-	sp->dclk = DIV_ROUND_UP((16667 * dclk) + (DISPLAY_VER(dev_priv) > 11 ? 500 : 0), 1000);
+	sp->dclk = DIV_ROUND_UP((16667 * dclk) + (DISPLAY_VER(dev_priv) >= 12 ? 500 : 0),
+				1000);
 	sp->t_rp = (val & 0xff0000) >> 16;
 	sp->t_rcd = (val & 0xff000000) >> 24;
 
@@ -480,7 +481,7 @@ static int tgl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel
 	if (num_channels < qi.max_numchannels && DISPLAY_VER(dev_priv) >= 12)
 		qi.deinterleave = max(DIV_ROUND_UP(qi.deinterleave, 2), 1);
 
-	if (DISPLAY_VER(dev_priv) > 11 && num_channels > qi.max_numchannels)
+	if (DISPLAY_VER(dev_priv) >= 12 && num_channels > qi.max_numchannels)
 		drm_warn(&dev_priv->drm, "Number of channels exceeds max number of channels.");
 	if (qi.max_numchannels != 0)
 		num_channels = min_t(u8, num_channels, qi.max_numchannels);
@@ -897,7 +898,7 @@ static int icl_find_qgv_points(struct drm_i915_private *i915,
 		unsigned int idx;
 		unsigned int max_data_rate;
 
-		if (DISPLAY_VER(i915) > 11)
+		if (DISPLAY_VER(i915) >= 12)
 			idx = tgl_max_bw_index(i915, num_active_planes, i);
 		else
 			idx = icl_max_bw_index(i915, num_active_planes, i);
diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index b93d1ad7936d..c985ebb6831a 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -1180,7 +1180,7 @@ sanitize:
 	/* force cdclk programming */
 	dev_priv->display.cdclk.hw.cdclk = 0;
 	/* force full PLL disable + enable */
-	dev_priv->display.cdclk.hw.vco = -1;
+	dev_priv->display.cdclk.hw.vco = ~0;
 }
 
 static void skl_cdclk_init_hw(struct drm_i915_private *dev_priv)
@@ -1446,50 +1446,77 @@ static u8 bxt_calc_voltage_level(int cdclk)
 	return DIV_ROUND_UP(cdclk, 25000);
 }
 
+static u8 calc_voltage_level(int cdclk, int num_voltage_levels,
+			     const int voltage_level_max_cdclk[])
+{
+	int voltage_level;
+
+	for (voltage_level = 0; voltage_level < num_voltage_levels; voltage_level++) {
+		if (cdclk <= voltage_level_max_cdclk[voltage_level])
+			return voltage_level;
+	}
+
+	MISSING_CASE(cdclk);
+	return num_voltage_levels - 1;
+}
+
 static u8 icl_calc_voltage_level(int cdclk)
 {
-	if (cdclk > 556800)
-		return 2;
-	else if (cdclk > 312000)
-		return 1;
-	else
-		return 0;
+	static const int icl_voltage_level_max_cdclk[] = {
+		[0] = 312000,
+		[1] = 556800,
+		[2] = 652800,
+	};
+
+	return calc_voltage_level(cdclk,
+				  ARRAY_SIZE(icl_voltage_level_max_cdclk),
+				  icl_voltage_level_max_cdclk);
 }
 
 static u8 ehl_calc_voltage_level(int cdclk)
 {
-	if (cdclk > 326400)
-		return 3;
-	else if (cdclk > 312000)
-		return 2;
-	else if (cdclk > 180000)
-		return 1;
-	else
-		return 0;
+	static const int ehl_voltage_level_max_cdclk[] = {
+		[0] = 180000,
+		[1] = 312000,
+		[2] = 326400,
+		/*
+		 * Bspec lists the limit as 556.8 MHz, but some JSL
+		 * development boards (at least) boot with 652.8 MHz
+		 */
+		[3] = 652800,
+	};
+
+	return calc_voltage_level(cdclk,
+				  ARRAY_SIZE(ehl_voltage_level_max_cdclk),
+				  ehl_voltage_level_max_cdclk);
 }
 
 static u8 tgl_calc_voltage_level(int cdclk)
 {
-	if (cdclk > 556800)
-		return 3;
-	else if (cdclk > 326400)
-		return 2;
-	else if (cdclk > 312000)
-		return 1;
-	else
-		return 0;
+	static const int tgl_voltage_level_max_cdclk[] = {
+		[0] = 312000,
+		[1] = 326400,
+		[2] = 556800,
+		[3] = 652800,
+	};
+
+	return calc_voltage_level(cdclk,
+				  ARRAY_SIZE(tgl_voltage_level_max_cdclk),
+				  tgl_voltage_level_max_cdclk);
 }
 
 static u8 rplu_calc_voltage_level(int cdclk)
 {
-	if (cdclk > 556800)
-		return 3;
-	else if (cdclk > 480000)
-		return 2;
-	else if (cdclk > 312000)
-		return 1;
-	else
-		return 0;
+	static const int rplu_voltage_level_max_cdclk[] = {
+		[0] = 312000,
+		[1] = 480000,
+		[2] = 556800,
+		[3] = 652800,
+	};
+
+	return calc_voltage_level(cdclk,
+				  ARRAY_SIZE(rplu_voltage_level_max_cdclk),
+				  rplu_voltage_level_max_cdclk);
 }
 
 static void icl_readout_refclk(struct drm_i915_private *dev_priv,
@@ -1800,6 +1827,8 @@ static bool cdclk_pll_is_unknown(unsigned int vco)
 	return vco == ~0;
 }
 
+static const int cdclk_squash_len = 16;
+
 static int cdclk_squash_divider(u16 waveform)
 {
 	return hweight16(waveform ?: 0xffff);
@@ -1811,7 +1840,6 @@ static bool cdclk_compute_crawl_and_squash_midpoint(struct drm_i915_private *i91
 						    struct intel_cdclk_config *mid_cdclk_config)
 {
 	u16 old_waveform, new_waveform, mid_waveform;
-	int size = 16;
 	int div = 2;
 
 	/* Return if PLL is in an unknown state, force a complete disable and re-enable. */
@@ -1850,7 +1878,8 @@ static bool cdclk_compute_crawl_and_squash_midpoint(struct drm_i915_private *i91
 	}
 
 	mid_cdclk_config->cdclk = DIV_ROUND_CLOSEST(cdclk_squash_divider(mid_waveform) *
-						    mid_cdclk_config->vco, size * div);
+						    mid_cdclk_config->vco,
+						    cdclk_squash_len * div);
 
 	/* make sure the mid clock came out sane */
 
@@ -1878,9 +1907,9 @@ static void _bxt_set_cdclk(struct drm_i915_private *dev_priv,
 {
 	int cdclk = cdclk_config->cdclk;
 	int vco = cdclk_config->vco;
-	u32 val;
+	int unsquashed_cdclk;
 	u16 waveform;
-	int clock;
+	u32 val;
 
 	if (HAS_CDCLK_CRAWL(dev_priv) && dev_priv->display.cdclk.hw.vco > 0 && vco > 0 &&
 	    !cdclk_pll_is_unknown(dev_priv->display.cdclk.hw.vco)) {
@@ -1897,15 +1926,13 @@ static void _bxt_set_cdclk(struct drm_i915_private *dev_priv,
 
 	waveform = cdclk_squash_waveform(dev_priv, cdclk);
 
-	if (waveform)
-		clock = vco / 2;
-	else
-		clock = cdclk;
+	unsquashed_cdclk = DIV_ROUND_CLOSEST(cdclk * cdclk_squash_len,
+					     cdclk_squash_divider(waveform));
 
 	if (HAS_CDCLK_SQUASH(dev_priv))
 		dg2_cdclk_squash_program(dev_priv, waveform);
 
-	val = bxt_cdclk_cd2x_div_sel(dev_priv, clock, vco) |
+	val = bxt_cdclk_cd2x_div_sel(dev_priv, unsquashed_cdclk, vco) |
 		bxt_cdclk_cd2x_pipe(dev_priv, pipe);
 
 	/*
@@ -2075,7 +2102,7 @@ sanitize:
 	dev_priv->display.cdclk.hw.cdclk = 0;
 
 	/* force full PLL disable + enable */
-	dev_priv->display.cdclk.hw.vco = -1;
+	dev_priv->display.cdclk.hw.vco = ~0;
 }
 
 static void bxt_cdclk_init_hw(struct drm_i915_private *dev_priv)
@@ -2597,7 +2624,7 @@ static int intel_vdsc_min_cdclk(const struct intel_crtc_state *crtc_state)
 		 * Since PPC = 2 with bigjoiner
 		 * => CDCLK >= compressed_bpp * Pixel clock  / 2 * Bigjoiner Interface bits
 		 */
-		int bigjoiner_interface_bits = DISPLAY_VER(i915) > 13 ? 36 : 24;
+		int bigjoiner_interface_bits = DISPLAY_VER(i915) >= 14 ? 36 : 24;
 		int min_cdclk_bj =
 			(to_bpp_int_roundup(crtc_state->dsc.compressed_bpp_x16) *
 			 pixel_clock) / (2 * bigjoiner_interface_bits);
@@ -3489,7 +3516,7 @@ static const struct intel_cdclk_funcs mtl_cdclk_funcs = {
 	.get_cdclk = bxt_get_cdclk,
 	.set_cdclk = bxt_set_cdclk,
 	.modeset_calc_cdclk = bxt_modeset_calc_cdclk,
-	.calc_voltage_level = tgl_calc_voltage_level,
+	.calc_voltage_level = rplu_calc_voltage_level,
 };
 
 static const struct intel_cdclk_funcs rplu_cdclk_funcs = {
diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c
index 0e33a0523a75..abaacea5c2cc 100644
--- a/drivers/gpu/drm/i915/display/intel_crt.c
+++ b/drivers/gpu/drm/i915/display/intel_crt.c
@@ -348,8 +348,13 @@ intel_crt_mode_valid(struct drm_connector *connector,
 	struct drm_device *dev = connector->dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	int max_dotclk = dev_priv->max_dotclk_freq;
+	enum drm_mode_status status;
 	int max_clock;
 
+	status = intel_cpu_transcoder_mode_valid(dev_priv, mode);
+	if (status != MODE_OK)
+		return status;
+
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		return MODE_NO_DBLESCAN;
 
diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c
index 1fd068e6e26c..8a84a31c7b48 100644
--- a/drivers/gpu/drm/i915/display/intel_crtc.c
+++ b/drivers/gpu/drm/i915/display/intel_crtc.c
@@ -553,8 +553,15 @@ void intel_pipe_update_start(struct intel_atomic_state *state,
 
 	intel_psr_lock(new_crtc_state);
 
-	if (new_crtc_state->do_async_flip)
+	if (new_crtc_state->do_async_flip) {
+		spin_lock_irq(&crtc->base.dev->event_lock);
+		/* arm the event for the flip done irq handler */
+		crtc->flip_done_event = new_crtc_state->uapi.event;
+		spin_unlock_irq(&crtc->base.dev->event_lock);
+
+		new_crtc_state->uapi.event = NULL;
 		return;
+	}
 
 	if (intel_crtc_needs_vblank_work(new_crtc_state))
 		intel_crtc_vblank_work_init(new_crtc_state);
diff --git a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c
index 2d15e82c0b3d..49fd100ec98a 100644
--- a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c
+++ b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c
@@ -262,6 +262,15 @@ void intel_crtc_state_dump(const struct intel_crtc_state *pipe_config,
 		drm_dbg_kms(&i915->drm, "fec: %s, enhanced framing: %s\n",
 			    str_enabled_disabled(pipe_config->fec_enable),
 			    str_enabled_disabled(pipe_config->enhanced_framing));
+
+		drm_dbg_kms(&i915->drm, "sdp split: %s\n",
+			    str_enabled_disabled(pipe_config->sdp_split_enable));
+
+		drm_dbg_kms(&i915->drm, "psr: %s, psr2: %s, panel replay: %s, selective fetch: %s\n",
+			    str_enabled_disabled(pipe_config->has_psr),
+			    str_enabled_disabled(pipe_config->has_psr2),
+			    str_enabled_disabled(pipe_config->has_panel_replay),
+			    str_enabled_disabled(pipe_config->enable_psr2_sel_fetch));
 	}
 
 	drm_dbg_kms(&i915->drm, "framestart delay: %d, MSA timing delay: %d\n",
diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c
index b342fad180ca..926e2de00eb5 100644
--- a/drivers/gpu/drm/i915/display/intel_cursor.c
+++ b/drivers/gpu/drm/i915/display/intel_cursor.c
@@ -21,8 +21,11 @@
 #include "intel_fb_pin.h"
 #include "intel_frontbuffer.h"
 #include "intel_psr.h"
+#include "intel_psr_regs.h"
 #include "skl_watermark.h"
 
+#include "gem/i915_gem_object.h"
+
 /* Cursor formats */
 static const u32 intel_cursor_formats[] = {
 	DRM_FORMAT_ARGB8888,
@@ -33,11 +36,11 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state)
 	struct drm_i915_private *dev_priv =
 		to_i915(plane_state->uapi.plane->dev);
 	const struct drm_framebuffer *fb = plane_state->hw.fb;
-	const struct drm_i915_gem_object *obj = intel_fb_obj(fb);
+	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	u32 base;
 
 	if (DISPLAY_INFO(dev_priv)->cursor_needs_physical)
-		base = sg_dma_address(obj->mm.pages->sgl);
+		base = i915_gem_object_get_dma_address(obj, 0);
 	else
 		base = intel_plane_ggtt_offset(plane_state);
 
@@ -484,6 +487,35 @@ static int i9xx_check_cursor(struct intel_crtc_state *crtc_state,
 	return 0;
 }
 
+static void i9xx_cursor_disable_sel_fetch_arm(struct intel_plane *plane,
+					      const struct intel_crtc_state *crtc_state)
+{
+	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+	enum pipe pipe = plane->pipe;
+
+	if (!crtc_state->enable_psr2_sel_fetch)
+		return;
+
+	intel_de_write_fw(dev_priv, PLANE_SEL_FETCH_CTL(pipe, plane->id), 0);
+}
+
+static void i9xx_cursor_update_sel_fetch_arm(struct intel_plane *plane,
+					     const struct intel_crtc_state *crtc_state,
+					     const struct intel_plane_state *plane_state)
+{
+	struct drm_i915_private *i915 = to_i915(plane->base.dev);
+	enum pipe pipe = plane->pipe;
+
+	if (!crtc_state->enable_psr2_sel_fetch)
+		return;
+
+	if (drm_rect_height(&plane_state->psr2_sel_fetch_area) > 0)
+		intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id),
+				  plane_state->ctl);
+	else
+		i9xx_cursor_disable_sel_fetch_arm(plane, crtc_state);
+}
+
 /* TODO: split into noarm+arm pair */
 static void i9xx_cursor_update_arm(struct intel_plane *plane,
 				   const struct intel_crtc_state *crtc_state,
@@ -531,10 +563,10 @@ static void i9xx_cursor_update_arm(struct intel_plane *plane,
 		skl_write_cursor_wm(plane, crtc_state);
 
 	if (plane_state)
-		intel_psr2_program_plane_sel_fetch_arm(plane, crtc_state,
-						       plane_state);
+		i9xx_cursor_update_sel_fetch_arm(plane, crtc_state,
+						 plane_state);
 	else
-		intel_psr2_disable_plane_sel_fetch_arm(plane, crtc_state);
+		i9xx_cursor_disable_sel_fetch_arm(plane, crtc_state);
 
 	if (plane->cursor.base != base ||
 	    plane->cursor.size != fbc_ctl ||
diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c
index a8fa76580802..6b25e195232f 100644
--- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c
@@ -415,9 +415,15 @@ void intel_cx0_phy_set_signal_levels(struct intel_encoder *encoder,
 	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
 	const struct intel_ddi_buf_trans *trans;
 	enum phy phy = intel_port_to_phy(i915, encoder->port);
-	u8 owned_lane_mask = intel_cx0_get_owned_lane_mask(i915, encoder);
+	u8 owned_lane_mask;
 	intel_wakeref_t wakeref;
 	int n_entries, ln;
+	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
+
+	if (intel_tc_port_in_tbt_alt_mode(dig_port))
+		return;
+
+	owned_lane_mask = intel_cx0_get_owned_lane_mask(i915, encoder);
 
 	wakeref = intel_cx0_phy_transaction_begin(encoder);
 
@@ -739,7 +745,6 @@ static const struct intel_c10pll_state * const mtl_c10_edp_tables[] = {
 
 /* C20 basic DP 1.4 tables */
 static const struct intel_c20pll_state mtl_c20_dp_rbr = {
-	.link_bit_rate = 162000,
 	.clock = 162000,
 	.tx = {	0xbe88, /* tx cfg0 */
 		0x5800, /* tx cfg1 */
@@ -765,7 +770,6 @@ static const struct intel_c20pll_state mtl_c20_dp_rbr = {
 };
 
 static const struct intel_c20pll_state mtl_c20_dp_hbr1 = {
-	.link_bit_rate = 270000,
 	.clock = 270000,
 	.tx = {	0xbe88, /* tx cfg0 */
 		0x4800, /* tx cfg1 */
@@ -791,7 +795,6 @@ static const struct intel_c20pll_state mtl_c20_dp_hbr1 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_dp_hbr2 = {
-	.link_bit_rate = 540000,
 	.clock = 540000,
 	.tx = {	0xbe88, /* tx cfg0 */
 		0x4800, /* tx cfg1 */
@@ -817,7 +820,6 @@ static const struct intel_c20pll_state mtl_c20_dp_hbr2 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_dp_hbr3 = {
-	.link_bit_rate = 810000,
 	.clock = 810000,
 	.tx = {	0xbe88, /* tx cfg0 */
 		0x4800, /* tx cfg1 */
@@ -844,8 +846,7 @@ static const struct intel_c20pll_state mtl_c20_dp_hbr3 = {
 
 /* C20 basic DP 2.0 tables */
 static const struct intel_c20pll_state mtl_c20_dp_uhbr10 = {
-	.link_bit_rate = 1000000, /* 10 Gbps */
-	.clock = 312500,
+	.clock = 1000000, /* 10 Gbps */
 	.tx = {	0xbe21, /* tx cfg0 */
 		0x4800, /* tx cfg1 */
 		0x0000, /* tx cfg2 */
@@ -869,8 +870,7 @@ static const struct intel_c20pll_state mtl_c20_dp_uhbr10 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_dp_uhbr13_5 = {
-	.link_bit_rate = 1350000, /* 13.5 Gbps */
-	.clock = 421875,
+	.clock = 1350000, /* 13.5 Gbps */
 	.tx = {	0xbea0, /* tx cfg0 */
 		0x4800, /* tx cfg1 */
 		0x0000, /* tx cfg2 */
@@ -895,8 +895,7 @@ static const struct intel_c20pll_state mtl_c20_dp_uhbr13_5 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_dp_uhbr20 = {
-	.link_bit_rate = 2000000, /* 20 Gbps */
-	.clock = 625000,
+	.clock = 2000000, /* 20 Gbps */
 	.tx = {	0xbe20, /* tx cfg0 */
 		0x4800, /* tx cfg1 */
 		0x0000, /* tx cfg2 */
@@ -1515,7 +1514,6 @@ static const struct intel_c10pll_state * const mtl_c10_hdmi_tables[] = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_25_175 = {
-	.link_bit_rate = 25175,
 	.clock = 25175,
 	.tx = {  0xbe88, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
@@ -1541,7 +1539,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_25_175 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_27_0 = {
-	.link_bit_rate = 27000,
 	.clock = 27000,
 	.tx = {  0xbe88, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
@@ -1567,7 +1564,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_27_0 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_74_25 = {
-	.link_bit_rate = 74250,
 	.clock = 74250,
 	.tx = {  0xbe88, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
@@ -1593,7 +1589,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_74_25 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_148_5 = {
-	.link_bit_rate = 148500,
 	.clock = 148500,
 	.tx = {  0xbe88, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
@@ -1619,7 +1614,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_148_5 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_594 = {
-	.link_bit_rate = 594000,
 	.clock = 594000,
 	.tx = {  0xbe88, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
@@ -1645,8 +1639,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_594 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_300 = {
-	.link_bit_rate = 3000000,
-	.clock = 166670,
+	.clock = 3000000,
 	.tx = {  0xbe98, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1671,8 +1664,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_300 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_600 = {
-	.link_bit_rate = 6000000,
-	.clock = 333330,
+	.clock = 6000000,
 	.tx = {  0xbe98, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1697,8 +1689,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_600 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_800 = {
-	.link_bit_rate = 8000000,
-	.clock = 444440,
+	.clock = 8000000,
 	.tx = {  0xbe98, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1723,8 +1714,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_800 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_1000 = {
-	.link_bit_rate = 10000000,
-	.clock = 555560,
+	.clock = 10000000,
 	.tx = {  0xbe98, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1749,8 +1739,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_1000 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_1200 = {
-	.link_bit_rate = 12000000,
-	.clock = 666670,
+	.clock = 12000000,
 	.tx = {  0xbe98, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1999,7 +1988,6 @@ static int intel_c20_compute_hdmi_tmds_pll(u64 pixel_clock, struct intel_c20pll_
 	else
 		mpllb_ana_freq_vco = MPLLB_ANA_FREQ_VCO_0;
 
-	pll_state->link_bit_rate	= pixel_clock;
 	pll_state->clock	= pixel_clock;
 	pll_state->tx[0]	= 0xbe88;
 	pll_state->tx[1]	= 0x9800;
@@ -2036,7 +2024,7 @@ static int intel_c20_phy_check_hdmi_link_rate(int clock)
 	int i;
 
 	for (i = 0; tables[i]; i++) {
-		if (clock == tables[i]->link_bit_rate)
+		if (clock == tables[i]->clock)
 			return MODE_OK;
 	}
 
@@ -2088,7 +2076,7 @@ static int intel_c20pll_calc_state(struct intel_crtc_state *crtc_state,
 		return -EINVAL;
 
 	for (i = 0; tables[i]; i++) {
-		if (crtc_state->port_clock == tables[i]->link_bit_rate) {
+		if (crtc_state->port_clock == tables[i]->clock) {
 			crtc_state->cx0pll_state.c20 = *tables[i];
 			return 0;
 		}
@@ -2111,7 +2099,7 @@ int intel_cx0pll_calc_state(struct intel_crtc_state *crtc_state,
 static bool intel_c20_use_mplla(u32 clock)
 {
 	/* 10G and 20G rates use MPLLA */
-	if (clock == 312500 || clock == 625000)
+	if (clock == 1000000 || clock == 2000000)
 		return true;
 
 	return false;
@@ -2214,11 +2202,11 @@ static u8 intel_c20_get_dp_rate(u32 clock)
 		return 6;
 	case 432000: /* 4.32 Gbps eDP */
 		return 7;
-	case 312500: /* 10 Gbps DP2.0 */
+	case 1000000: /* 10 Gbps DP2.0 */
 		return 8;
-	case 421875: /* 13.5 Gbps DP2.0 */
+	case 1350000: /* 13.5 Gbps DP2.0 */
 		return 9;
-	case 625000: /* 20 Gbps DP2.0*/
+	case 2000000: /* 20 Gbps DP2.0 */
 		return 10;
 	case 648000: /* 6.48 Gbps eDP*/
 		return 11;
@@ -2236,13 +2224,13 @@ static u8 intel_c20_get_hdmi_rate(u32 clock)
 		return 0;
 
 	switch (clock) {
-	case 166670: /* 3 Gbps */
-	case 333330: /* 6 Gbps */
-	case 666670: /* 12 Gbps */
+	case 300000: /* 3 Gbps */
+	case 600000: /* 6 Gbps */
+	case 1200000: /* 12 Gbps */
 		return 1;
-	case 444440: /* 8 Gbps */
+	case 800000: /* 8 Gbps */
 		return 2;
-	case 555560: /* 10 Gbps */
+	case 1000000: /* 10 Gbps */
 		return 3;
 	default:
 		MISSING_CASE(clock);
@@ -2253,7 +2241,7 @@ static u8 intel_c20_get_hdmi_rate(u32 clock)
 static bool is_dp2(u32 clock)
 {
 	/* DP2.0 clock rates */
-	if (clock == 312500 || clock == 421875 || clock  == 625000)
+	if (clock == 1000000 || clock == 1350000 || clock  == 2000000)
 		return true;
 
 	return false;
@@ -2262,11 +2250,11 @@ static bool is_dp2(u32 clock)
 static bool is_hdmi_frl(u32 clock)
 {
 	switch (clock) {
-	case 166670: /* 3 Gbps */
-	case 333330: /* 6 Gbps */
-	case 444440: /* 8 Gbps */
-	case 555560: /* 10 Gbps */
-	case 666670: /* 12 Gbps */
+	case 300000: /* 3 Gbps */
+	case 600000: /* 6 Gbps */
+	case 800000: /* 8 Gbps */
+	case 1000000: /* 10 Gbps */
+	case 1200000: /* 12 Gbps */
 		return true;
 	default:
 		return false;
@@ -2299,6 +2287,7 @@ static void intel_c20_pll_program(struct drm_i915_private *i915,
 	const struct intel_c20pll_state *pll_state = &crtc_state->cx0pll_state.c20;
 	bool dp = false;
 	int lane = crtc_state->lane_count > 2 ? INTEL_CX0_BOTH_LANES : INTEL_CX0_LANE0;
+	u32 clock = crtc_state->port_clock;
 	bool cntx;
 	int i;
 
@@ -2337,7 +2326,7 @@ static void intel_c20_pll_program(struct drm_i915_private *i915,
 	}
 
 	/* 3.3 mpllb or mplla configuration */
-	if (intel_c20_use_mplla(pll_state->clock)) {
+	if (intel_c20_use_mplla(clock)) {
 		for (i = 0; i < ARRAY_SIZE(pll_state->mplla); i++) {
 			if (cntx)
 				intel_c20_sram_write(i915, encoder->port, INTEL_CX0_LANE0,
@@ -2364,23 +2353,23 @@ static void intel_c20_pll_program(struct drm_i915_private *i915,
 	/* 4. Program custom width to match the link protocol */
 	intel_cx0_rmw(i915, encoder->port, lane, PHY_C20_VDR_CUSTOM_WIDTH,
 		      PHY_C20_CUSTOM_WIDTH_MASK,
-		      PHY_C20_CUSTOM_WIDTH(intel_get_c20_custom_width(pll_state->clock, dp)),
+		      PHY_C20_CUSTOM_WIDTH(intel_get_c20_custom_width(clock, dp)),
 		      MB_WRITE_COMMITTED);
 
 	/* 5. For DP or 6. For HDMI */
 	if (dp) {
 		intel_cx0_rmw(i915, encoder->port, lane, PHY_C20_VDR_CUSTOM_SERDES_RATE,
 			      BIT(6) | PHY_C20_CUSTOM_SERDES_MASK,
-			      BIT(6) | PHY_C20_CUSTOM_SERDES(intel_c20_get_dp_rate(pll_state->clock)),
+			      BIT(6) | PHY_C20_CUSTOM_SERDES(intel_c20_get_dp_rate(clock)),
 			      MB_WRITE_COMMITTED);
 	} else {
 		intel_cx0_rmw(i915, encoder->port, lane, PHY_C20_VDR_CUSTOM_SERDES_RATE,
 			      BIT(7) | PHY_C20_CUSTOM_SERDES_MASK,
-			      is_hdmi_frl(pll_state->clock) ? BIT(7) : 0,
+			      is_hdmi_frl(clock) ? BIT(7) : 0,
 			      MB_WRITE_COMMITTED);
 
 		intel_cx0_write(i915, encoder->port, INTEL_CX0_BOTH_LANES, PHY_C20_VDR_HDMI_RATE,
-				intel_c20_get_hdmi_rate(pll_state->clock),
+				intel_c20_get_hdmi_rate(clock),
 				MB_WRITE_COMMITTED);
 	}
 
@@ -2479,7 +2468,8 @@ static void intel_program_port_clock_ctl(struct intel_encoder *encoder,
 
 	val |= XELPDP_FORWARD_CLOCK_UNGATE;
 
-	if (is_hdmi_frl(crtc_state->port_clock))
+	if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI) &&
+	    is_hdmi_frl(crtc_state->port_clock))
 		val |= XELPDP_DDI_CLOCK_SELECT(XELPDP_DDI_CLOCK_SELECT_DIV18CLK);
 	else
 		val |= XELPDP_DDI_CLOCK_SELECT(XELPDP_DDI_CLOCK_SELECT_MAXPCLK);
@@ -3077,24 +3067,29 @@ static void intel_c20pll_state_verify(const struct intel_crtc_state *state,
 {
 	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
 	const struct intel_c20pll_state *mpll_sw_state = &state->cx0pll_state.c20;
-	bool use_mplla;
+	bool sw_use_mpllb = mpll_sw_state->tx[0] & C20_PHY_USE_MPLLB;
+	bool hw_use_mpllb = mpll_hw_state->tx[0] & C20_PHY_USE_MPLLB;
 	int i;
 
-	use_mplla = intel_c20_use_mplla(mpll_hw_state->clock);
-	if (use_mplla) {
-		for (i = 0; i < ARRAY_SIZE(mpll_sw_state->mplla); i++) {
-			I915_STATE_WARN(i915, mpll_hw_state->mplla[i] != mpll_sw_state->mplla[i],
-					"[CRTC:%d:%s] mismatch in C20MPLLA: Register[%d] (expected 0x%04x, found 0x%04x)",
-					crtc->base.base.id, crtc->base.name, i,
-					mpll_sw_state->mplla[i], mpll_hw_state->mplla[i]);
-		}
-	} else {
+	I915_STATE_WARN(i915, sw_use_mpllb != hw_use_mpllb,
+			"[CRTC:%d:%s] mismatch in C20: Register MPLLB selection (expected %d, found %d)",
+			crtc->base.base.id, crtc->base.name,
+			sw_use_mpllb, hw_use_mpllb);
+
+	if (hw_use_mpllb) {
 		for (i = 0; i < ARRAY_SIZE(mpll_sw_state->mpllb); i++) {
 			I915_STATE_WARN(i915, mpll_hw_state->mpllb[i] != mpll_sw_state->mpllb[i],
 					"[CRTC:%d:%s] mismatch in C20MPLLB: Register[%d] (expected 0x%04x, found 0x%04x)",
 					crtc->base.base.id, crtc->base.name, i,
 					mpll_sw_state->mpllb[i], mpll_hw_state->mpllb[i]);
 		}
+	} else {
+		for (i = 0; i < ARRAY_SIZE(mpll_sw_state->mplla); i++) {
+			I915_STATE_WARN(i915, mpll_hw_state->mplla[i] != mpll_sw_state->mplla[i],
+					"[CRTC:%d:%s] mismatch in C20MPLLA: Register[%d] (expected 0x%04x, found 0x%04x)",
+					crtc->base.base.id, crtc->base.name, i,
+					mpll_sw_state->mplla[i], mpll_hw_state->mplla[i]);
+		}
 	}
 
 	for (i = 0; i < ARRAY_SIZE(mpll_sw_state->tx); i++) {
@@ -3136,6 +3131,9 @@ void intel_cx0pll_state_verify(struct intel_atomic_state *state,
 	encoder = intel_get_crtc_new_encoder(state, new_crtc_state);
 	phy = intel_port_to_phy(i915, encoder->port);
 
+	if (intel_tc_port_in_tbt_alt_mode(enc_to_dig_port(encoder)))
+		return;
+
 	intel_cx0pll_readout_hw_state(encoder, &mpll_hw_state);
 
 	if (intel_is_c10phy(i915, phy))
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 38f28c480b38..12a29363e5df 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -3672,16 +3672,42 @@ static bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv,
 		AUDIO_OUTPUT_ENABLE(cpu_transcoder);
 }
 
-void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv,
-					 struct intel_crtc_state *crtc_state)
+/* Minimum voltage level: 2 once port_clock exceeds 594000, otherwise 0. */
+static int tgl_ddi_min_voltage_level(const struct intel_crtc_state *crtc_state)
 {
-	if (DISPLAY_VER(dev_priv) >= 12 && crtc_state->port_clock > 594000)
-		crtc_state->min_voltage_level = 2;
-	else if ((IS_JASPERLAKE(dev_priv) || IS_ELKHARTLAKE(dev_priv)) &&
-		 crtc_state->port_clock > 594000)
-		crtc_state->min_voltage_level = 3;
-	else if (DISPLAY_VER(dev_priv) >= 11 && crtc_state->port_clock > 594000)
-		crtc_state->min_voltage_level = 1;
+	const bool over_limit = crtc_state->port_clock > 594000;
+
+	return over_limit ? 2 : 0;
+}
+
+/* Minimum voltage level: 3 once port_clock exceeds 594000, otherwise 0. */
+static int jsl_ddi_min_voltage_level(const struct intel_crtc_state *crtc_state)
+{
+	const bool over_limit = crtc_state->port_clock > 594000;
+
+	return over_limit ? 3 : 0;
+}
+
+/* Minimum voltage level: 1 once port_clock exceeds 594000, otherwise 0. */
+static int icl_ddi_min_voltage_level(const struct intel_crtc_state *crtc_state)
+{
+	const bool over_limit = crtc_state->port_clock > 594000;
+
+	return over_limit ? 1 : 0;
+}
+
+void intel_ddi_compute_min_voltage_level(struct intel_crtc_state *crtc_state)
+{
+	struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev);
+
+	if (DISPLAY_VER(i915) >= 14)
+		crtc_state->min_voltage_level = icl_ddi_min_voltage_level(crtc_state);
+	else if (DISPLAY_VER(i915) >= 12)
+		crtc_state->min_voltage_level = tgl_ddi_min_voltage_level(crtc_state);
+	else if (IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915))
+		crtc_state->min_voltage_level = jsl_ddi_min_voltage_level(crtc_state);
+	else if (DISPLAY_VER(i915) >= 11)
+		crtc_state->min_voltage_level = icl_ddi_min_voltage_level(crtc_state);
 }
 
 static enum transcoder bdw_transcoder_master_readout(struct drm_i915_private *dev_priv,
@@ -3895,7 +3921,7 @@ static void intel_ddi_get_config(struct intel_encoder *encoder,
 		pipe_config->lane_lat_optim_mask =
 			bxt_ddi_phy_get_lane_lat_optim_mask(encoder);
 
-	intel_ddi_compute_min_voltage_level(dev_priv, pipe_config);
+	intel_ddi_compute_min_voltage_level(pipe_config);
 
 	intel_hdmi_read_gcp_infoframe(encoder, pipe_config);
 
@@ -4175,7 +4201,7 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder,
 		pipe_config->lane_lat_optim_mask =
 			bxt_ddi_phy_calc_lane_lat_optim_mask(pipe_config->lane_count);
 
-	intel_ddi_compute_min_voltage_level(dev_priv, pipe_config);
+	intel_ddi_compute_min_voltage_level(pipe_config);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.h b/drivers/gpu/drm/i915/display/intel_ddi.h
index 63853a1f6582..434de7196875 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.h
+++ b/drivers/gpu/drm/i915/display/intel_ddi.h
@@ -70,8 +70,7 @@ void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state,
 bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector);
 void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state,
 				    bool state);
-void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv,
-					 struct intel_crtc_state *crtc_state);
+void intel_ddi_compute_min_voltage_level(struct intel_crtc_state *crtc_state);
 int intel_ddi_toggle_hdcp_bits(struct intel_encoder *intel_encoder,
 			       enum transcoder cpu_transcoder,
 			       bool enable, u32 hdcp_mask);
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 5cf162628b95..b10aad15a63d 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -2627,7 +2627,7 @@ static void intel_set_transcoder_timings(const struct intel_crtc_state *crtc_sta
 		crtc_vblank_start = 1;
 	}
 
-	if (DISPLAY_VER(dev_priv) > 3)
+	if (DISPLAY_VER(dev_priv) >= 4)
 		intel_de_write(dev_priv, TRANS_VSYNCSHIFT(cpu_transcoder),
 			       vsyncshift);
 
@@ -3167,7 +3167,7 @@ static void bdw_set_pipe_misc(const struct intel_crtc_state *crtc_state)
 		break;
 	case 36:
 		/* Port output 12BPC defined for ADLP+ */
-		if (DISPLAY_VER(dev_priv) > 12)
+		if (DISPLAY_VER(dev_priv) >= 13)
 			val |= PIPE_MISC_BPC_12_ADLP;
 		break;
 	default:
@@ -3224,7 +3224,7 @@ int bdw_get_pipe_misc_bpp(struct intel_crtc *crtc)
 	 * MIPI DSI HW readout.
 	 */
 	case PIPE_MISC_BPC_12_ADLP:
-		if (DISPLAY_VER(dev_priv) > 12)
+		if (DISPLAY_VER(dev_priv) >= 13)
 			return 36;
 		fallthrough;
 	default:
@@ -3746,8 +3746,8 @@ static bool hsw_get_pipe_config(struct intel_crtc *crtc,
 	if (!active)
 		goto out;
 
-	intel_dsc_get_config(pipe_config);
 	intel_bigjoiner_get_config(pipe_config);
+	intel_dsc_get_config(pipe_config);
 
 	if (!transcoder_is_dsi(pipe_config->cpu_transcoder) ||
 	    DISPLAY_VER(dev_priv) >= 11)
@@ -4923,6 +4923,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 
 #define PIPE_CONF_CHECK_X(name) do { \
 	if (current_config->name != pipe_config->name) { \
+		BUILD_BUG_ON_MSG(__same_type(current_config->name, bool), \
+				 __stringify(name) " is bool");	\
 		pipe_config_mismatch(fastset, crtc, __stringify(name), \
 				     "(expected 0x%08x, found 0x%08x)", \
 				     current_config->name, \
@@ -4933,6 +4935,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 
 #define PIPE_CONF_CHECK_X_WITH_MASK(name, mask) do { \
 	if ((current_config->name & (mask)) != (pipe_config->name & (mask))) { \
+		BUILD_BUG_ON_MSG(__same_type(current_config->name, bool), \
+				 __stringify(name) " is bool");	\
 		pipe_config_mismatch(fastset, crtc, __stringify(name), \
 				     "(expected 0x%08x, found 0x%08x)", \
 				     current_config->name & (mask), \
@@ -4943,6 +4947,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 
 #define PIPE_CONF_CHECK_I(name) do { \
 	if (current_config->name != pipe_config->name) { \
+		BUILD_BUG_ON_MSG(__same_type(current_config->name, bool), \
+				 __stringify(name) " is bool");	\
 		pipe_config_mismatch(fastset, crtc, __stringify(name), \
 				     "(expected %i, found %i)", \
 				     current_config->name, \
@@ -4953,6 +4959,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 
 #define PIPE_CONF_CHECK_BOOL(name) do { \
 	if (current_config->name != pipe_config->name) { \
+		BUILD_BUG_ON_MSG(!__same_type(current_config->name, bool), \
+				 __stringify(name) " is not bool");	\
 		pipe_config_mismatch(fastset, crtc,  __stringify(name), \
 				     "(expected %s, found %s)", \
 				     str_yes_no(current_config->name), \
@@ -5091,8 +5099,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 #define PIPE_CONF_QUIRK(quirk) \
 	((current_config->quirks | pipe_config->quirks) & (quirk))
 
-	PIPE_CONF_CHECK_I(hw.enable);
-	PIPE_CONF_CHECK_I(hw.active);
+	PIPE_CONF_CHECK_BOOL(hw.enable);
+	PIPE_CONF_CHECK_BOOL(hw.active);
 
 	PIPE_CONF_CHECK_I(cpu_transcoder);
 	PIPE_CONF_CHECK_I(mst_master_transcoder);
@@ -5301,8 +5309,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 	PIPE_CONF_CHECK_I(dsc.config.second_line_bpg_offset);
 	PIPE_CONF_CHECK_I(dsc.config.nsl_bpg_offset);
 
-	PIPE_CONF_CHECK_I(dsc.compression_enable);
-	PIPE_CONF_CHECK_I(dsc.dsc_split);
+	PIPE_CONF_CHECK_BOOL(dsc.compression_enable);
+	PIPE_CONF_CHECK_BOOL(dsc.dsc_split);
 	PIPE_CONF_CHECK_I(dsc.compressed_bpp_x16);
 
 	PIPE_CONF_CHECK_BOOL(splitter.enable);
@@ -5918,6 +5926,17 @@ static int intel_async_flip_check_uapi(struct intel_atomic_state *state,
 		return -EINVAL;
 	}
 
+	/*
+	 * FIXME: Bigjoiner+async flip is busted currently.
+	 * Remove this check once the issues are fixed.
+	 */
+	if (new_crtc_state->bigjoiner_pipes) {
+		drm_dbg_kms(&i915->drm,
+			    "[CRTC:%d:%s] async flip disallowed with bigjoiner\n",
+			    crtc->base.base.id, crtc->base.name);
+		return -EINVAL;
+	}
+
 	for_each_oldnew_intel_plane_in_state(state, plane, old_plane_state,
 					     new_plane_state, i) {
 		if (plane->pipe != crtc->pipe)
@@ -6955,24 +6974,6 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state)
 	drm_WARN_ON(&dev_priv->drm, update_pipes);
 }
 
-static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv)
-{
-	struct intel_atomic_state *state, *next;
-	struct llist_node *freed;
-
-	freed = llist_del_all(&dev_priv->display.atomic_helper.free_list);
-	llist_for_each_entry_safe(state, next, freed, freed)
-		drm_atomic_state_put(&state->base);
-}
-
-void intel_atomic_helper_free_state_worker(struct work_struct *work)
-{
-	struct drm_i915_private *dev_priv =
-		container_of(work, typeof(*dev_priv), display.atomic_helper.free_work);
-
-	intel_atomic_helper_free_state(dev_priv);
-}
-
 static void intel_atomic_commit_fence_wait(struct intel_atomic_state *intel_state)
 {
 	struct drm_i915_private *i915 = to_i915(intel_state->base.dev);
@@ -7008,8 +7009,6 @@ static void intel_atomic_cleanup_work(struct work_struct *work)
 	drm_atomic_helper_cleanup_planes(&i915->drm, &state->base);
 	drm_atomic_helper_commit_cleanup_done(&state->base);
 	drm_atomic_state_put(&state->base);
-
-	intel_atomic_helper_free_state(i915);
 }
 
 static void intel_atomic_prepare_plane_clear_colors(struct intel_atomic_state *state)
@@ -7354,7 +7353,7 @@ int intel_atomic_commit(struct drm_device *dev, struct drm_atomic_state *_state,
 		for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i)
 			intel_color_cleanup_commit(new_crtc_state);
 
-		drm_atomic_helper_cleanup_planes(dev, &state->base);
+		drm_atomic_helper_unprepare_planes(dev, &state->base);
 		intel_runtime_pm_put(&dev_priv->runtime_pm, state->wakeref);
 		return ret;
 	}
@@ -7734,6 +7733,16 @@ enum drm_mode_status intel_mode_valid(struct drm_device *dev,
 	    mode->vtotal > vtotal_max)
 		return MODE_V_ILLEGAL;
 
+	return MODE_OK;
+}
+
+enum drm_mode_status intel_cpu_transcoder_mode_valid(struct drm_i915_private *dev_priv,
+						     const struct drm_display_mode *mode)
+{
+	/*
+	 * Additional transcoder timing limits,
+	 * excluding BXT/GLK DSI transcoders.
+	 */
 	if (DISPLAY_VER(dev_priv) >= 5) {
 		if (mode->hdisplay < 64 ||
 		    mode->htotal - mode->hdisplay < 32)
@@ -7753,7 +7762,7 @@ enum drm_mode_status intel_mode_valid(struct drm_device *dev,
 	 * Cantiga+ cannot handle modes with a hsync front porch of 0.
 	 * WaPruneModeWithIncorrectHsyncOffset:ctg,elk,ilk,snb,ivb,vlv,hsw.
 	 */
-	if ((DISPLAY_VER(dev_priv) > 4 || IS_G4X(dev_priv)) &&
+	if ((DISPLAY_VER(dev_priv) >= 5 || IS_G4X(dev_priv)) &&
 	    mode->hsync_start == mode->hdisplay)
 		return MODE_H_ILLEGAL;
 
diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h
index 8548f49e3972..f4a0773f0fca 100644
--- a/drivers/gpu/drm/i915/display/intel_display.h
+++ b/drivers/gpu/drm/i915/display/intel_display.h
@@ -402,6 +402,9 @@ enum drm_mode_status
 intel_mode_valid_max_plane_size(struct drm_i915_private *dev_priv,
 				const struct drm_display_mode *mode,
 				bool bigjoiner);
+enum drm_mode_status
+intel_cpu_transcoder_mode_valid(struct drm_i915_private *i915,
+				const struct drm_display_mode *mode);
 enum phy intel_port_to_phy(struct drm_i915_private *i915, enum port port);
 bool is_trans_port_sync_mode(const struct intel_crtc_state *state);
 bool is_trans_port_sync_master(const struct intel_crtc_state *state);
diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h b/drivers/gpu/drm/i915/display/intel_display_core.h
index 7e82b87e9cde..47297ed85822 100644
--- a/drivers/gpu/drm/i915/display/intel_display_core.h
+++ b/drivers/gpu/drm/i915/display/intel_display_core.h
@@ -298,12 +298,6 @@ struct intel_display {
 		const struct intel_audio_funcs *audio;
 	} funcs;
 
-	/* Grouping using anonymous structs. Keep sorted. */
-	struct intel_atomic_helper {
-		struct llist_head free_list;
-		struct work_struct free_work;
-	} atomic_helper;
-
 	struct {
 		/* backlight registers and fields in struct intel_panel */
 		struct mutex lock;
diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
index 915420d0cef8..d951edb36687 100644
--- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c
+++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
@@ -1095,7 +1095,7 @@ void intel_display_debugfs_register(struct drm_i915_private *i915)
 
 	for (i = 0; i < ARRAY_SIZE(intel_display_debugfs_files); i++) {
 		debugfs_create_file(intel_display_debugfs_files[i].name,
-				    S_IRUGO | S_IWUSR,
+				    0644,
 				    minor->debugfs_root,
 				    to_i915(minor->dev),
 				    intel_display_debugfs_files[i].fops);
@@ -1116,11 +1116,10 @@ void intel_display_debugfs_register(struct drm_i915_private *i915)
 
 static int i915_panel_show(struct seq_file *m, void *data)
 {
-	struct drm_connector *connector = m->private;
-	struct intel_dp *intel_dp =
-		intel_attached_dp(to_intel_connector(connector));
+	struct intel_connector *connector = m->private;
+	struct intel_dp *intel_dp = intel_attached_dp(connector);
 
-	if (connector->status != connector_status_connected)
+	if (connector->base.status != connector_status_connected)
 		return -ENODEV;
 
 	seq_printf(m, "Panel power up delay: %d\n",
@@ -1138,23 +1137,23 @@ DEFINE_SHOW_ATTRIBUTE(i915_panel);
 
 static int i915_hdcp_sink_capability_show(struct seq_file *m, void *data)
 {
-	struct drm_connector *connector = m->private;
-	struct drm_i915_private *i915 = to_i915(connector->dev);
-	struct intel_connector *intel_connector = to_intel_connector(connector);
+	struct intel_connector *connector = m->private;
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
 	int ret;
 
 	ret = drm_modeset_lock_single_interruptible(&i915->drm.mode_config.connection_mutex);
 	if (ret)
 		return ret;
 
-	if (!connector->encoder || connector->status != connector_status_connected) {
+	if (!connector->base.encoder ||
+	    connector->base.status != connector_status_connected) {
 		ret = -ENODEV;
 		goto out;
 	}
 
-	seq_printf(m, "%s:%d HDCP version: ", connector->name,
-		   connector->base.id);
-	intel_hdcp_info(m, intel_connector);
+	seq_printf(m, "%s:%d HDCP version: ", connector->base.name,
+		   connector->base.base.id);
+	intel_hdcp_info(m, connector);
 
 out:
 	drm_modeset_unlock(&i915->drm.mode_config.connection_mutex);
@@ -1165,16 +1164,16 @@ DEFINE_SHOW_ATTRIBUTE(i915_hdcp_sink_capability);
 
 static int i915_lpsp_capability_show(struct seq_file *m, void *data)
 {
-	struct drm_connector *connector = m->private;
-	struct drm_i915_private *i915 = to_i915(connector->dev);
-	struct intel_encoder *encoder;
+	struct intel_connector *connector = m->private;
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
+	struct intel_encoder *encoder = intel_attached_encoder(connector);
+	int connector_type = connector->base.connector_type;
 	bool lpsp_capable = false;
 
-	encoder = intel_attached_encoder(to_intel_connector(connector));
 	if (!encoder)
 		return -ENODEV;
 
-	if (connector->status != connector_status_connected)
+	if (connector->base.status != connector_status_connected)
 		return -ENODEV;
 
 	if (DISPLAY_VER(i915) >= 13)
@@ -1187,15 +1186,15 @@ static int i915_lpsp_capability_show(struct seq_file *m, void *data)
 		 */
 		lpsp_capable = encoder->port <= PORT_B;
 	else if (DISPLAY_VER(i915) == 11)
-		lpsp_capable = (connector->connector_type == DRM_MODE_CONNECTOR_DSI ||
-				connector->connector_type == DRM_MODE_CONNECTOR_eDP);
+		lpsp_capable = (connector_type == DRM_MODE_CONNECTOR_DSI ||
+				connector_type == DRM_MODE_CONNECTOR_eDP);
 	else if (IS_DISPLAY_VER(i915, 9, 10))
 		lpsp_capable = (encoder->port == PORT_A &&
-				(connector->connector_type == DRM_MODE_CONNECTOR_DSI ||
-				 connector->connector_type == DRM_MODE_CONNECTOR_eDP ||
-				 connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort));
+				(connector_type == DRM_MODE_CONNECTOR_DSI ||
+				 connector_type == DRM_MODE_CONNECTOR_eDP ||
+				 connector_type == DRM_MODE_CONNECTOR_DisplayPort));
 	else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
-		lpsp_capable = connector->connector_type == DRM_MODE_CONNECTOR_eDP;
+		lpsp_capable = connector_type == DRM_MODE_CONNECTOR_eDP;
 
 	seq_printf(m, "LPSP: %s\n", lpsp_capable ? "capable" : "incapable");
 
@@ -1205,7 +1204,7 @@ DEFINE_SHOW_ATTRIBUTE(i915_lpsp_capability);
 
 static int i915_dsc_fec_support_show(struct seq_file *m, void *data)
 {
-	struct intel_connector *connector = to_intel_connector(m->private);
+	struct intel_connector *connector = m->private;
 	struct drm_i915_private *i915 = to_i915(connector->base.dev);
 	struct drm_crtc *crtc;
 	struct intel_dp *intel_dp;
@@ -1275,13 +1274,13 @@ static ssize_t i915_dsc_fec_support_write(struct file *file,
 					  const char __user *ubuf,
 					  size_t len, loff_t *offp)
 {
+	struct seq_file *m = file->private_data;
+	struct intel_connector *connector = m->private;
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
+	struct intel_encoder *encoder = intel_attached_encoder(connector);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	bool dsc_enable = false;
 	int ret;
-	struct drm_connector *connector =
-		((struct seq_file *)file->private_data)->private;
-	struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector));
-	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
-	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
 	if (len == 0)
 		return 0;
@@ -1319,22 +1318,22 @@ static const struct file_operations i915_dsc_fec_support_fops = {
 
 static int i915_dsc_bpc_show(struct seq_file *m, void *data)
 {
-	struct drm_connector *connector = m->private;
-	struct drm_device *dev = connector->dev;
+	struct intel_connector *connector = m->private;
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
+	struct intel_encoder *encoder = intel_attached_encoder(connector);
 	struct drm_crtc *crtc;
 	struct intel_crtc_state *crtc_state;
-	struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector));
 	int ret;
 
 	if (!encoder)
 		return -ENODEV;
 
-	ret = drm_modeset_lock_single_interruptible(&dev->mode_config.connection_mutex);
+	ret = drm_modeset_lock_single_interruptible(&i915->drm.mode_config.connection_mutex);
 	if (ret)
 		return ret;
 
-	crtc = connector->state->crtc;
-	if (connector->status != connector_status_connected || !crtc) {
+	crtc = connector->base.state->crtc;
+	if (connector->base.status != connector_status_connected || !crtc) {
 		ret = -ENODEV;
 		goto out;
 	}
@@ -1342,7 +1341,7 @@ static int i915_dsc_bpc_show(struct seq_file *m, void *data)
 	crtc_state = to_intel_crtc_state(crtc->state);
 	seq_printf(m, "Input_BPC: %d\n", crtc_state->dsc.config.bits_per_component);
 
-out:	drm_modeset_unlock(&dev->mode_config.connection_mutex);
+out:	drm_modeset_unlock(&i915->drm.mode_config.connection_mutex);
 
 	return ret;
 }
@@ -1351,9 +1350,9 @@ static ssize_t i915_dsc_bpc_write(struct file *file,
 				  const char __user *ubuf,
 				  size_t len, loff_t *offp)
 {
-	struct drm_connector *connector =
-		((struct seq_file *)file->private_data)->private;
-	struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector));
+	struct seq_file *m = file->private_data;
+	struct intel_connector *connector = m->private;
+	struct intel_encoder *encoder = intel_attached_encoder(connector);
 	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	int dsc_bpc = 0;
 	int ret;
@@ -1385,22 +1384,22 @@ static const struct file_operations i915_dsc_bpc_fops = {
 
 static int i915_dsc_output_format_show(struct seq_file *m, void *data)
 {
-	struct drm_connector *connector = m->private;
-	struct drm_device *dev = connector->dev;
+	struct intel_connector *connector = m->private;
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
+	struct intel_encoder *encoder = intel_attached_encoder(connector);
 	struct drm_crtc *crtc;
 	struct intel_crtc_state *crtc_state;
-	struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector));
 	int ret;
 
 	if (!encoder)
 		return -ENODEV;
 
-	ret = drm_modeset_lock_single_interruptible(&dev->mode_config.connection_mutex);
+	ret = drm_modeset_lock_single_interruptible(&i915->drm.mode_config.connection_mutex);
 	if (ret)
 		return ret;
 
-	crtc = connector->state->crtc;
-	if (connector->status != connector_status_connected || !crtc) {
+	crtc = connector->base.state->crtc;
+	if (connector->base.status != connector_status_connected || !crtc) {
 		ret = -ENODEV;
 		goto out;
 	}
@@ -1409,7 +1408,7 @@ static int i915_dsc_output_format_show(struct seq_file *m, void *data)
 	seq_printf(m, "DSC_Output_Format: %s\n",
 		   intel_output_format_name(crtc_state->output_format));
 
-out:	drm_modeset_unlock(&dev->mode_config.connection_mutex);
+out:	drm_modeset_unlock(&i915->drm.mode_config.connection_mutex);
 
 	return ret;
 }
@@ -1418,9 +1417,9 @@ static ssize_t i915_dsc_output_format_write(struct file *file,
 					    const char __user *ubuf,
 					    size_t len, loff_t *offp)
 {
-	struct drm_connector *connector =
-		((struct seq_file *)file->private_data)->private;
-	struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector));
+	struct seq_file *m = file->private_data;
+	struct intel_connector *connector = m->private;
+	struct intel_encoder *encoder = intel_attached_encoder(connector);
 	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	int dsc_output_format = 0;
 	int ret;
@@ -1452,33 +1451,32 @@ static const struct file_operations i915_dsc_output_format_fops = {
 
 static int i915_dsc_fractional_bpp_show(struct seq_file *m, void *data)
 {
-	struct drm_connector *connector = m->private;
-	struct drm_device *dev = connector->dev;
+	struct intel_connector *connector = m->private;
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
+	struct intel_encoder *encoder = intel_attached_encoder(connector);
 	struct drm_crtc *crtc;
 	struct intel_dp *intel_dp;
-	struct intel_connector *intel_connector = to_intel_connector(connector);
-	struct intel_encoder *encoder = intel_attached_encoder(intel_connector);
 	int ret;
 
 	if (!encoder)
 		return -ENODEV;
 
-	ret = drm_modeset_lock_single_interruptible(&dev->mode_config.connection_mutex);
+	ret = drm_modeset_lock_single_interruptible(&i915->drm.mode_config.connection_mutex);
 	if (ret)
 		return ret;
 
-	crtc = connector->state->crtc;
-	if (connector->status != connector_status_connected || !crtc) {
+	crtc = connector->base.state->crtc;
+	if (connector->base.status != connector_status_connected || !crtc) {
 		ret = -ENODEV;
 		goto out;
 	}
 
-	intel_dp = intel_attached_dp(intel_connector);
+	intel_dp = intel_attached_dp(connector);
 	seq_printf(m, "Force_DSC_Fractional_BPP_Enable: %s\n",
 		   str_yes_no(intel_dp->force_dsc_fractional_bpp_en));
 
 out:
-	drm_modeset_unlock(&dev->mode_config.connection_mutex);
+	drm_modeset_unlock(&i915->drm.mode_config.connection_mutex);
 
 	return ret;
 }
@@ -1487,10 +1485,10 @@ static ssize_t i915_dsc_fractional_bpp_write(struct file *file,
 					     const char __user *ubuf,
 					     size_t len, loff_t *offp)
 {
-	struct drm_connector *connector =
-		((struct seq_file *)file->private_data)->private;
-	struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector));
-	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
+	struct seq_file *m = file->private_data;
+	struct intel_connector *connector = m->private;
+	struct intel_encoder *encoder = intel_attached_encoder(connector);
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
 	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	bool dsc_fractional_bpp_enable = false;
 	int ret;
@@ -1565,39 +1563,38 @@ DEFINE_SHOW_ATTRIBUTE(intel_crtc_pipe);
 
 /**
  * intel_connector_debugfs_add - add i915 specific connector debugfs files
- * @intel_connector: pointer to a registered drm_connector
+ * @connector: pointer to a registered intel_connector
  *
  * Cleanup will be done by drm_connector_unregister() through a call to
  * drm_debugfs_connector_remove().
  */
-void intel_connector_debugfs_add(struct intel_connector *intel_connector)
+void intel_connector_debugfs_add(struct intel_connector *connector)
 {
-	struct drm_connector *connector = &intel_connector->base;
-	struct dentry *root = connector->debugfs_entry;
-	struct drm_i915_private *dev_priv = to_i915(connector->dev);
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
+	struct dentry *root = connector->base.debugfs_entry;
+	int connector_type = connector->base.connector_type;
 
 	/* The connector must have been registered beforehands. */
 	if (!root)
 		return;
 
-	intel_drrs_connector_debugfs_add(intel_connector);
-	intel_psr_connector_debugfs_add(intel_connector);
+	intel_drrs_connector_debugfs_add(connector);
+	intel_psr_connector_debugfs_add(connector);
 
-	if (connector->connector_type == DRM_MODE_CONNECTOR_eDP)
-		debugfs_create_file("i915_panel_timings", S_IRUGO, root,
+	if (connector_type == DRM_MODE_CONNECTOR_eDP)
+		debugfs_create_file("i915_panel_timings", 0444, root,
 				    connector, &i915_panel_fops);
 
-	if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
-	    connector->connector_type == DRM_MODE_CONNECTOR_HDMIA ||
-	    connector->connector_type == DRM_MODE_CONNECTOR_HDMIB) {
-		debugfs_create_file("i915_hdcp_sink_capability", S_IRUGO, root,
+	if (connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
+	    connector_type == DRM_MODE_CONNECTOR_HDMIA ||
+	    connector_type == DRM_MODE_CONNECTOR_HDMIB) {
+		debugfs_create_file("i915_hdcp_sink_capability", 0444, root,
 				    connector, &i915_hdcp_sink_capability_fops);
 	}
 
-	if (DISPLAY_VER(dev_priv) >= 11 &&
-	    ((connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort &&
-	    !to_intel_connector(connector)->mst_port) ||
-	    connector->connector_type == DRM_MODE_CONNECTOR_eDP)) {
+	if (DISPLAY_VER(i915) >= 11 &&
+	    ((connector_type == DRM_MODE_CONNECTOR_DisplayPort && !connector->mst_port) ||
+	     connector_type == DRM_MODE_CONNECTOR_eDP)) {
 		debugfs_create_file("i915_dsc_fec_support", 0644, root,
 				    connector, &i915_dsc_fec_support_fops);
 
@@ -1611,11 +1608,11 @@ void intel_connector_debugfs_add(struct intel_connector *intel_connector)
 				    connector, &i915_dsc_fractional_bpp_fops);
 	}
 
-	if (connector->connector_type == DRM_MODE_CONNECTOR_DSI ||
-	    connector->connector_type == DRM_MODE_CONNECTOR_eDP ||
-	    connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
-	    connector->connector_type == DRM_MODE_CONNECTOR_HDMIA ||
-	    connector->connector_type == DRM_MODE_CONNECTOR_HDMIB)
+	if (connector_type == DRM_MODE_CONNECTOR_DSI ||
+	    connector_type == DRM_MODE_CONNECTOR_eDP ||
+	    connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
+	    connector_type == DRM_MODE_CONNECTOR_HDMIA ||
+	    connector_type == DRM_MODE_CONNECTOR_HDMIB)
 		debugfs_create_file("i915_lpsp_capability", 0444, root,
 				    connector, &i915_lpsp_capability_fops);
 }
diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h
index 4299cc452e05..fe4268813786 100644
--- a/drivers/gpu/drm/i915/display/intel_display_device.h
+++ b/drivers/gpu/drm/i915/display/intel_display_device.h
@@ -36,7 +36,7 @@ struct drm_printer;
 #define HAS_ASYNC_FLIPS(i915)		(DISPLAY_VER(i915) >= 5)
 #define HAS_CDCLK_CRAWL(i915)		(DISPLAY_INFO(i915)->has_cdclk_crawl)
 #define HAS_CDCLK_SQUASH(i915)		(DISPLAY_INFO(i915)->has_cdclk_squash)
-#define HAS_CUR_FBC(i915)		(!HAS_GMCH(i915) && DISPLAY_VER(i915) >= 7)
+#define HAS_CUR_FBC(i915)		(!HAS_GMCH(i915) && IS_DISPLAY_VER(i915, 7, 13))
 #define HAS_D12_PLANE_MINIMIZATION(i915) (IS_ROCKETLAKE(i915) || IS_ALDERLAKE_S(i915))
 #define HAS_DDI(i915)			(DISPLAY_INFO(i915)->has_ddi)
 #define HAS_DISPLAY(i915)		(DISPLAY_RUNTIME_INFO(i915)->pipe_mask != 0)
@@ -49,7 +49,7 @@ struct drm_printer;
 #define HAS_DSC(__i915)			(DISPLAY_RUNTIME_INFO(__i915)->has_dsc)
 #define HAS_FBC(i915)			(DISPLAY_RUNTIME_INFO(i915)->fbc_mask != 0)
 #define HAS_FPGA_DBG_UNCLAIMED(i915)	(DISPLAY_INFO(i915)->has_fpga_dbg)
-#define HAS_FW_BLC(i915)		(DISPLAY_VER(i915) > 2)
+#define HAS_FW_BLC(i915)		(DISPLAY_VER(i915) >= 3)
 #define HAS_GMBUS_IRQ(i915)		(DISPLAY_VER(i915) >= 4)
 #define HAS_GMBUS_BURST_READ(i915)	(DISPLAY_VER(i915) >= 10 || IS_KABYLAKE(i915))
 #define HAS_GMCH(i915)			(DISPLAY_INFO(i915)->has_gmch)
diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c
index 62f7b10484be..9df9097a0255 100644
--- a/drivers/gpu/drm/i915/display/intel_display_driver.c
+++ b/drivers/gpu/drm/i915/display/intel_display_driver.c
@@ -259,10 +259,6 @@ int intel_display_driver_probe_noirq(struct drm_i915_private *i915)
 	if (ret)
 		goto cleanup_vga_client_pw_domain_dmc;
 
-	init_llist_head(&i915->display.atomic_helper.free_list);
-	INIT_WORK(&i915->display.atomic_helper.free_work,
-		  intel_atomic_helper_free_state_worker);
-
 	intel_init_quirks(i915);
 
 	intel_fbc_init(i915);
@@ -430,9 +426,6 @@ void intel_display_driver_remove(struct drm_i915_private *i915)
 	flush_workqueue(i915->display.wq.flip);
 	flush_workqueue(i915->display.wq.modeset);
 
-	flush_work(&i915->display.atomic_helper.free_work);
-	drm_WARN_ON(&i915->drm, !llist_empty(&i915->display.atomic_helper.free_list));
-
 	/*
 	 * MST topology needs to be suspended so we don't have any calls to
 	 * fbdev after it's finalized. MST will be destroyed later as part of
diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c
index bff4a76310c0..a7d8f3fc98de 100644
--- a/drivers/gpu/drm/i915/display/intel_display_irq.c
+++ b/drivers/gpu/drm/i915/display/intel_display_irq.c
@@ -340,18 +340,15 @@ static void flip_done_handler(struct drm_i915_private *i915,
 			      enum pipe pipe)
 {
 	struct intel_crtc *crtc = intel_crtc_for_pipe(i915, pipe);
-	struct drm_crtc_state *crtc_state = crtc->base.state;
-	struct drm_pending_vblank_event *e = crtc_state->event;
-	struct drm_device *dev = &i915->drm;
-	unsigned long irqflags;
-
-	spin_lock_irqsave(&dev->event_lock, irqflags);
 
-	crtc_state->event = NULL;
+	spin_lock(&i915->drm.event_lock);
 
-	drm_crtc_send_vblank_event(&crtc->base, e);
+	if (crtc->flip_done_event) {
+		drm_crtc_send_vblank_event(&crtc->base, crtc->flip_done_event);
+		crtc->flip_done_event = NULL;
+	}
 
-	spin_unlock_irqrestore(&dev->event_lock, irqflags);
+	spin_unlock(&i915->drm.event_lock);
 }
 
 static void hsw_pipe_crc_irq_handler(struct drm_i915_private *dev_priv,
@@ -896,7 +893,7 @@ gen8_de_misc_irq_handler(struct drm_i915_private *dev_priv, u32 iir)
 	}
 
 	if (!found)
-		drm_err(&dev_priv->drm, "Unexpected DE Misc interrupt\n");
+		drm_err(&dev_priv->drm, "Unexpected DE Misc interrupt: 0x%08x\n", iir);
 }
 
 static void gen11_dsi_te_interrupt_handler(struct drm_i915_private *dev_priv,
@@ -1653,7 +1650,7 @@ void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv)
 	else if (HAS_PCH_SPLIT(dev_priv))
 		ibx_irq_postinstall(dev_priv);
 
-	if (DISPLAY_VER(dev_priv) <= 10)
+	if (DISPLAY_VER(dev_priv) < 11)
 		de_misc_masked |= GEN8_DE_MISC_GSE;
 
 	if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv))
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
index e390595d7341..6fd4fa52253a 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -405,8 +405,8 @@ print_async_put_domains_state(struct i915_power_domains *power_domains)
 						     struct drm_i915_private,
 						     display.power.domains);
 
-	drm_dbg(&i915->drm, "async_put_wakeref %u\n",
-		power_domains->async_put_wakeref);
+	drm_dbg(&i915->drm, "async_put_wakeref: %s\n",
+		str_yes_no(power_domains->async_put_wakeref));
 
 	print_power_domains(power_domains, "async_put_domains[0]",
 			    &power_domains->async_put_domains[0]);
@@ -1697,14 +1697,14 @@ static void icl_display_core_init(struct drm_i915_private *dev_priv,
 	if (resume)
 		intel_dmc_load_program(dev_priv);
 
-	/* Wa_14011508470:tgl,dg1,rkl,adl-s,adl-p */
-	if (DISPLAY_VER(dev_priv) >= 12)
+	/* Wa_14011508470:tgl,dg1,rkl,adl-s,adl-p,dg2 */
+	if (IS_DISPLAY_IP_RANGE(dev_priv, IP_VER(12, 0), IP_VER(13, 0)))
 		intel_de_rmw(dev_priv, GEN11_CHICKEN_DCPR_2, 0,
 			     DCPR_CLEAR_MEMSTAT_DIS | DCPR_SEND_RESP_IMM |
 			     DCPR_MASK_LPMODE | DCPR_MASK_MAXLATENCY_MEMUP_CLR);
 
 	/* Wa_14011503030:xelpd */
-	if (DISPLAY_VER(dev_priv) >= 13)
+	if (DISPLAY_VER(dev_priv) == 13)
 		intel_de_write(dev_priv, XELPD_DISPLAY_ERR_FATAL_MASK, ~0);
 }
 
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index b3e942f2eeb0..3fdd8a517983 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -684,8 +684,6 @@ struct intel_atomic_state {
 	bool skip_intermediate_wm;
 
 	bool rps_interactive;
-
-	struct llist_node freed;
 };
 
 struct intel_plane_state {
@@ -1022,7 +1020,6 @@ struct intel_c10pll_state {
 };
 
 struct intel_c20pll_state {
-	u32 link_bit_rate;
 	u32 clock; /* in kHz */
 	u16 tx[3];
 	u16 cmn[4];
@@ -1476,6 +1473,9 @@ struct intel_crtc {
 
 	struct intel_crtc_state *config;
 
+	/* armed event for async flip */
+	struct drm_pending_vblank_event *flip_done_event;
+
 	/* Access to these should be protected by dev_priv->irq_lock. */
 	bool cpu_fifo_underrun_disabled;
 	bool pch_fifo_underrun_disabled;
diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c
index 63e080e07023..b70502586ab9 100644
--- a/drivers/gpu/drm/i915/display/intel_dmc.c
+++ b/drivers/gpu/drm/i915/display/intel_dmc.c
@@ -335,77 +335,6 @@ static void disable_event_handler(struct drm_i915_private *i915,
 	intel_de_write(i915, htp_reg, 0);
 }
 
-static void
-disable_flip_queue_event(struct drm_i915_private *i915,
-			 i915_reg_t ctl_reg, i915_reg_t htp_reg)
-{
-	u32 event_ctl;
-	u32 event_htp;
-
-	event_ctl = intel_de_read(i915, ctl_reg);
-	event_htp = intel_de_read(i915, htp_reg);
-	if (event_ctl != (DMC_EVT_CTL_ENABLE |
-			  DMC_EVT_CTL_RECURRING |
-			  REG_FIELD_PREP(DMC_EVT_CTL_TYPE_MASK,
-					 DMC_EVT_CTL_TYPE_EDGE_0_1) |
-			  REG_FIELD_PREP(DMC_EVT_CTL_EVENT_ID_MASK,
-					 DMC_EVT_CTL_EVENT_ID_CLK_MSEC)) ||
-	    !event_htp) {
-		drm_dbg_kms(&i915->drm,
-			    "Unexpected DMC event configuration (control %08x htp %08x)\n",
-			    event_ctl, event_htp);
-		return;
-	}
-
-	disable_event_handler(i915, ctl_reg, htp_reg);
-}
-
-static bool
-get_flip_queue_event_regs(struct drm_i915_private *i915, enum intel_dmc_id dmc_id,
-			  i915_reg_t *ctl_reg, i915_reg_t *htp_reg)
-{
-	if (dmc_id == DMC_FW_MAIN) {
-		if (DISPLAY_VER(i915) == 12) {
-			*ctl_reg = DMC_EVT_CTL(i915, dmc_id, 3);
-			*htp_reg = DMC_EVT_HTP(i915, dmc_id, 3);
-
-			return true;
-		}
-	} else if (dmc_id >= DMC_FW_PIPEA && dmc_id <= DMC_FW_PIPED) {
-		if (IS_DG2(i915)) {
-			*ctl_reg = DMC_EVT_CTL(i915, dmc_id, 2);
-			*htp_reg = DMC_EVT_HTP(i915, dmc_id, 2);
-
-			return true;
-		}
-	}
-
-	return false;
-}
-
-static void
-disable_all_flip_queue_events(struct drm_i915_private *i915)
-{
-	enum intel_dmc_id dmc_id;
-
-	/* TODO: check if the following applies to all D13+ platforms. */
-	if (!IS_DG2(i915) && !IS_TIGERLAKE(i915))
-		return;
-
-	for_each_dmc_id(dmc_id) {
-		i915_reg_t ctl_reg;
-		i915_reg_t htp_reg;
-
-		if (!has_dmc_id_fw(i915, dmc_id))
-			continue;
-
-		if (!get_flip_queue_event_regs(i915, dmc_id, &ctl_reg, &htp_reg))
-			continue;
-
-		disable_flip_queue_event(i915, ctl_reg, htp_reg);
-	}
-}
-
 static void disable_all_event_handlers(struct drm_i915_private *i915)
 {
 	enum intel_dmc_id dmc_id;
@@ -493,6 +422,65 @@ void intel_dmc_disable_pipe(struct drm_i915_private *i915, enum pipe pipe)
 		intel_de_rmw(i915, PIPEDMC_CONTROL(pipe), PIPEDMC_ENABLE, 0);
 }
 
+/* Does @reg lie in DMC @dmc_id's EVT_CTL register range (handler 0 up to the GEN12 count)? */
+static bool is_dmc_evt_ctl_reg(struct drm_i915_private *i915,
+			       enum intel_dmc_id dmc_id, i915_reg_t reg)
+{
+	u32 addr = i915_mmio_reg_offset(reg);
+	u32 limit = i915_mmio_reg_offset(DMC_EVT_CTL(i915, dmc_id, DMC_EVENT_HANDLER_COUNT_GEN12));
+
+	return addr >= i915_mmio_reg_offset(DMC_EVT_CTL(i915, dmc_id, 0)) && addr < limit;
+}
+
+/* Does @reg lie in DMC @dmc_id's EVT_HTP register range (handler 0 up to the GEN12 count)? */
+static bool is_dmc_evt_htp_reg(struct drm_i915_private *i915,
+			       enum intel_dmc_id dmc_id, i915_reg_t reg)
+{
+	u32 addr = i915_mmio_reg_offset(reg);
+	u32 limit = i915_mmio_reg_offset(DMC_EVT_HTP(i915, dmc_id, DMC_EVENT_HANDLER_COUNT_GEN12));
+
+	return addr >= i915_mmio_reg_offset(DMC_EVT_HTP(i915, dmc_id, 0)) && addr < limit;
+}
+
+/* Decide whether the event that @data would program into @reg on DMC @dmc_id is to be disabled. */
+static bool disable_dmc_evt(struct drm_i915_private *i915,
+			    enum intel_dmc_id dmc_id,
+			    i915_reg_t reg, u32 data)
+{
+	u32 event_id;
+
+	if (!is_dmc_evt_ctl_reg(i915, dmc_id, reg))
+		return false;
+
+	/* keep all pipe DMC events disabled by default */
+	if (dmc_id != DMC_FW_MAIN)
+		return true;
+
+	event_id = REG_FIELD_GET(DMC_EVT_CTL_EVENT_ID_MASK, data);
+	/* also disable the flip queue event on the main DMC on TGL */
+	if (event_id == DMC_EVT_CTL_EVENT_ID_CLK_MSEC)
+		return IS_TIGERLAKE(i915);
+
+	/* also disable the HRR event on the main DMC on TGL/ADLS */
+	return event_id == DMC_EVT_CTL_EVENT_ID_VBLANK_A &&
+		(IS_TIGERLAKE(i915) || IS_ALDERLAKE_S(i915));
+}
+
+/* Pick the value to write for mmio entry @i of DMC @dmc_id when loading the firmware program. */
+static u32 dmc_mmiodata(struct drm_i915_private *i915,
+			struct intel_dmc *dmc,
+			enum intel_dmc_id dmc_id, int i)
+{
+	u32 fw_data = dmc->dmc_info[dmc_id].mmiodata[i];
+
+	if (!disable_dmc_evt(i915, dmc_id, dmc->dmc_info[dmc_id].mmioaddr[i], fw_data))
+		return fw_data;
+
+	/* events to be disabled get the FALSE event id; only type and event id fields are set */
+	return REG_FIELD_PREP(DMC_EVT_CTL_TYPE_MASK, DMC_EVT_CTL_TYPE_EDGE_0_1) |
+		REG_FIELD_PREP(DMC_EVT_CTL_EVENT_ID_MASK, DMC_EVT_CTL_EVENT_ID_FALSE);
+}
+
 /**
  * intel_dmc_load_program() - write the firmware from memory to register.
  * @i915: i915 drm device.
@@ -532,7 +520,7 @@ void intel_dmc_load_program(struct drm_i915_private *i915)
 	for_each_dmc_id(dmc_id) {
 		for (i = 0; i < dmc->dmc_info[dmc_id].mmio_count; i++) {
 			intel_de_write(i915, dmc->dmc_info[dmc_id].mmioaddr[i],
-				       dmc->dmc_info[dmc_id].mmiodata[i]);
+				       dmc_mmiodata(i915, dmc, dmc_id, i));
 		}
 	}
 
@@ -540,13 +528,6 @@ void intel_dmc_load_program(struct drm_i915_private *i915)
 
 	gen9_set_dc_state_debugmask(i915);
 
-	/*
-	 * Flip queue events need to be disabled before enabling DC5/6.
-	 * i915 doesn't use the flip queue feature, so disable it already
-	 * here.
-	 */
-	disable_all_flip_queue_events(i915);
-
 	pipedmc_clock_gating_wa(i915, false);
 }
 
@@ -742,9 +723,17 @@ static u32 parse_dmc_fw_header(struct intel_dmc *dmc,
 		return 0;
 	}
 
+	drm_dbg_kms(&i915->drm, "DMC %d:\n", dmc_id);
 	for (i = 0; i < mmio_count; i++) {
 		dmc_info->mmioaddr[i] = _MMIO(mmioaddr[i]);
 		dmc_info->mmiodata[i] = mmiodata[i];
+
+		drm_dbg_kms(&i915->drm, " mmio[%d]: 0x%x = 0x%x%s%s\n",
+			    i, mmioaddr[i], mmiodata[i],
+			    is_dmc_evt_ctl_reg(i915, dmc_id, dmc_info->mmioaddr[i]) ? " (EVT_CTL)" :
+			    is_dmc_evt_htp_reg(i915, dmc_id, dmc_info->mmioaddr[i]) ? " (EVT_HTP)" : "",
+			    disable_dmc_evt(i915, dmc_id, dmc_info->mmioaddr[i],
+					    dmc_info->mmiodata[i]) ? " (disabling)" : "");
 	}
 	dmc_info->mmio_count = mmio_count;
 	dmc_info->start_mmioaddr = start_mmioaddr;
diff --git a/drivers/gpu/drm/i915/display/intel_dmc_regs.h b/drivers/gpu/drm/i915/display/intel_dmc_regs.h
index cf10094acae3..90d0dbb41cfe 100644
--- a/drivers/gpu/drm/i915/display/intel_dmc_regs.h
+++ b/drivers/gpu/drm/i915/display/intel_dmc_regs.h
@@ -60,6 +60,7 @@
 
 #define DMC_EVT_CTL_EVENT_ID_MASK	REG_GENMASK(15, 8)
 #define DMC_EVT_CTL_EVENT_ID_FALSE	0x01
+#define DMC_EVT_CTL_EVENT_ID_VBLANK_A	0x32 /* main DMC */
 /* An event handler scheduled to run at a 1 kHz frequency. */
 #define DMC_EVT_CTL_EVENT_ID_CLK_MSEC	0xbf
 
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 1422c2370269..ae647d03af25 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -1227,6 +1227,10 @@ intel_dp_mode_valid(struct drm_connector *_connector,
 	enum drm_mode_status status;
 	bool dsc = false, bigjoiner = false;
 
+	status = intel_cpu_transcoder_mode_valid(dev_priv, mode);
+	if (status != MODE_OK)
+		return status;
+
 	if (mode->flags & DRM_MODE_FLAG_DBLCLK)
 		return MODE_H_ILLEGAL;
 
@@ -1886,7 +1890,7 @@ static int dsc_src_max_compressed_bpp(struct intel_dp *intel_dp)
 	 * Max Compressed bpp for Gen 13+ is 27bpp.
 	 * For earlier platform is 23bpp. (Bspec:49259).
 	 */
-	if (DISPLAY_VER(i915) <= 12)
+	if (DISPLAY_VER(i915) < 13)
 		return 23;
 	else
 		return 27;
@@ -2097,7 +2101,7 @@ static int intel_dp_dsc_compute_pipe_bpp(struct intel_dp *intel_dp,
 		}
 	}
 
-	dsc_max_bpc = intel_dp_dsc_min_src_input_bpc(i915);
+	dsc_max_bpc = intel_dp_dsc_max_src_input_bpc(i915);
 	if (!dsc_max_bpc)
 		return -EINVAL;
 
@@ -2351,6 +2355,9 @@ intel_dp_compute_config_limits(struct intel_dp *intel_dp,
 	limits->min_rate = intel_dp_common_rate(intel_dp, 0);
 	limits->max_rate = intel_dp_max_link_rate(intel_dp);
 
+	/* FIXME 128b/132b SST support missing */
+	limits->max_rate = min(limits->max_rate, 810000);
+
 	limits->min_lane_count = 1;
 	limits->max_lane_count = intel_dp_max_lane_count(intel_dp);
 
@@ -2844,19 +2851,12 @@ intel_dp_audio_compute_config(struct intel_encoder *encoder,
 			      struct intel_crtc_state *pipe_config,
 			      struct drm_connector_state *conn_state)
 {
-	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
-	struct drm_connector *connector = conn_state->connector;
-
 	pipe_config->has_audio =
 		intel_dp_has_audio(encoder, pipe_config, conn_state) &&
 		intel_audio_compute_config(encoder, pipe_config, conn_state);
 
 	pipe_config->sdp_split_enable = pipe_config->has_audio &&
 					intel_dp_is_uhbr(pipe_config);
-
-	drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] SDP split enable: %s\n",
-		    connector->base.id, connector->name,
-		    str_yes_no(pipe_config->sdp_split_enable));
 }
 
 int
@@ -4767,7 +4767,7 @@ static void intel_dp_process_phy_request(struct intel_dp *intel_dp,
 			  intel_dp->train_set, crtc_state->lane_count);
 
 	drm_dp_set_phy_test_pattern(&intel_dp->aux, data,
-				    link_status[DP_DPCD_REV]);
+				    intel_dp->dpcd[DP_DPCD_REV]);
 }
 
 static u8 intel_dp_autotest_phy_pattern(struct intel_dp *intel_dp)
diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
index dbc1b66c8ee4..1abfafbbfa75 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
@@ -650,19 +650,30 @@ intel_dp_update_link_bw_set(struct intel_dp *intel_dp,
 			    const struct intel_crtc_state *crtc_state,
 			    u8 link_bw, u8 rate_select)
 {
-	u8 link_config[2];
+	u8 lane_count = crtc_state->lane_count;
 
-	/* Write the link configuration data */
-	link_config[0] = link_bw;
-	link_config[1] = crtc_state->lane_count;
 	if (crtc_state->enhanced_framing)
-		link_config[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
-	drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config, 2);
+		lane_count |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
+
+	if (link_bw) {
+		/* DP and eDP v1.3 and earlier link bw set method. */
+		u8 link_config[] = { link_bw, lane_count };
 
-	/* eDP 1.4 rate select method. */
-	if (!link_bw)
-		drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_RATE_SET,
-				  &rate_select, 1);
+		drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config,
+				  ARRAY_SIZE(link_config));
+	} else {
+		/*
+		 * eDP v1.4 and later link rate set method.
+		 *
+		 * eDP v1.4x sinks shall ignore DP_LINK_RATE_SET if
+		 * DP_LINK_BW_SET is set. Avoid writing DP_LINK_BW_SET.
+		 *
+		 * eDP v1.5 sinks allow choosing either, and the last choice
+		 * shall be active.
+		 */
+		drm_dp_dpcd_writeb(&intel_dp->aux, DP_LANE_COUNT_SET, lane_count);
+		drm_dp_dpcd_writeb(&intel_dp->aux, DP_LINK_RATE_SET, rate_select);
+	}
 }
 
 /*
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 63364c9602ef..8a9432335030 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -54,7 +54,7 @@ static int intel_dp_mst_check_constraints(struct drm_i915_private *i915, int bpp
 					  struct intel_crtc_state *crtc_state,
 					  bool dsc)
 {
-	if (intel_dp_is_uhbr(crtc_state) && DISPLAY_VER(i915) <= 13 && dsc) {
+	if (intel_dp_is_uhbr(crtc_state) && DISPLAY_VER(i915) < 14 && dsc) {
 		int output_bpp = bpp;
 		/* DisplayPort 2 128b/132b, bits per lane is always 32 */
 		int symbol_clock = crtc_state->port_clock / 32;
@@ -614,7 +614,7 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder,
 
 	intel_dp_audio_compute_config(encoder, pipe_config, conn_state);
 
-	intel_ddi_compute_min_voltage_level(dev_priv, pipe_config);
+	intel_ddi_compute_min_voltage_level(pipe_config);
 
 	intel_psr_compute_config(intel_dp, pipe_config, conn_state);
 
@@ -1282,6 +1282,10 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector,
 		return 0;
 	}
 
+	*status = intel_cpu_transcoder_mode_valid(dev_priv, mode);
+	if (*status != MODE_OK)
+		return 0;
+
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN) {
 		*status = MODE_NO_DBLESCAN;
 		return 0;
@@ -1328,6 +1332,10 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector,
 	if (intel_dp_need_bigjoiner(intel_dp, mode->hdisplay, target_clock)) {
 		bigjoiner = true;
 		max_dotclk *= 2;
+
+		/* TODO: add support for bigjoiner */
+		*status = MODE_CLOCK_HIGH;
+		return 0;
 	}
 
 	if (DISPLAY_VER(dev_priv) >= 10 &&
@@ -1362,11 +1370,15 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector,
 	 * Big joiner configuration needs DSC for TGL which is not true for
 	 * XE_LPD where uncompressed joiner is supported.
 	 */
-	if (DISPLAY_VER(dev_priv) < 13 && bigjoiner && !dsc)
-		return MODE_CLOCK_HIGH;
+	if (DISPLAY_VER(dev_priv) < 13 && bigjoiner && !dsc) {
+		*status = MODE_CLOCK_HIGH;
+		return 0;
+	}
 
-	if (mode_rate > max_rate && !dsc)
-		return MODE_CLOCK_HIGH;
+	if (mode_rate > max_rate && !dsc) {
+		*status = MODE_CLOCK_HIGH;
+		return 0;
+	}
 
 	*status = intel_mode_valid_max_plane_size(dev_priv, mode, false);
 	return 0;
diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index 7958d0bd851e..ef57dad1a9cb 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -4537,7 +4537,7 @@ void intel_shared_dpll_state_verify(struct intel_atomic_state *state,
 				"pll active mismatch (didn't expect pipe %c in active mask (0x%x))\n",
 				pipe_name(crtc->pipe), pll->active_mask);
 		I915_STATE_WARN(i915, pll->state.pipe_mask & pipe_mask,
-				"pll enabled crtcs mismatch (found %x in enabled mask (0x%x))\n",
+				"pll enabled crtcs mismatch (found pipe %c in enabled mask (0x%x))\n",
 				pipe_name(crtc->pipe), pll->state.pipe_mask);
 	}
 }
diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c
index 9598d50f68f2..482c28b5c2de 100644
--- a/drivers/gpu/drm/i915/display/intel_dsb.c
+++ b/drivers/gpu/drm/i915/display/intel_dsb.c
@@ -341,7 +341,7 @@ static int intel_dsb_dewake_scanline(const struct intel_crtc_state *crtc_state)
 }
 
 static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl,
-			      unsigned int dewake_scanline)
+			      int dewake_scanline)
 {
 	struct intel_crtc *crtc = dsb->crtc;
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index 275d0218394c..a5d7fc8418c9 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -922,7 +922,7 @@ void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on)
 		gpiod_add_lookup_table(gpiod_lookup_table);
 
 	if (want_panel_gpio) {
-		intel_dsi->gpio_panel = gpiod_get(dev->dev, "panel", flags);
+		intel_dsi->gpio_panel = devm_gpiod_get(dev->dev, "panel", flags);
 		if (IS_ERR(intel_dsi->gpio_panel)) {
 			drm_err(&dev_priv->drm,
 				"Failed to own gpio for panel control\n");
@@ -932,7 +932,7 @@ void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on)
 
 	if (want_backlight_gpio) {
 		intel_dsi->gpio_backlight =
-			gpiod_get(dev->dev, "backlight", flags);
+			devm_gpiod_get(dev->dev, "backlight", flags);
 		if (IS_ERR(intel_dsi->gpio_backlight)) {
 			drm_err(&dev_priv->drm,
 				"Failed to own gpio for backlight control\n");
@@ -943,16 +943,3 @@ void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on)
 	if (gpiod_lookup_table)
 		gpiod_remove_lookup_table(gpiod_lookup_table);
 }
-
-void intel_dsi_vbt_gpio_cleanup(struct intel_dsi *intel_dsi)
-{
-	if (intel_dsi->gpio_panel) {
-		gpiod_put(intel_dsi->gpio_panel);
-		intel_dsi->gpio_panel = NULL;
-	}
-
-	if (intel_dsi->gpio_backlight) {
-		gpiod_put(intel_dsi->gpio_backlight);
-		intel_dsi->gpio_backlight = NULL;
-	}
-}
diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.h b/drivers/gpu/drm/i915/display/intel_dsi_vbt.h
index 468d873fab1a..3462fcc760e6 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.h
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.h
@@ -13,7 +13,6 @@ struct intel_dsi;
 
 bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id);
 void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on);
-void intel_dsi_vbt_gpio_cleanup(struct intel_dsi *intel_dsi);
 void intel_dsi_vbt_exec_sequence(struct intel_dsi *intel_dsi,
 				 enum mipi_seq seq_id);
 void intel_dsi_log_params(struct intel_dsi *intel_dsi);
diff --git a/drivers/gpu/drm/i915/display/intel_dvo.c b/drivers/gpu/drm/i915/display/intel_dvo.c
index 55d6743374bd..9111e9d46486 100644
--- a/drivers/gpu/drm/i915/display/intel_dvo.c
+++ b/drivers/gpu/drm/i915/display/intel_dvo.c
@@ -217,11 +217,17 @@ intel_dvo_mode_valid(struct drm_connector *_connector,
 		     struct drm_display_mode *mode)
 {
 	struct intel_connector *connector = to_intel_connector(_connector);
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
 	struct intel_dvo *intel_dvo = intel_attached_dvo(connector);
 	const struct drm_display_mode *fixed_mode =
 		intel_panel_fixed_mode(connector, mode);
 	int max_dotclk = to_i915(connector->base.dev)->max_dotclk_freq;
 	int target_clock = mode->clock;
+	enum drm_mode_status status;
+
+	status = intel_cpu_transcoder_mode_valid(i915, mode);
+	if (status != MODE_OK)
+		return status;
 
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		return MODE_NO_DBLESCAN;
diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c
index 6d48aa3af95a..0c0144eaa8fa 100644
--- a/drivers/gpu/drm/i915/display/intel_fb.c
+++ b/drivers/gpu/drm/i915/display/intel_fb.c
@@ -4,7 +4,6 @@
  */
 
 #include <drm/drm_blend.h>
-#include <drm/drm_framebuffer.h>
 #include <drm/drm_modeset_helper.h>
 
 #include <linux/dma-fence.h>
@@ -15,6 +14,7 @@
 #include "intel_display_types.h"
 #include "intel_dpt.h"
 #include "intel_fb.h"
+#include "intel_fb_bo.h"
 #include "intel_frontbuffer.h"
 
 #define check_array_bounds(i915, a, i) drm_WARN_ON(&(i915)->drm, (i) >= ARRAY_SIZE(a))
@@ -301,6 +301,33 @@ lookup_format_info(const struct drm_format_info formats[],
 	return NULL;
 }
 
+/* Map @fb_modifier to an I915_TILING_* mode; unknown modifiers map to I915_TILING_NONE. */
+unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier)
+{
+	const struct intel_modifier_desc *md;
+	u8 tiling_caps;
+
+	md = lookup_modifier_or_null(fb_modifier);
+	if (!md)
+		return I915_TILING_NONE;
+
+	tiling_caps = md->plane_caps & INTEL_PLANE_CAP_TILING_MASK;
+
+	switch (tiling_caps) {
+	case INTEL_PLANE_CAP_TILING_Y:
+		return I915_TILING_Y;
+	case INTEL_PLANE_CAP_TILING_X:
+		return I915_TILING_X;
+	case INTEL_PLANE_CAP_TILING_4:
+	case INTEL_PLANE_CAP_TILING_Yf:
+	case INTEL_PLANE_CAP_TILING_NONE:
+		return I915_TILING_NONE;
+	default:
+		MISSING_CASE(tiling_caps);
+		return I915_TILING_NONE;
+	}
+}
+
 /**
  * intel_fb_get_format_info: Get a modifier specific format information
  * @cmd: FB add command structure
@@ -737,26 +764,6 @@ intel_fb_align_height(const struct drm_framebuffer *fb,
 	return ALIGN(height, tile_height);
 }
 
-static unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier)
-{
-	u8 tiling_caps = lookup_modifier(fb_modifier)->plane_caps &
-			 INTEL_PLANE_CAP_TILING_MASK;
-
-	switch (tiling_caps) {
-	case INTEL_PLANE_CAP_TILING_Y:
-		return I915_TILING_Y;
-	case INTEL_PLANE_CAP_TILING_X:
-		return I915_TILING_X;
-	case INTEL_PLANE_CAP_TILING_4:
-	case INTEL_PLANE_CAP_TILING_Yf:
-	case INTEL_PLANE_CAP_TILING_NONE:
-		return I915_TILING_NONE;
-	default:
-		MISSING_CASE(tiling_caps);
-		return I915_TILING_NONE;
-	}
-}
-
 bool intel_fb_modifier_uses_dpt(struct drm_i915_private *i915, u64 modifier)
 {
 	return HAS_DPT(i915) && modifier != DRM_FORMAT_MOD_LINEAR;
@@ -764,7 +771,7 @@ bool intel_fb_modifier_uses_dpt(struct drm_i915_private *i915, u64 modifier)
 
 bool intel_fb_uses_dpt(const struct drm_framebuffer *fb)
 {
-	return fb && to_i915(fb->dev)->display.params.enable_dpt &&
+	return to_i915(fb->dev)->display.params.enable_dpt &&
 		intel_fb_modifier_uses_dpt(to_i915(fb->dev), fb->modifier);
 }
 
@@ -1374,7 +1381,8 @@ plane_view_scanout_stride(const struct intel_framebuffer *fb, int color_plane,
 	struct drm_i915_private *i915 = to_i915(fb->base.dev);
 	unsigned int stride_tiles;
 
-	if (IS_ALDERLAKE_P(i915) || DISPLAY_VER(i915) >= 14)
+	if ((IS_ALDERLAKE_P(i915) || DISPLAY_VER(i915) >= 14) &&
+	    src_stride_tiles < dst_stride_tiles)
 		stride_tiles = src_stride_tiles;
 	else
 		stride_tiles = dst_stride_tiles;
@@ -1501,8 +1509,20 @@ static u32 calc_plane_remap_info(const struct intel_framebuffer *fb, int color_p
 
 			size += remap_info->size;
 		} else {
-			unsigned int dst_stride = plane_view_dst_stride_tiles(fb, color_plane,
-									      remap_info->width);
+			unsigned int dst_stride;
+
+			/*
+			 * The hardware automagically calculates the CCS AUX surface
+			 * stride from the main surface stride so can't really remap a
+			 * smaller subset (unless we'd remap in whole AUX page units).
+			 */
+			if (intel_fb_needs_pot_stride_remap(fb) &&
+			    intel_fb_is_ccs_modifier(fb->base.modifier))
+				dst_stride = remap_info->src_stride;
+			else
+				dst_stride = remap_info->width;
+
+			dst_stride = plane_view_dst_stride_tiles(fb, color_plane, dst_stride);
 
 			assign_chk_ovf(i915, remap_info->dst_stride, dst_stride);
 			color_plane_info->mapping_stride = dst_stride *
@@ -1657,10 +1677,10 @@ int intel_fill_fb_info(struct drm_i915_private *i915, struct intel_framebuffer *
 		max_size = max(max_size, offset + size);
 	}
 
-	if (mul_u32_u32(max_size, tile_size) > obj->base.size) {
+	if (mul_u32_u32(max_size, tile_size) > intel_bo_to_drm_bo(obj)->size) {
 		drm_dbg_kms(&i915->drm,
 			    "fb too big for bo (need %llu bytes, have %zu bytes)\n",
-			    mul_u32_u32(max_size, tile_size), obj->base.size);
+			    mul_u32_u32(max_size, tile_size), intel_bo_to_drm_bo(obj)->size);
 		return -EINVAL;
 	}
 
@@ -1881,6 +1901,8 @@ static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb)
 
 	intel_frontbuffer_put(intel_fb->frontbuffer);
 
+	intel_fb_bo_framebuffer_fini(intel_fb_obj(fb));
+
 	kfree(intel_fb);
 }
 
@@ -1889,7 +1911,7 @@ static int intel_user_framebuffer_create_handle(struct drm_framebuffer *fb,
 						unsigned int *handle)
 {
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct drm_i915_private *i915 = to_i915(intel_bo_to_drm_bo(obj)->dev);
 
 	if (i915_gem_object_is_userptr(obj)) {
 		drm_dbg(&i915->drm,
@@ -1897,7 +1919,7 @@ static int intel_user_framebuffer_create_handle(struct drm_framebuffer *fb,
 		return -EINVAL;
 	}
 
-	return drm_gem_handle_create(file, &obj->base, handle);
+	return drm_gem_handle_create(file, intel_bo_to_drm_bo(obj), handle);
 }
 
 struct frontbuffer_fence_cb {
@@ -1975,61 +1997,30 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 			   struct drm_i915_gem_object *obj,
 			   struct drm_mode_fb_cmd2 *mode_cmd)
 {
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+	struct drm_i915_private *dev_priv = to_i915(intel_bo_to_drm_bo(obj)->dev);
 	struct drm_framebuffer *fb = &intel_fb->base;
 	u32 max_stride;
-	unsigned int tiling, stride;
 	int ret = -EINVAL;
 	int i;
 
-	intel_fb->frontbuffer = intel_frontbuffer_get(obj);
-	if (!intel_fb->frontbuffer)
-		return -ENOMEM;
-
-	i915_gem_object_lock(obj, NULL);
-	tiling = i915_gem_object_get_tiling(obj);
-	stride = i915_gem_object_get_stride(obj);
-	i915_gem_object_unlock(obj);
+	ret = intel_fb_bo_framebuffer_init(intel_fb, obj, mode_cmd);
+	if (ret)
+		return ret;
 
-	if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) {
-		/*
-		 * If there's a fence, enforce that
-		 * the fb modifier and tiling mode match.
-		 */
-		if (tiling != I915_TILING_NONE &&
-		    tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) {
-			drm_dbg_kms(&dev_priv->drm,
-				    "tiling_mode doesn't match fb modifier\n");
-			goto err;
-		}
-	} else {
-		if (tiling == I915_TILING_X) {
-			mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED;
-		} else if (tiling == I915_TILING_Y) {
-			drm_dbg_kms(&dev_priv->drm,
-				    "No Y tiling for legacy addfb\n");
-			goto err;
-		}
+	intel_fb->frontbuffer = intel_frontbuffer_get(obj);
+	if (!intel_fb->frontbuffer) {
+		ret = -ENOMEM;
+		goto err;
 	}
 
+	ret = -EINVAL;
 	if (!drm_any_plane_has_format(&dev_priv->drm,
 				      mode_cmd->pixel_format,
 				      mode_cmd->modifier[0])) {
 		drm_dbg_kms(&dev_priv->drm,
 			    "unsupported pixel format %p4cc / modifier 0x%llx\n",
 			    &mode_cmd->pixel_format, mode_cmd->modifier[0]);
-		goto err;
-	}
-
-	/*
-	 * gen2/3 display engine uses the fence if present,
-	 * so the tiling mode must match the fb modifier exactly.
-	 */
-	if (DISPLAY_VER(dev_priv) < 4 &&
-	    tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) {
-		drm_dbg_kms(&dev_priv->drm,
-			    "tiling_mode must match fb modifier exactly on gen2/3\n");
-		goto err;
+		goto err_frontbuffer_put;
 	}
 
 	max_stride = intel_fb_max_stride(dev_priv, mode_cmd->pixel_format,
@@ -2040,18 +2031,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 			    mode_cmd->modifier[0] != DRM_FORMAT_MOD_LINEAR ?
 			    "tiled" : "linear",
 			    mode_cmd->pitches[0], max_stride);
-		goto err;
-	}
-
-	/*
-	 * If there's a fence, enforce that
-	 * the fb pitch and fence stride match.
-	 */
-	if (tiling != I915_TILING_NONE && mode_cmd->pitches[0] != stride) {
-		drm_dbg_kms(&dev_priv->drm,
-			    "pitch (%d) must match tiling stride (%d)\n",
-			    mode_cmd->pitches[0], stride);
-		goto err;
+		goto err_frontbuffer_put;
 	}
 
 	/* FIXME need to adjust LINOFF/TILEOFF accordingly. */
@@ -2059,7 +2039,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 		drm_dbg_kms(&dev_priv->drm,
 			    "plane 0 offset (0x%08x) must be 0\n",
 			    mode_cmd->offsets[0]);
-		goto err;
+		goto err_frontbuffer_put;
 	}
 
 	drm_helper_mode_fill_fb_struct(&dev_priv->drm, fb, mode_cmd);
@@ -2070,7 +2050,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 		if (mode_cmd->handles[i] != mode_cmd->handles[0]) {
 			drm_dbg_kms(&dev_priv->drm, "bad plane %d handle\n",
 				    i);
-			goto err;
+			goto err_frontbuffer_put;
 		}
 
 		stride_alignment = intel_fb_stride_alignment(fb, i);
@@ -2078,7 +2058,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 			drm_dbg_kms(&dev_priv->drm,
 				    "plane %d pitch (%d) must be at least %u byte aligned\n",
 				    i, fb->pitches[i], stride_alignment);
-			goto err;
+			goto err_frontbuffer_put;
 		}
 
 		if (intel_fb_is_gen12_ccs_aux_plane(fb, i)) {
@@ -2089,7 +2069,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 					    "ccs aux plane %d pitch (%d) must be %d\n",
 					    i,
 					    fb->pitches[i], ccs_aux_stride);
-				goto err;
+				goto err_frontbuffer_put;
 			}
 		}
 
@@ -2098,7 +2078,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 
 	ret = intel_fill_fb_info(dev_priv, intel_fb);
 	if (ret)
-		goto err;
+		goto err_frontbuffer_put;
 
 	if (intel_fb_uses_dpt(fb)) {
 		struct i915_address_space *vm;
@@ -2107,7 +2087,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 		if (IS_ERR(vm)) {
 			drm_dbg_kms(&dev_priv->drm, "failed to create DPT\n");
 			ret = PTR_ERR(vm);
-			goto err;
+			goto err_frontbuffer_put;
 		}
 
 		intel_fb->dpt_vm = vm;
@@ -2124,8 +2104,10 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 err_free_dpt:
 	if (intel_fb_uses_dpt(fb))
 		intel_dpt_destroy(intel_fb->dpt_vm);
-err:
+err_frontbuffer_put:
 	intel_frontbuffer_put(intel_fb->frontbuffer);
+err:
+	intel_fb_bo_framebuffer_fini(obj);
 	return ret;
 }
 
@@ -2137,23 +2119,14 @@ intel_user_framebuffer_create(struct drm_device *dev,
 	struct drm_framebuffer *fb;
 	struct drm_i915_gem_object *obj;
 	struct drm_mode_fb_cmd2 mode_cmd = *user_mode_cmd;
-	struct drm_i915_private *i915;
-
-	obj = i915_gem_object_lookup(filp, mode_cmd.handles[0]);
-	if (!obj)
-		return ERR_PTR(-ENOENT);
-
-	/* object is backed with LMEM for discrete */
-	i915 = to_i915(obj->base.dev);
-	if (HAS_LMEM(i915) && !i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM_0)) {
-		/* object is "remote", not in local memory */
-		i915_gem_object_put(obj);
-		drm_dbg_kms(&i915->drm, "framebuffer must reside in local memory\n");
-		return ERR_PTR(-EREMOTE);
-	}
+	struct drm_i915_private *i915 = to_i915(dev);
+
+	obj = intel_fb_bo_lookup_valid_bo(i915, filp, &mode_cmd);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
 
 	fb = intel_framebuffer_create(obj, &mode_cmd);
-	i915_gem_object_put(obj);
+	drm_gem_object_put(intel_bo_to_drm_bo(obj));
 
 	return fb;
 }
diff --git a/drivers/gpu/drm/i915/display/intel_fb.h b/drivers/gpu/drm/i915/display/intel_fb.h
index e85167d6bc34..23db6628f53e 100644
--- a/drivers/gpu/drm/i915/display/intel_fb.h
+++ b/drivers/gpu/drm/i915/display/intel_fb.h
@@ -95,4 +95,6 @@ intel_user_framebuffer_create(struct drm_device *dev,
 bool intel_fb_modifier_uses_dpt(struct drm_i915_private *i915, u64 modifier);
 bool intel_fb_uses_dpt(const struct drm_framebuffer *fb);
 
+unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier);
+
 #endif /* __INTEL_FB_H__ */
diff --git a/drivers/gpu/drm/i915/display/intel_fb_bo.c b/drivers/gpu/drm/i915/display/intel_fb_bo.c
new file mode 100644
index 000000000000..4be09541e509
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_fb_bo.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <drm/drm_framebuffer.h>
+
+#include "gem/i915_gem_object.h"
+
+#include "i915_drv.h"
+#include "intel_fb.h"
+#include "intel_fb_bo.h"
+
+void intel_fb_bo_framebuffer_fini(struct drm_i915_gem_object *obj)
+{
+	/* Nothing to do for i915: deliberately empty, obj is not touched */
+}
+
+int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
+				 struct drm_i915_gem_object *obj,
+				 struct drm_mode_fb_cmd2 *mode_cmd)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	unsigned int tiling, stride;
+
+	i915_gem_object_lock(obj, NULL); /* read tiling and stride together under the object lock */
+	tiling = i915_gem_object_get_tiling(obj);
+	stride = i915_gem_object_get_stride(obj);
+	i915_gem_object_unlock(obj);
+
+	if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) {
+		/*
+		 * Explicit modifier: if there's a fence (tiling != NONE), enforce
+		 * that the tiling mode equals what modifier[0] maps to.
+		 */
+		if (tiling != I915_TILING_NONE &&
+		    tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) {
+			drm_dbg_kms(&i915->drm,
+				    "tiling_mode doesn't match fb modifier\n");
+			return -EINVAL;
+		}
+	} else { /* no DRM_MODE_FB_MODIFIERS flag: X tiling sets modifier[0], Y tiling is rejected */
+		if (tiling == I915_TILING_X) {
+			mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED;
+		} else if (tiling == I915_TILING_Y) {
+			drm_dbg_kms(&i915->drm,
+				    "No Y tiling for legacy addfb\n");
+			return -EINVAL;
+		}
+	}
+
+	/*
+	 * gen2/3 (DISPLAY_VER < 4) display engine uses the fence if present,
+	 * so even an untiled object must match the fb modifier exactly.
+	 */
+	if (DISPLAY_VER(i915) < 4 &&
+	    tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) {
+		drm_dbg_kms(&i915->drm,
+			    "tiling_mode must match fb modifier exactly on gen2/3\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * If there's a fence (tiling != NONE), enforce that
+	 * the fb pitch (pitches[0]) and fence stride match.
+	 */
+	if (tiling != I915_TILING_NONE && mode_cmd->pitches[0] != stride) {
+		drm_dbg_kms(&i915->drm,
+			    "pitch (%d) must match tiling stride (%d)\n",
+			    mode_cmd->pitches[0], stride);
+		return -EINVAL;
+	}
+
+	return 0; /* all checks passed; intel_fb is not referenced by this implementation */
+}
+
+struct drm_i915_gem_object *
+intel_fb_bo_lookup_valid_bo(struct drm_i915_private *i915,
+			    struct drm_file *filp,
+			    const struct drm_mode_fb_cmd2 *mode_cmd)
+{
+	struct drm_i915_gem_object *obj;
+
+	obj = i915_gem_object_lookup(filp, mode_cmd->handles[0]); /* only handles[0] is looked up */
+	if (!obj)
+		return ERR_PTR(-ENOENT);
+
+	/* with HAS_LMEM the object must be able to migrate to INTEL_REGION_LMEM_0 */
+	if (HAS_LMEM(i915) && !i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM_0)) {
+		/* object is "remote", not in local memory: drop the lookup reference and fail */
+		i915_gem_object_put(obj);
+		drm_dbg_kms(&i915->drm, "framebuffer must reside in local memory\n");
+		return ERR_PTR(-EREMOTE);
+	}
+
+	return obj; /* success: the reference taken by the lookup is handed to the caller */
+}
diff --git a/drivers/gpu/drm/i915/display/intel_fb_bo.h b/drivers/gpu/drm/i915/display/intel_fb_bo.h
new file mode 100644
index 000000000000..232bf898b013
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_fb_bo.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef __INTEL_FB_BO_H__
+#define __INTEL_FB_BO_H__
+
+struct drm_file;
+struct drm_mode_fb_cmd2;
+struct drm_i915_gem_object;
+struct drm_i915_private;
+struct intel_framebuffer;
+
+void intel_fb_bo_framebuffer_fini(struct drm_i915_gem_object *obj);
+
+int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
+				 struct drm_i915_gem_object *obj,
+				 struct drm_mode_fb_cmd2 *mode_cmd);
+
+struct drm_i915_gem_object *
+intel_fb_bo_lookup_valid_bo(struct drm_i915_private *i915,
+			    struct drm_file *filp,
+			    const struct drm_mode_fb_cmd2 *user_mode_cmd);
+
+#endif
diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c
index 63f389a1707d..f17a1afb4929 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -1235,7 +1235,7 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state,
 	 * Recommendation is to keep this combination disabled
 	 * Bspec: 50422 HSD: 14010260002
 	 */
-	if (DISPLAY_VER(i915) >= 12 && crtc_state->has_psr2) {
+	if (IS_DISPLAY_VER(i915, 12, 14) && crtc_state->has_psr2) {
 		plane_state->no_fbc_reason = "PSR2 enabled";
 		return 0;
 	}
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
index 31d0d695d567..99894a855ef0 100644
--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
@@ -43,7 +43,6 @@
 #include <drm/drm_fourcc.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 
-#include "gem/i915_gem_lmem.h"
 #include "gem/i915_gem_mman.h"
 
 #include "i915_drv.h"
@@ -51,6 +50,7 @@
 #include "intel_fb.h"
 #include "intel_fb_pin.h"
 #include "intel_fbdev.h"
+#include "intel_fbdev_fb.h"
 #include "intel_frontbuffer.h"
 
 struct intel_fbdev {
@@ -146,65 +146,6 @@ static const struct fb_ops intelfb_ops = {
 	.fb_mmap = intel_fbdev_mmap,
 };
 
-static int intelfb_alloc(struct drm_fb_helper *helper,
-			 struct drm_fb_helper_surface_size *sizes)
-{
-	struct intel_fbdev *ifbdev = to_intel_fbdev(helper);
-	struct drm_framebuffer *fb;
-	struct drm_device *dev = helper->dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct drm_mode_fb_cmd2 mode_cmd = {};
-	struct drm_i915_gem_object *obj;
-	int size;
-
-	/* we don't do packed 24bpp */
-	if (sizes->surface_bpp == 24)
-		sizes->surface_bpp = 32;
-
-	mode_cmd.width = sizes->surface_width;
-	mode_cmd.height = sizes->surface_height;
-
-	mode_cmd.pitches[0] = ALIGN(mode_cmd.width *
-				    DIV_ROUND_UP(sizes->surface_bpp, 8), 64);
-	mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
-							  sizes->surface_depth);
-
-	size = mode_cmd.pitches[0] * mode_cmd.height;
-	size = PAGE_ALIGN(size);
-
-	obj = ERR_PTR(-ENODEV);
-	if (HAS_LMEM(dev_priv)) {
-		obj = i915_gem_object_create_lmem(dev_priv, size,
-						  I915_BO_ALLOC_CONTIGUOUS |
-						  I915_BO_ALLOC_USER);
-	} else {
-		/*
-		 * If the FB is too big, just don't use it since fbdev is not very
-		 * important and we should probably use that space with FBC or other
-		 * features.
-		 *
-		 * Also skip stolen on MTL as Wa_22018444074 mitigation.
-		 */
-		if (!(IS_METEORLAKE(dev_priv)) && size * 2 < dev_priv->dsm.usable_size)
-			obj = i915_gem_object_create_stolen(dev_priv, size);
-		if (IS_ERR(obj))
-			obj = i915_gem_object_create_shmem(dev_priv, size);
-	}
-
-	if (IS_ERR(obj)) {
-		drm_err(&dev_priv->drm, "failed to allocate framebuffer (%pe)\n", obj);
-		return PTR_ERR(obj);
-	}
-
-	fb = intel_framebuffer_create(obj, &mode_cmd);
-	i915_gem_object_put(obj);
-	if (IS_ERR(fb))
-		return PTR_ERR(fb);
-
-	ifbdev->fb = to_intel_framebuffer(fb);
-	return 0;
-}
-
 static int intelfb_create(struct drm_fb_helper *helper,
 			  struct drm_fb_helper_surface_size *sizes)
 {
@@ -213,7 +154,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	struct drm_device *dev = helper->dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
-	struct i915_ggtt *ggtt = to_gt(dev_priv)->ggtt;
 	const struct i915_gtt_view view = {
 		.type = I915_GTT_VIEW_NORMAL,
 	};
@@ -222,9 +162,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	struct i915_vma *vma;
 	unsigned long flags = 0;
 	bool prealloc = false;
-	void __iomem *vaddr;
 	struct drm_i915_gem_object *obj;
-	struct i915_gem_ww_ctx ww;
 	int ret;
 
 	mutex_lock(&ifbdev->hpd_lock);
@@ -245,12 +183,13 @@ static int intelfb_create(struct drm_fb_helper *helper,
 		intel_fb = ifbdev->fb = NULL;
 	}
 	if (!intel_fb || drm_WARN_ON(dev, !intel_fb_obj(&intel_fb->base))) {
+		struct drm_framebuffer *fb;
 		drm_dbg_kms(&dev_priv->drm,
 			    "no BIOS fb, allocating a new one\n");
-		ret = intelfb_alloc(helper, sizes);
-		if (ret)
-			return ret;
-		intel_fb = ifbdev->fb;
+		fb = intel_fbdev_fb_alloc(helper, sizes);
+		if (IS_ERR(fb))
+			return PTR_ERR(fb);
+		intel_fb = ifbdev->fb = to_intel_framebuffer(fb);
 	} else {
 		drm_dbg_kms(&dev_priv->drm, "re-using BIOS fb\n");
 		prealloc = true;
@@ -283,49 +222,18 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	info->fbops = &intelfb_ops;
 
 	obj = intel_fb_obj(&intel_fb->base);
-	if (i915_gem_object_is_lmem(obj)) {
-		struct intel_memory_region *mem = obj->mm.region;
-
-		/* Use fbdev's framebuffer from lmem for discrete */
-		info->fix.smem_start =
-			(unsigned long)(mem->io_start +
-					i915_gem_object_get_dma_address(obj, 0));
-		info->fix.smem_len = obj->base.size;
-	} else {
-		/* Our framebuffer is the entirety of fbdev's system memory */
-		info->fix.smem_start =
-			(unsigned long)(ggtt->gmadr.start + i915_ggtt_offset(vma));
-		info->fix.smem_len = vma->size;
-	}
-
-	for_i915_gem_ww(&ww, ret, false) {
-		ret = i915_gem_object_lock(vma->obj, &ww);
-
-		if (ret)
-			continue;
-
-		vaddr = i915_vma_pin_iomap(vma);
-		if (IS_ERR(vaddr)) {
-			drm_err(&dev_priv->drm,
-				"Failed to remap framebuffer into virtual memory (%pe)\n", vaddr);
-			ret = PTR_ERR(vaddr);
-			continue;
-		}
-	}
 
+	ret = intel_fbdev_fb_fill_info(dev_priv, info, obj, vma);
 	if (ret)
 		goto out_unpin;
 
-	info->screen_base = vaddr;
-	info->screen_size = vma->size;
-
 	drm_fb_helper_fill_info(info, &ifbdev->helper, sizes);
 
 	/* If the object is shmemfs backed, it will have given us zeroed pages.
 	 * If the object is stolen however, it will be full of whatever
 	 * garbage was left in there.
 	 */
-	if (!i915_gem_object_is_shmem(vma->obj) && !prealloc)
+	if (!i915_gem_object_is_shmem(obj) && !prealloc)
 		memset_io(info->screen_base, 0, info->screen_size);
 
 	/* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */
@@ -424,12 +332,12 @@ static bool intel_fbdev_init_bios(struct drm_device *dev,
 			continue;
 		}
 
-		if (obj->base.size > max_size) {
+		if (intel_bo_to_drm_bo(obj)->size > max_size) {
 			drm_dbg_kms(&i915->drm,
 				    "found possible fb from [PLANE:%d:%s]\n",
 				    plane->base.base.id, plane->base.name);
 			fb = to_intel_framebuffer(plane_state->uapi.fb);
-			max_size = obj->base.size;
+			max_size = intel_bo_to_drm_bo(obj)->size;
 		}
 	}
 
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c
new file mode 100644
index 000000000000..717c3a3237c4
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_fb_helper.h>
+
+#include "gem/i915_gem_lmem.h"
+
+#include "i915_drv.h"
+#include "intel_display_types.h"
+#include "intel_fbdev_fb.h"
+
+struct drm_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
+					     struct drm_fb_helper_surface_size *sizes)
+{
+	struct drm_framebuffer *fb;
+	struct drm_device *dev = helper->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_mode_fb_cmd2 mode_cmd = {};
+	struct drm_i915_gem_object *obj;
+	int size;
+
+	/* we don't do packed 24bpp: promote the request to 32bpp (this modifies *sizes) */
+	if (sizes->surface_bpp == 24)
+		sizes->surface_bpp = 32;
+
+	mode_cmd.width = sizes->surface_width;
+	mode_cmd.height = sizes->surface_height;
+
+	mode_cmd.pitches[0] = ALIGN(mode_cmd.width *
+				    DIV_ROUND_UP(sizes->surface_bpp, 8), 64); /* row bytes, 64-aligned */
+	mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
+							  sizes->surface_depth);
+
+	size = mode_cmd.pitches[0] * mode_cmd.height;
+	size = PAGE_ALIGN(size);
+
+	obj = ERR_PTR(-ENODEV); /* stays an error if stolen is skipped, which triggers the shmem fallback */
+	if (HAS_LMEM(dev_priv)) {
+		obj = i915_gem_object_create_lmem(dev_priv, size,
+						  I915_BO_ALLOC_CONTIGUOUS |
+						  I915_BO_ALLOC_USER);
+	} else {
+		/*
+		 * If the FB is too big, just don't use it since fbdev is not very
+		 * important and we should probably use that space with FBC or other
+		 * features.
+		 *
+		 * Also skip stolen on MTL as Wa_22018444074 mitigation.
+		 */
+		if (!(IS_METEORLAKE(dev_priv)) && size * 2 < dev_priv->dsm.usable_size)
+			obj = i915_gem_object_create_stolen(dev_priv, size);
+		if (IS_ERR(obj))
+			obj = i915_gem_object_create_shmem(dev_priv, size);
+	}
+
+	if (IS_ERR(obj)) {
+		drm_err(&dev_priv->drm, "failed to allocate framebuffer (%pe)\n", obj);
+		return ERR_CAST(obj); /* propagate the allocator's errno instead of a fixed -ENOMEM */
+	}
+
+	fb = intel_framebuffer_create(obj, &mode_cmd);
+	i915_gem_object_put(obj); /* drop the local reference; fb may be an ERR_PTR and is returned as-is */
+
+	return fb;
+}
+
+int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info,
+			     struct drm_i915_gem_object *obj, struct i915_vma *vma)
+{
+	struct i915_gem_ww_ctx ww;
+	void __iomem *vaddr; /* assigned only inside the ww loop; consumed only once ret == 0 */
+	int ret;
+
+	if (i915_gem_object_is_lmem(obj)) {
+		struct intel_memory_region *mem = obj->mm.region;
+
+		/* LMEM object: smem_start is the region's io_start plus the object's first DMA address */
+		info->fix.smem_start =
+			(unsigned long)(mem->io_start +
+					i915_gem_object_get_dma_address(obj, 0));
+		info->fix.smem_len = obj->base.size;
+	} else {
+		struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
+
+		/* Otherwise smem_start is ggtt->gmadr.start plus the vma's GGTT offset, sized by the vma */
+		info->fix.smem_start =
+			(unsigned long)(ggtt->gmadr.start + i915_ggtt_offset(vma));
+		info->fix.smem_len = vma->size;
+	}
+
+	for_i915_gem_ww(&ww, ret, false) {
+		ret = i915_gem_object_lock(vma->obj, &ww); /* NOTE(review): presumably vma->obj == obj - confirm */
+
+		if (ret)
+			continue;
+
+		vaddr = i915_vma_pin_iomap(vma);
+		if (IS_ERR(vaddr)) {
+			drm_err(&i915->drm,
+				"Failed to remap framebuffer into virtual memory (%pe)\n", vaddr);
+			ret = PTR_ERR(vaddr);
+			continue;
+		}
+	}
+
+	if (ret)
+		return ret;
+
+	info->screen_base = vaddr;
+	info->screen_size = intel_bo_to_drm_bo(obj)->size; /* NOTE(review): object size, while non-LMEM smem_len is vma->size - confirm they agree */
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.h b/drivers/gpu/drm/i915/display/intel_fbdev_fb.h
new file mode 100644
index 000000000000..a395b2c65d33
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_FBDEV_FB_H__
+#define __INTEL_FBDEV_FB_H__
+
+struct drm_fb_helper;
+struct drm_fb_helper_surface_size;
+struct drm_i915_gem_object;
+struct drm_i915_private;
+struct fb_info;
+struct i915_vma;
+
+struct drm_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
+					     struct drm_fb_helper_surface_size *sizes);
+int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info,
+			     struct drm_i915_gem_object *obj, struct i915_vma *vma);
+
+#endif
diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c
index 40d7b6f3f489..e9e4dcf345f9 100644
--- a/drivers/gpu/drm/i915/display/intel_gmbus.c
+++ b/drivers/gpu/drm/i915/display/intel_gmbus.c
@@ -899,7 +899,6 @@ int intel_gmbus_setup(struct drm_i915_private *i915)
 		}
 
 		bus->adapter.owner = THIS_MODULE;
-		bus->adapter.class = I2C_CLASS_DDC;
 		snprintf(bus->adapter.name,
 			 sizeof(bus->adapter.name),
 			 "i915 gmbus %s", gmbus_pin->name);
diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c
index ab18cfc19c0a..39e4f5f7c817 100644
--- a/drivers/gpu/drm/i915/display/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/display/intel_hdmi.c
@@ -1983,6 +1983,10 @@ intel_hdmi_mode_valid(struct drm_connector *connector,
 	bool ycbcr_420_only;
 	enum intel_output_format sink_format;
 
+	status = intel_cpu_transcoder_mode_valid(dev_priv, mode);
+	if (status != MODE_OK)
+		return status;
+
 	if ((mode->flags & DRM_MODE_FLAG_3D_MASK) == DRM_MODE_FLAG_3D_FRAME_PACKING)
 		clock *= 2;
 
diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c
index 4b114fde57b1..221f5c6c871b 100644
--- a/drivers/gpu/drm/i915/display/intel_lvds.c
+++ b/drivers/gpu/drm/i915/display/intel_lvds.c
@@ -185,7 +185,7 @@ static void intel_lvds_pps_get_hw_state(struct drm_i915_private *dev_priv,
 	/* Convert from 100ms to 100us units */
 	pps->t4 = val * 1000;
 
-	if (DISPLAY_VER(dev_priv) <= 4 &&
+	if (DISPLAY_VER(dev_priv) < 5 &&
 	    pps->t1_t2 == 0 && pps->t5 == 0 && pps->t3 == 0 && pps->tx == 0) {
 		drm_dbg_kms(&dev_priv->drm,
 			    "Panel power timings uninitialized, "
@@ -389,11 +389,16 @@ intel_lvds_mode_valid(struct drm_connector *_connector,
 		      struct drm_display_mode *mode)
 {
 	struct intel_connector *connector = to_intel_connector(_connector);
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
 	const struct drm_display_mode *fixed_mode =
 		intel_panel_fixed_mode(connector, mode);
 	int max_pixclk = to_i915(connector->base.dev)->max_dotclk_freq;
 	enum drm_mode_status status;
 
+	status = intel_cpu_transcoder_mode_valid(i915, mode);
+	if (status != MODE_OK)
+		return status;
+
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		return MODE_NO_DBLESCAN;
 
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 15c1804dcd59..57bbf3e3af92 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -806,10 +806,10 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp)
 
 	val |= EDP_PSR2_IDLE_FRAMES(psr_compute_idle_frames(intel_dp));
 
-	if (DISPLAY_VER(dev_priv) <= 13 && !IS_ALDERLAKE_P(dev_priv))
+	if (DISPLAY_VER(dev_priv) < 14 && !IS_ALDERLAKE_P(dev_priv))
 		val |= EDP_SU_TRACK_ENABLE;
 
-	if (DISPLAY_VER(dev_priv) >= 10 && DISPLAY_VER(dev_priv) <= 12)
+	if (DISPLAY_VER(dev_priv) >= 10 && DISPLAY_VER(dev_priv) < 13)
 		val |= EDP_Y_COORDINATE_ENABLE;
 
 	val |= EDP_PSR2_FRAME_BEFORE_SU(frames_before_su_entry(intel_dp));
@@ -891,13 +891,13 @@ transcoder_has_psr2(struct drm_i915_private *dev_priv, enum transcoder cpu_trans
 		return false;
 }
 
-static u32 intel_get_frame_time_us(const struct intel_crtc_state *cstate)
+static u32 intel_get_frame_time_us(const struct intel_crtc_state *crtc_state)
 {
-	if (!cstate || !cstate->hw.active)
+	if (!crtc_state->hw.active)
 		return 0;
 
 	return DIV_ROUND_UP(1000 * 1000,
-			    drm_mode_vrefresh(&cstate->hw.adjusted_mode));
+			    drm_mode_vrefresh(&crtc_state->hw.adjusted_mode));
 }
 
 static void psr2_program_idle_frames(struct intel_dp *intel_dp,
@@ -1094,7 +1094,7 @@ static bool _compute_psr2_sdp_prior_scanline_indication(struct intel_dp *intel_d
 		return true;
 
 	/* Not supported <13 / Wa_22012279113:adl-p */
-	if (DISPLAY_VER(dev_priv) <= 13 || intel_dp->edp_dpcd[0] < DP_EDP_14b)
+	if (DISPLAY_VER(dev_priv) < 14 || intel_dp->edp_dpcd[0] < DP_EDP_14b)
 		return false;
 
 	crtc_state->req_psr2_sdp_prior_scanline = true;
@@ -1221,7 +1221,7 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp,
 	 * over PSR2.
 	 */
 	if (crtc_state->dsc.compression_enable &&
-	    (DISPLAY_VER(dev_priv) <= 13 && !IS_ALDERLAKE_P(dev_priv))) {
+	    (DISPLAY_VER(dev_priv) < 14 && !IS_ALDERLAKE_P(dev_priv))) {
 		drm_dbg_kms(&dev_priv->drm,
 			    "PSR2 cannot be enabled since DSC is enabled\n");
 		return false;
@@ -1525,8 +1525,18 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp,
 	 * can rely on frontbuffer tracking.
 	 */
 	mask = EDP_PSR_DEBUG_MASK_MEMUP |
-	       EDP_PSR_DEBUG_MASK_HPD |
-	       EDP_PSR_DEBUG_MASK_LPSP;
+	       EDP_PSR_DEBUG_MASK_HPD;
+
+	/*
+	 * For some unknown reason on HSW non-ULT (or at least on
+	 * Dell Latitude E6540) external displays start to flicker
+	 * when PSR is enabled on the eDP. SR/PC6 residency is much
+	 * higher than should be possible with an external display.
+	 * As a workaround leave LPSP unmasked to prevent PSR entry
+	 * when external displays are active.
+	 */
+	if (DISPLAY_VER(dev_priv) >= 8 || IS_HASWELL_ULT(dev_priv))
+		mask |= EDP_PSR_DEBUG_MASK_LPSP;
 
 	if (DISPLAY_VER(dev_priv) < 20)
 		mask |= EDP_PSR_DEBUG_MASK_MAX_SLEEP;
@@ -1917,81 +1927,6 @@ static void psr_force_hw_tracking_exit(struct intel_dp *intel_dp)
 	intel_de_write(dev_priv, CURSURFLIVE(intel_dp->psr.pipe), 0);
 }
 
-void intel_psr2_disable_plane_sel_fetch_arm(struct intel_plane *plane,
-					    const struct intel_crtc_state *crtc_state)
-{
-	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
-	enum pipe pipe = plane->pipe;
-
-	if (!crtc_state->enable_psr2_sel_fetch)
-		return;
-
-	intel_de_write_fw(dev_priv, PLANE_SEL_FETCH_CTL(pipe, plane->id), 0);
-}
-
-void intel_psr2_program_plane_sel_fetch_arm(struct intel_plane *plane,
-					    const struct intel_crtc_state *crtc_state,
-					    const struct intel_plane_state *plane_state)
-{
-	struct drm_i915_private *i915 = to_i915(plane->base.dev);
-	enum pipe pipe = plane->pipe;
-
-	if (!crtc_state->enable_psr2_sel_fetch)
-		return;
-
-	if (plane->id == PLANE_CURSOR)
-		intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id),
-				  plane_state->ctl);
-	else
-		intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id),
-				  PLANE_SEL_FETCH_CTL_ENABLE);
-}
-
-void intel_psr2_program_plane_sel_fetch_noarm(struct intel_plane *plane,
-					      const struct intel_crtc_state *crtc_state,
-					      const struct intel_plane_state *plane_state,
-					      int color_plane)
-{
-	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
-	enum pipe pipe = plane->pipe;
-	const struct drm_rect *clip;
-	u32 val;
-	int x, y;
-
-	if (!crtc_state->enable_psr2_sel_fetch)
-		return;
-
-	if (plane->id == PLANE_CURSOR)
-		return;
-
-	clip = &plane_state->psr2_sel_fetch_area;
-
-	val = (clip->y1 + plane_state->uapi.dst.y1) << 16;
-	val |= plane_state->uapi.dst.x1;
-	intel_de_write_fw(dev_priv, PLANE_SEL_FETCH_POS(pipe, plane->id), val);
-
-	x = plane_state->view.color_plane[color_plane].x;
-
-	/*
-	 * From Bspec: UV surface Start Y Position = half of Y plane Y
-	 * start position.
-	 */
-	if (!color_plane)
-		y = plane_state->view.color_plane[color_plane].y + clip->y1;
-	else
-		y = plane_state->view.color_plane[color_plane].y + clip->y1 / 2;
-
-	val = y << 16 | x;
-
-	intel_de_write_fw(dev_priv, PLANE_SEL_FETCH_OFFSET(pipe, plane->id),
-			  val);
-
-	/* Sizes are 0 based */
-	val = (drm_rect_height(clip) - 1) << 16;
-	val |= (drm_rect_width(&plane_state->uapi.src) >> 16) - 1;
-	intel_de_write_fw(dev_priv, PLANE_SEL_FETCH_SIZE(pipe, plane->id), val);
-}
-
 void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
@@ -2251,8 +2186,19 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
 			continue;
 
 		inter = pipe_clip;
-		if (!drm_rect_intersect(&inter, &new_plane_state->uapi.dst))
+		sel_fetch_area = &new_plane_state->psr2_sel_fetch_area;
+		if (!drm_rect_intersect(&inter, &new_plane_state->uapi.dst)) {
+			sel_fetch_area->y1 = -1;
+			sel_fetch_area->y2 = -1;
+			/*
+			 * if plane sel fetch was previously enabled ->
+			 * disable it
+			 */
+			if (drm_rect_height(&old_plane_state->psr2_sel_fetch_area) > 0)
+				crtc_state->update_planes |= BIT(plane->id);
+
 			continue;
+		}
 
 		if (!psr2_sel_fetch_plane_state_supported(new_plane_state)) {
 			full_update = true;
@@ -3383,11 +3329,11 @@ void intel_psr_connector_debugfs_add(struct intel_connector *connector)
 	struct drm_i915_private *i915 = to_i915(connector->base.dev);
 	struct dentry *root = connector->base.debugfs_entry;
 
-	if (connector->base.connector_type != DRM_MODE_CONNECTOR_eDP) {
-		if (!(HAS_DP20(i915) &&
-		      connector->base.connector_type == DRM_MODE_CONNECTOR_DisplayPort))
-			return;
-	}
+	/* TODO: Add support for MST connectors as well. */
+	if ((connector->base.connector_type != DRM_MODE_CONNECTOR_eDP &&
+	     connector->base.connector_type != DRM_MODE_CONNECTOR_DisplayPort) ||
+	    connector->mst_port)
+		return;
 
 	debugfs_create_file("i915_psr_sink_status", 0444, root,
 			    connector, &i915_psr_sink_status_fops);
diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h
index 6a1f4573852b..143e0595c097 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.h
+++ b/drivers/gpu/drm/i915/display/intel_psr.h
@@ -55,16 +55,6 @@ bool intel_psr_enabled(struct intel_dp *intel_dp);
 int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
 				struct intel_crtc *crtc);
 void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_state);
-void intel_psr2_program_plane_sel_fetch_noarm(struct intel_plane *plane,
-					      const struct intel_crtc_state *crtc_state,
-					      const struct intel_plane_state *plane_state,
-					      int color_plane);
-void intel_psr2_program_plane_sel_fetch_arm(struct intel_plane *plane,
-					    const struct intel_crtc_state *crtc_state,
-					    const struct intel_plane_state *plane_state);
-
-void intel_psr2_disable_plane_sel_fetch_arm(struct intel_plane *plane,
-					    const struct intel_crtc_state *crtc_state);
 void intel_psr_pause(struct intel_dp *intel_dp);
 void intel_psr_resume(struct intel_dp *intel_dp);
 
diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c
index bcb4959df70d..2915d7afe5cc 100644
--- a/drivers/gpu/drm/i915/display/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/display/intel_sdvo.c
@@ -1209,7 +1209,7 @@ static bool intel_sdvo_set_tv_format(struct intel_sdvo *intel_sdvo,
 	struct intel_sdvo_tv_format format;
 	u32 format_map;
 
-	format_map = 1 << conn_state->tv.mode;
+	format_map = 1 << conn_state->tv.legacy_mode;
 	memset(&format, 0, sizeof(format));
 	memcpy(&format, &format_map, min(sizeof(format), sizeof(format_map)));
 
@@ -1931,13 +1931,19 @@ static enum drm_mode_status
 intel_sdvo_mode_valid(struct drm_connector *connector,
 		      struct drm_display_mode *mode)
 {
+	struct drm_i915_private *i915 = to_i915(connector->dev);
 	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector));
 	struct intel_sdvo_connector *intel_sdvo_connector =
 		to_intel_sdvo_connector(connector);
-	int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
 	bool has_hdmi_sink = intel_has_hdmi_sink(intel_sdvo_connector, connector->state);
+	int max_dotclk = i915->max_dotclk_freq;
+	enum drm_mode_status status;
 	int clock = mode->clock;
 
+	status = intel_cpu_transcoder_mode_valid(i915, mode);
+	if (status != MODE_OK)
+		return status;
+
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		return MODE_NO_DBLESCAN;
 
@@ -2292,7 +2298,7 @@ static int intel_sdvo_get_tv_modes(struct drm_connector *connector)
 	 * Read the list of supported input resolutions for the selected TV
 	 * format.
 	 */
-	format_map = 1 << conn_state->tv.mode;
+	format_map = 1 << conn_state->tv.legacy_mode;
 	memcpy(&tv_res, &format_map,
 	       min(sizeof(format_map), sizeof(struct intel_sdvo_sdtv_resolution_request)));
 
@@ -2357,7 +2363,7 @@ intel_sdvo_connector_atomic_get_property(struct drm_connector *connector,
 		int i;
 
 		for (i = 0; i < intel_sdvo_connector->format_supported_num; i++)
-			if (state->tv.mode == intel_sdvo_connector->tv_format_supported[i]) {
+			if (state->tv.legacy_mode == intel_sdvo_connector->tv_format_supported[i]) {
 				*val = i;
 
 				return 0;
@@ -2413,7 +2419,7 @@ intel_sdvo_connector_atomic_set_property(struct drm_connector *connector,
 	struct intel_sdvo_connector_state *sdvo_state = to_intel_sdvo_connector_state(state);
 
 	if (property == intel_sdvo_connector->tv_format) {
-		state->tv.mode = intel_sdvo_connector->tv_format_supported[val];
+		state->tv.legacy_mode = intel_sdvo_connector->tv_format_supported[val];
 
 		if (state->crtc) {
 			struct drm_crtc_state *crtc_state =
@@ -3070,7 +3076,7 @@ static bool intel_sdvo_tv_create_property(struct intel_sdvo *intel_sdvo,
 		drm_property_add_enum(intel_sdvo_connector->tv_format, i,
 				      tv_format_names[intel_sdvo_connector->tv_format_supported[i]]);
 
-	intel_sdvo_connector->base.base.state->tv.mode = intel_sdvo_connector->tv_format_supported[0];
+	intel_sdvo_connector->base.base.state->tv.legacy_mode = intel_sdvo_connector->tv_format_supported[0];
 	drm_object_attach_property(&intel_sdvo_connector->base.base.base,
 				   intel_sdvo_connector->tv_format, 0);
 	return true;
@@ -3321,7 +3327,6 @@ intel_sdvo_init_ddc_proxy(struct intel_sdvo_ddc *ddc,
 	ddc->ddc_bus = ddc_bus;
 
 	ddc->ddc.owner = THIS_MODULE;
-	ddc->ddc.class = I2C_CLASS_DDC;
 	snprintf(ddc->ddc.name, I2C_NAME_SIZE, "SDVO %c DDC%d",
 		 port_name(sdvo->base.port), ddc_bus);
 	ddc->ddc.dev.parent = &pdev->dev;
diff --git a/drivers/gpu/drm/i915/display/intel_snps_phy.c b/drivers/gpu/drm/i915/display/intel_snps_phy.c
index ce5a73a4cc89..bc61e736f9b3 100644
--- a/drivers/gpu/drm/i915/display/intel_snps_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_snps_phy.c
@@ -3,7 +3,7 @@
  * Copyright © 2019 Intel Corporation
  */
 
-#include <linux/util_macros.h>
+#include <linux/math.h>
 
 #include "i915_reg.h"
 #include "intel_ddi.h"
diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c
index f64d348a969e..dcf05e00e505 100644
--- a/drivers/gpu/drm/i915/display/intel_tc.c
+++ b/drivers/gpu/drm/i915/display/intel_tc.c
@@ -1030,18 +1030,25 @@ static bool xelpdp_tc_phy_enable_tcss_power(struct intel_tc_port *tc, bool enabl
 
 	__xelpdp_tc_phy_enable_tcss_power(tc, enable);
 
-	if ((!tc_phy_wait_for_ready(tc) ||
-	     !xelpdp_tc_phy_wait_for_tcss_power(tc, enable)) &&
-	    !drm_WARN_ON(&i915->drm, tc->mode == TC_PORT_LEGACY)) {
-		if (enable) {
-			__xelpdp_tc_phy_enable_tcss_power(tc, false);
-			xelpdp_tc_phy_wait_for_tcss_power(tc, false);
-		}
+	if (enable && !tc_phy_wait_for_ready(tc))
+		goto out_disable;
 
-		return false;
-	}
+	if (!xelpdp_tc_phy_wait_for_tcss_power(tc, enable))
+		goto out_disable;
 
 	return true;
+
+out_disable:
+	if (drm_WARN_ON(&i915->drm, tc->mode == TC_PORT_LEGACY))
+		return false;
+
+	if (!enable)
+		return false;
+
+	__xelpdp_tc_phy_enable_tcss_power(tc, false);
+	xelpdp_tc_phy_wait_for_tcss_power(tc, false);
+
+	return false;
 }
 
 static void xelpdp_tc_phy_take_ownership(struct intel_tc_port *tc, bool take)
diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c
index 31a79fdfc812..992a725de751 100644
--- a/drivers/gpu/drm/i915/display/intel_tv.c
+++ b/drivers/gpu/drm/i915/display/intel_tv.c
@@ -949,7 +949,7 @@ intel_disable_tv(struct intel_atomic_state *state,
 
 static const struct tv_mode *intel_tv_mode_find(const struct drm_connector_state *conn_state)
 {
-	int format = conn_state->tv.mode;
+	int format = conn_state->tv.legacy_mode;
 
 	return &tv_modes[format];
 }
@@ -958,8 +958,14 @@ static enum drm_mode_status
 intel_tv_mode_valid(struct drm_connector *connector,
 		    struct drm_display_mode *mode)
 {
+	struct drm_i915_private *i915 = to_i915(connector->dev);
 	const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state);
-	int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
+	int max_dotclk = i915->max_dotclk_freq;
+	enum drm_mode_status status;
+
+	status = intel_cpu_transcoder_mode_valid(i915, mode);
+	if (status != MODE_OK)
+		return status;
 
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		return MODE_NO_DBLESCAN;
@@ -1411,9 +1417,6 @@ set_tv_mode_timings(struct drm_i915_private *dev_priv,
 static void set_color_conversion(struct drm_i915_private *dev_priv,
 				 const struct color_conversion *color_conversion)
 {
-	if (!color_conversion)
-		return;
-
 	intel_de_write(dev_priv, TV_CSC_Y,
 		       (color_conversion->ry << 16) | color_conversion->gy);
 	intel_de_write(dev_priv, TV_CSC_Y2,
@@ -1448,9 +1451,6 @@ static void intel_tv_pre_enable(struct intel_atomic_state *state,
 	int xpos, ypos;
 	unsigned int xsize, ysize;
 
-	if (!tv_mode)
-		return;	/* can't happen (mode_prepare prevents this) */
-
 	tv_ctl = intel_de_read(dev_priv, TV_CTL);
 	tv_ctl &= TV_CTL_SAVE;
 
@@ -1704,7 +1704,7 @@ static void intel_tv_find_better_format(struct drm_connector *connector)
 			break;
 	}
 
-	connector->state->tv.mode = i;
+	connector->state->tv.legacy_mode = i;
 }
 
 static int
@@ -1859,7 +1859,7 @@ static int intel_tv_atomic_check(struct drm_connector *connector,
 	old_state = drm_atomic_get_old_connector_state(state, connector);
 	new_crtc_state = drm_atomic_get_new_crtc_state(state, new_state->crtc);
 
-	if (old_state->tv.mode != new_state->tv.mode ||
+	if (old_state->tv.legacy_mode != new_state->tv.legacy_mode ||
 	    old_state->tv.margins.left != new_state->tv.margins.left ||
 	    old_state->tv.margins.right != new_state->tv.margins.right ||
 	    old_state->tv.margins.top != new_state->tv.margins.top ||
@@ -1896,7 +1896,7 @@ static void intel_tv_add_properties(struct drm_connector *connector)
 	conn_state->tv.margins.right = 46;
 	conn_state->tv.margins.bottom = 37;
 
-	conn_state->tv.mode = 0;
+	conn_state->tv.legacy_mode = 0;
 
 	/* Create TV properties then attach current values */
 	for (i = 0; i < ARRAY_SIZE(tv_modes); i++) {
@@ -1910,7 +1910,7 @@ static void intel_tv_add_properties(struct drm_connector *connector)
 
 	drm_object_attach_property(&connector->base,
 				   i915->drm.mode_config.legacy_tv_mode_property,
-				   conn_state->tv.mode);
+				   conn_state->tv.legacy_mode);
 	drm_object_attach_property(&connector->base,
 				   i915->drm.mode_config.tv_left_margin_property,
 				   conn_state->tv.margins.left);
diff --git a/drivers/gpu/drm/i915/display/intel_vblank.c b/drivers/gpu/drm/i915/display/intel_vblank.c
index 2cec2abf9746..fe256bf7b485 100644
--- a/drivers/gpu/drm/i915/display/intel_vblank.c
+++ b/drivers/gpu/drm/i915/display/intel_vblank.c
@@ -265,6 +265,32 @@ int intel_crtc_scanline_to_hw(struct intel_crtc *crtc, int scanline)
 	return (scanline + vtotal - crtc->scanline_offset) % vtotal;
 }
 
+/*
+ * These helpers take the uncore lock only when I915 is defined and
+ * compile to no-ops otherwise.  Taking the lock is only needed in
+ * i915, not in Xe.
+ *
+ * This lock in i915 is needed because some old platforms (at least
+ * IVB and possibly HSW as well), which are not supported in Xe, need
+ * all register accesses to the same cacheline to be serialized,
+ * otherwise they may hang.
+ */
+static void intel_vblank_section_enter(struct drm_i915_private *i915)
+	__acquires(i915->uncore.lock)
+{
+#ifdef I915
+	spin_lock(&i915->uncore.lock);
+#endif
+}
+
+static void intel_vblank_section_exit(struct drm_i915_private *i915)
+	__releases(i915->uncore.lock)
+{
+#ifdef I915
+	spin_unlock(&i915->uncore.lock);
+#endif
+}
+
 static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
 				     bool in_vblank_irq,
 				     int *vpos, int *hpos,
@@ -302,11 +328,12 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
 	}
 
 	/*
-	 * Lock uncore.lock, as we will do multiple timing critical raw
-	 * register reads, potentially with preemption disabled, so the
-	 * following code must not block on uncore.lock.
+	 * Enter vblank critical section, as we will do multiple
+	 * timing critical raw register reads, potentially with
+	 * preemption disabled, so the following code must not block.
 	 */
-	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
+	local_irq_save(irqflags);
+	intel_vblank_section_enter(dev_priv);
 
 	/* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
 
@@ -374,7 +401,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
 
 	/* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
 
-	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
+	intel_vblank_section_exit(dev_priv);
+	local_irq_restore(irqflags);
 
 	/*
 	 * While in vblank, position will be negative
@@ -412,9 +440,13 @@ int intel_get_crtc_scanline(struct intel_crtc *crtc)
 	unsigned long irqflags;
 	int position;
 
-	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
+	local_irq_save(irqflags);
+	intel_vblank_section_enter(dev_priv);
+
 	position = __intel_get_crtc_scanline(crtc);
-	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
+
+	intel_vblank_section_exit(dev_priv);
+	local_irq_restore(irqflags);
 
 	return position;
 }
@@ -537,7 +569,7 @@ void intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state,
 	 * Need to audit everything to make sure it's safe.
 	 */
 	spin_lock_irqsave(&i915->drm.vblank_time_lock, irqflags);
-	spin_lock(&i915->uncore.lock);
+	intel_vblank_section_enter(i915);
 
 	drm_calc_timestamping_constants(&crtc->base, &adjusted_mode);
 
@@ -546,7 +578,6 @@ void intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state,
 	crtc->mode_flags = mode_flags;
 
 	crtc->scanline_offset = intel_crtc_scanline_offset(crtc_state);
-
-	spin_unlock(&i915->uncore.lock);
+	intel_vblank_section_exit(i915);
 	spin_unlock_irqrestore(&i915->drm.vblank_time_lock, irqflags);
 }
diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c
index 5f2fb702e367..17d6572f9d0a 100644
--- a/drivers/gpu/drm/i915/display/intel_vdsc.c
+++ b/drivers/gpu/drm/i915/display/intel_vdsc.c
@@ -812,13 +812,13 @@ void intel_dsc_disable(const struct intel_crtc_state *old_crtc_state)
 }
 
 static u32 intel_dsc_pps_read(struct intel_crtc_state *crtc_state, int pps,
-			      bool *check_equal)
+			      bool *all_equal)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
 	i915_reg_t dsc_reg[2];
 	int i, vdsc_per_pipe, dsc_reg_num;
-	u32 val = 0;
+	u32 val;
 
 	vdsc_per_pipe = intel_dsc_get_vdsc_per_pipe(crtc_state);
 	dsc_reg_num = min_t(int, ARRAY_SIZE(dsc_reg), vdsc_per_pipe);
@@ -827,20 +827,13 @@ static u32 intel_dsc_pps_read(struct intel_crtc_state *crtc_state, int pps,
 
 	intel_dsc_get_pps_reg(crtc_state, pps, dsc_reg, dsc_reg_num);
 
-	if (check_equal)
-		*check_equal = true;
-
-	for (i = 0; i < dsc_reg_num; i++) {
-		u32 tmp;
+	*all_equal = true;
 
-		tmp = intel_de_read(i915, dsc_reg[i]);
+	val = intel_de_read(i915, dsc_reg[0]);
 
-		if (i == 0) {
-			val = tmp;
-		} else if (check_equal && tmp != val) {
-			*check_equal = false;
-			break;
-		} else if (!check_equal) {
+	for (i = 1; i < dsc_reg_num; i++) {
+		if (intel_de_read(i915, dsc_reg[i]) != val) {
+			*all_equal = false;
 			break;
 		}
 	}
diff --git a/drivers/gpu/drm/i915/display/intel_vdsc_regs.h b/drivers/gpu/drm/i915/display/intel_vdsc_regs.h
index 64f440fdc22b..8b21dc8e26d5 100644
--- a/drivers/gpu/drm/i915/display/intel_vdsc_regs.h
+++ b/drivers/gpu/drm/i915/display/intel_vdsc_regs.h
@@ -51,8 +51,8 @@
 #define DSCC_PICTURE_PARAMETER_SET_0		_MMIO(0x6BA00)
 #define _DSCA_PPS_0				0x6B200
 #define _DSCC_PPS_0				0x6BA00
-#define DSCA_PPS(pps)				_MMIO(_DSCA_PPS_0 + (pps) * 4)
-#define DSCC_PPS(pps)				_MMIO(_DSCC_PPS_0 + (pps) * 4)
+#define DSCA_PPS(pps)				_MMIO(_DSCA_PPS_0 + ((pps) < 12 ? (pps) : (pps) + 12) * 4)
+#define DSCC_PPS(pps)				_MMIO(_DSCC_PPS_0 + ((pps) < 12 ? (pps) : (pps) + 12) * 4)
 #define _ICL_DSC0_PICTURE_PARAMETER_SET_0_PB	0x78270
 #define _ICL_DSC1_PICTURE_PARAMETER_SET_0_PB	0x78370
 #define _ICL_DSC0_PICTURE_PARAMETER_SET_0_PC	0x78470
diff --git a/drivers/gpu/drm/i915/display/skl_scaler.c b/drivers/gpu/drm/i915/display/skl_scaler.c
index 1e7c97243fcf..8a934bada624 100644
--- a/drivers/gpu/drm/i915/display/skl_scaler.c
+++ b/drivers/gpu/drm/i915/display/skl_scaler.c
@@ -504,7 +504,6 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv,
 {
 	struct drm_plane *plane = NULL;
 	struct intel_plane *intel_plane;
-	struct intel_plane_state *plane_state = NULL;
 	struct intel_crtc_scaler_state *scaler_state =
 		&crtc_state->scaler_state;
 	struct drm_atomic_state *drm_state = crtc_state->uapi.state;
@@ -536,6 +535,7 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv,
 
 	/* walkthrough scaler_users bits and start assigning scalers */
 	for (i = 0; i < sizeof(scaler_state->scaler_users) * 8; i++) {
+		struct intel_plane_state *plane_state = NULL;
 		int *scaler_id;
 		const char *name;
 		int idx, ret;
diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c
index f5c77a018e10..511dc1544854 100644
--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c
+++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c
@@ -18,6 +18,7 @@
 #include "intel_fbc.h"
 #include "intel_frontbuffer.h"
 #include "intel_psr.h"
+#include "intel_psr_regs.h"
 #include "skl_scaler.h"
 #include "skl_universal_plane.h"
 #include "skl_watermark.h"
@@ -629,6 +630,18 @@ skl_plane_disable_arm(struct intel_plane *plane,
 	intel_de_write_fw(dev_priv, PLANE_SURF(pipe, plane_id), 0);
 }
 
+static void icl_plane_disable_sel_fetch_arm(struct intel_plane *plane,
+					    const struct intel_crtc_state *crtc_state)
+{
+	struct drm_i915_private *i915 = to_i915(plane->base.dev);
+	enum pipe pipe = plane->pipe;
+
+	if (!crtc_state->enable_psr2_sel_fetch)
+		return;
+
+	intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id), 0);
+}
+
 static void
 icl_plane_disable_arm(struct intel_plane *plane,
 		      const struct intel_crtc_state *crtc_state)
@@ -642,7 +655,7 @@ icl_plane_disable_arm(struct intel_plane *plane,
 
 	skl_write_plane_wm(plane, crtc_state);
 
-	intel_psr2_disable_plane_sel_fetch_arm(plane, crtc_state);
+	icl_plane_disable_sel_fetch_arm(plane, crtc_state);
 	intel_de_write_fw(dev_priv, PLANE_CTL(pipe, plane_id), 0);
 	intel_de_write_fw(dev_priv, PLANE_SURF(pipe, plane_id), 0);
 }
@@ -1197,6 +1210,48 @@ skl_plane_update_arm(struct intel_plane *plane,
 			  skl_plane_surf(plane_state, 0));
 }
 
+static void icl_plane_update_sel_fetch_noarm(struct intel_plane *plane,
+					     const struct intel_crtc_state *crtc_state,
+					     const struct intel_plane_state *plane_state,
+					     int color_plane)
+{
+	struct drm_i915_private *i915 = to_i915(plane->base.dev);
+	enum pipe pipe = plane->pipe;
+	const struct drm_rect *clip;
+	u32 val;
+	int x, y;
+
+	if (!crtc_state->enable_psr2_sel_fetch)
+		return;
+
+	clip = &plane_state->psr2_sel_fetch_area;
+
+	val = (clip->y1 + plane_state->uapi.dst.y1) << 16;
+	val |= plane_state->uapi.dst.x1;
+	intel_de_write_fw(i915, PLANE_SEL_FETCH_POS(pipe, plane->id), val);
+
+	x = plane_state->view.color_plane[color_plane].x;
+
+	/*
+	 * From Bspec: UV surface Start Y Position = half of Y plane Y
+	 * start position.
+	 */
+	if (!color_plane)
+		y = plane_state->view.color_plane[color_plane].y + clip->y1;
+	else
+		y = plane_state->view.color_plane[color_plane].y + clip->y1 / 2;
+
+	val = y << 16 | x;
+
+	intel_de_write_fw(i915, PLANE_SEL_FETCH_OFFSET(pipe, plane->id),
+			  val);
+
+	/* Sizes are 0 based */
+	val = (drm_rect_height(clip) - 1) << 16;
+	val |= (drm_rect_width(&plane_state->uapi.src) >> 16) - 1;
+	intel_de_write_fw(i915, PLANE_SEL_FETCH_SIZE(pipe, plane->id), val);
+}
+
 static void
 icl_plane_update_noarm(struct intel_plane *plane,
 		       const struct intel_crtc_state *crtc_state,
@@ -1269,7 +1324,24 @@ icl_plane_update_noarm(struct intel_plane *plane,
 	if (plane_state->force_black)
 		icl_plane_csc_load_black(plane);
 
-	intel_psr2_program_plane_sel_fetch_noarm(plane, crtc_state, plane_state, color_plane);
+	icl_plane_update_sel_fetch_noarm(plane, crtc_state, plane_state, color_plane);
+}
+
+static void icl_plane_update_sel_fetch_arm(struct intel_plane *plane,
+					   const struct intel_crtc_state *crtc_state,
+					   const struct intel_plane_state *plane_state)
+{
+	struct drm_i915_private *i915 = to_i915(plane->base.dev);
+	enum pipe pipe = plane->pipe;
+
+	if (!crtc_state->enable_psr2_sel_fetch)
+		return;
+
+	if (drm_rect_height(&plane_state->psr2_sel_fetch_area) > 0)
+		intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id),
+				  PLANE_SEL_FETCH_CTL_ENABLE);
+	else
+		icl_plane_disable_sel_fetch_arm(plane, crtc_state);
 }
 
 static void
@@ -1296,7 +1368,7 @@ icl_plane_update_arm(struct intel_plane *plane,
 	if (plane_state->scaler_id >= 0)
 		skl_program_plane_scaler(plane, crtc_state, plane_state);
 
-	intel_psr2_program_plane_sel_fetch_arm(plane, crtc_state, plane_state);
+	icl_plane_update_sel_fetch_arm(plane, crtc_state, plane_state);
 
 	/*
 	 * The control register self-arms if the plane was previously
diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c
index bda49734ca33..9b33b8a74d64 100644
--- a/drivers/gpu/drm/i915/display/vlv_dsi.c
+++ b/drivers/gpu/drm/i915/display/vlv_dsi.c
@@ -1532,21 +1532,29 @@ static void intel_dsi_unprepare(struct intel_encoder *encoder)
 	}
 }
 
-static void intel_dsi_encoder_destroy(struct drm_encoder *encoder)
+static const struct drm_encoder_funcs intel_dsi_funcs = {
+	.destroy = intel_encoder_destroy,
+};
+
+static enum drm_mode_status vlv_dsi_mode_valid(struct drm_connector *connector,
+					       struct drm_display_mode *mode)
 {
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(encoder));
+	struct drm_i915_private *i915 = to_i915(connector->dev);
 
-	intel_dsi_vbt_gpio_cleanup(intel_dsi);
-	intel_encoder_destroy(encoder);
-}
+	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
+		enum drm_mode_status status;
 
-static const struct drm_encoder_funcs intel_dsi_funcs = {
-	.destroy = intel_dsi_encoder_destroy,
-};
+		status = intel_cpu_transcoder_mode_valid(i915, mode);
+		if (status != MODE_OK)
+			return status;
+	}
+
+	return intel_dsi_mode_valid(connector, mode);
+}
 
 static const struct drm_connector_helper_funcs intel_dsi_connector_helper_funcs = {
 	.get_modes = intel_dsi_get_modes,
-	.mode_valid = intel_dsi_mode_valid,
+	.mode_valid = vlv_dsi_mode_valid,
 	.atomic_check = intel_digital_connector_atomic_check,
 };
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index e38f06a6e56e..dcbfe32fd30c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -279,7 +279,8 @@ static int proto_context_set_protected(struct drm_i915_private *i915,
 }
 
 static struct i915_gem_proto_context *
-proto_context_create(struct drm_i915_private *i915, unsigned int flags)
+proto_context_create(struct drm_i915_file_private *fpriv,
+		     struct drm_i915_private *i915, unsigned int flags)
 {
 	struct i915_gem_proto_context *pc, *err;
 
@@ -287,6 +288,7 @@ proto_context_create(struct drm_i915_private *i915, unsigned int flags)
 	if (!pc)
 		return ERR_PTR(-ENOMEM);
 
+	pc->fpriv = fpriv;
 	pc->num_user_engines = -1;
 	pc->user_engines = NULL;
 	pc->user_flags = BIT(UCONTEXT_BANNABLE) |
@@ -1622,6 +1624,7 @@ i915_gem_create_context(struct drm_i915_private *i915,
 			err = PTR_ERR(ppgtt);
 			goto err_ctx;
 		}
+		ppgtt->vm.fpriv = pc->fpriv;
 		vm = &ppgtt->vm;
 	}
 	if (vm)
@@ -1741,7 +1744,7 @@ int i915_gem_context_open(struct drm_i915_private *i915,
 	/* 0 reserved for invalid/unassigned ppgtt */
 	xa_init_flags(&file_priv->vm_xa, XA_FLAGS_ALLOC1);
 
-	pc = proto_context_create(i915, 0);
+	pc = proto_context_create(file_priv, i915, 0);
 	if (IS_ERR(pc)) {
 		err = PTR_ERR(pc);
 		goto err;
@@ -1823,6 +1826,7 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
 
 	GEM_BUG_ON(id == 0); /* reserved for invalid/unassigned ppgtt */
 	args->vm_id = id;
+	ppgtt->vm.fpriv = file_priv;
 	return 0;
 
 err_put:
@@ -2285,7 +2289,8 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 		return -EIO;
 	}
 
-	ext_data.pc = proto_context_create(i915, args->flags);
+	ext_data.pc = proto_context_create(file->driver_priv, i915,
+					   args->flags);
 	if (IS_ERR(ext_data.pc))
 		return PTR_ERR(ext_data.pc);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index cb78214a7dcd..03bc7f9d191b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -188,6 +188,9 @@ struct i915_gem_proto_engine {
  * CONTEXT_CREATE_SET_PARAM during GEM_CONTEXT_CREATE.
  */
 struct i915_gem_proto_context {
+	/** @fpriv: Client which creates the context */
+	struct drm_i915_file_private *fpriv;
+
 	/** @vm: See &i915_gem_context.vm */
 	struct i915_address_space *vm;
 
@@ -409,9 +412,9 @@ struct i915_gem_context {
 
 	/** @stale: tracks stale engines to be destroyed */
 	struct {
-		/** @lock: guards engines */
+		/** @stale.lock: guards engines */
 		spinlock_t lock;
-		/** @engines: list of stale engines */
+		/** @stale.engines: list of stale engines */
 		struct list_head engines;
 	} stale;
 };
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index ccc077b74d2d..555022c0652c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -254,6 +254,8 @@ struct i915_execbuffer {
 	struct intel_gt *gt; /* gt for the execbuf */
 	struct intel_context *context; /* logical state for the request */
 	struct i915_gem_context *gem_context; /** caller's context */
+	intel_wakeref_t wakeref;
+	intel_wakeref_t wakeref_gt0;
 
 	/** our requests to build */
 	struct i915_request *requests[MAX_ENGINE_INSTANCE + 1];
@@ -1157,7 +1159,7 @@ static void reloc_cache_unmap(struct reloc_cache *cache)
 
 	vaddr = unmask_page(cache->vaddr);
 	if (cache->vaddr & KMAP)
-		kunmap_atomic(vaddr);
+		kunmap_local(vaddr);
 	else
 		io_mapping_unmap_atomic((void __iomem *)vaddr);
 }
@@ -1173,7 +1175,7 @@ static void reloc_cache_remap(struct reloc_cache *cache,
 	if (cache->vaddr & KMAP) {
 		struct page *page = i915_gem_object_get_page(obj, cache->page);
 
-		vaddr = kmap_atomic(page);
+		vaddr = kmap_local_page(page);
 		cache->vaddr = unmask_flags(cache->vaddr) |
 			(unsigned long)vaddr;
 	} else {
@@ -1203,7 +1205,7 @@ static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer
 		if (cache->vaddr & CLFLUSH_AFTER)
 			mb();
 
-		kunmap_atomic(vaddr);
+		kunmap_local(vaddr);
 		i915_gem_object_finish_access(obj);
 	} else {
 		struct i915_ggtt *ggtt = cache_to_ggtt(cache);
@@ -1235,7 +1237,7 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
 	struct page *page;
 
 	if (cache->vaddr) {
-		kunmap_atomic(unmask_page(cache->vaddr));
+		kunmap_local(unmask_page(cache->vaddr));
 	} else {
 		unsigned int flushes;
 		int err;
@@ -1257,7 +1259,7 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
 	if (!obj->mm.dirty)
 		set_page_dirty(page);
 
-	vaddr = kmap_atomic(page);
+	vaddr = kmap_local_page(page);
 	cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
 	cache->page = pageno;
 
@@ -1679,7 +1681,7 @@ static int eb_copy_relocations(const struct i915_execbuffer *eb)
 		urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr);
 		size = nreloc * sizeof(*relocs);
 
-		relocs = kvmalloc_array(size, 1, GFP_KERNEL);
+		relocs = kvmalloc_array(1, size, GFP_KERNEL);
 		if (!relocs) {
 			err = -ENOMEM;
 			goto err;
@@ -2720,13 +2722,13 @@ eb_select_engine(struct i915_execbuffer *eb)
 
 	for_each_child(ce, child)
 		intel_context_get(child);
-	intel_gt_pm_get(gt);
+	eb->wakeref = intel_gt_pm_get(ce->engine->gt);
 	/*
 	 * Keep GT0 active on MTL so that i915_vma_parked() doesn't
 	 * free VMAs while execbuf ioctl is validating VMAs.
 	 */
 	if (gt->info.id)
-		intel_gt_pm_get(to_gt(gt->i915));
+		eb->wakeref_gt0 = intel_gt_pm_get(to_gt(gt->i915));
 
 	if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
 		err = intel_context_alloc_state(ce);
@@ -2766,9 +2768,9 @@ eb_select_engine(struct i915_execbuffer *eb)
 
 err:
 	if (gt->info.id)
-		intel_gt_pm_put(to_gt(gt->i915));
+		intel_gt_pm_put(to_gt(gt->i915), eb->wakeref_gt0);
 
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(ce->engine->gt, eb->wakeref);
 	for_each_child(ce, child)
 		intel_context_put(child);
 	intel_context_put(ce);
@@ -2786,8 +2788,8 @@ eb_put_engine(struct i915_execbuffer *eb)
 	 * i915_vma_parked() from interfering while execbuf validates vmas.
 	 */
 	if (eb->gt->info.id)
-		intel_gt_pm_put(to_gt(eb->gt->i915));
-	intel_gt_pm_put(eb->gt);
+		intel_gt_pm_put(to_gt(eb->gt->i915), eb->wakeref_gt0);
+	intel_gt_pm_put(eb->context->engine->gt, eb->wakeref);
 	for_each_child(eb->context, child)
 		intel_context_put(child);
 	intel_context_put(eb->context);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index 6bc26b4b06b8..ea7561ae6e13 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -36,7 +36,7 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
 	struct sg_table *st;
 	struct scatterlist *sg;
 	unsigned int npages; /* restricted by sg_alloc_table */
-	int max_order = MAX_ORDER;
+	int max_order = MAX_PAGE_ORDER;
 	unsigned int max_segment;
 	gfp_t gfp;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index c26d87555825..58e6c680fe0d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -106,6 +106,10 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 
 	INIT_LIST_HEAD(&obj->mm.link);
 
+#ifdef CONFIG_PROC_FS
+	INIT_LIST_HEAD(&obj->client_link);
+#endif
+
 	INIT_LIST_HEAD(&obj->lut_list);
 	spin_lock_init(&obj->lut_lock);
 
@@ -293,6 +297,10 @@ void __i915_gem_free_object_rcu(struct rcu_head *head)
 		container_of(head, typeof(*obj), rcu);
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 
+	/* Keep placements alive until here for RCU read access from fdinfo. */
+	if (obj->mm.n_placements > 1)
+		kfree(obj->mm.placements);
+
 	i915_gem_object_free(obj);
 
 	GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
@@ -389,9 +397,6 @@ void __i915_gem_free_object(struct drm_i915_gem_object *obj)
 	if (obj->ops->release)
 		obj->ops->release(obj);
 
-	if (obj->mm.n_placements > 1)
-		kfree(obj->mm.placements);
-
 	if (obj->shares_resv_from)
 		i915_vm_resv_put(obj->shares_resv_from);
 
@@ -442,6 +447,8 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj)
 
 	GEM_BUG_ON(i915_gem_object_is_framebuffer(obj));
 
+	i915_drm_client_remove_object(obj);
+
 	/*
 	 * Before we free the object, make sure any pure RCU-only
 	 * read-side critical sections are complete, e.g.
@@ -493,17 +500,15 @@ static void
 i915_gem_object_read_from_page_kmap(struct drm_i915_gem_object *obj, u64 offset, void *dst, int size)
 {
 	pgoff_t idx = offset >> PAGE_SHIFT;
-	void *src_map;
 	void *src_ptr;
 
-	src_map = kmap_atomic(i915_gem_object_get_page(obj, idx));
-
-	src_ptr = src_map + offset_in_page(offset);
+	src_ptr = kmap_local_page(i915_gem_object_get_page(obj, idx))
+	          + offset_in_page(offset);
 	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
 		drm_clflush_virt_range(src_ptr, size);
 	memcpy(dst, src_ptr, size);
 
-	kunmap_atomic(src_map);
+	kunmap_local(src_ptr);
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 2292404007c8..0c5cdab278b6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -302,6 +302,18 @@ struct drm_i915_gem_object {
 	 */
 	struct i915_address_space *shares_resv_from;
 
+#ifdef CONFIG_PROC_FS
+	/**
+	 * @client: @i915_drm_client which created the object
+	 */
+	struct i915_drm_client *client;
+
+	/**
+	 * @client_link: Link into @i915_drm_client.objects_list
+	 */
+	struct list_head client_link;
+#endif
+
 	union {
 		struct rcu_head rcu;
 		struct llist_node freed;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
index 5df128e2f4dc..ef85c6dc9fd5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
@@ -65,16 +65,13 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 	dst = vaddr;
 	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
 		struct page *page;
-		void *src;
 
 		page = shmem_read_mapping_page(mapping, i);
 		if (IS_ERR(page))
 			goto err_st;
 
-		src = kmap_atomic(page);
-		memcpy(dst, src, PAGE_SIZE);
+		memcpy_from_page(dst, page, 0, PAGE_SIZE);
 		drm_clflush_virt_range(dst, PAGE_SIZE);
-		kunmap_atomic(src);
 
 		put_page(page);
 		dst += PAGE_SIZE;
@@ -113,16 +110,13 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
 
 		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
 			struct page *page;
-			char *dst;
 
 			page = shmem_read_mapping_page(mapping, i);
 			if (IS_ERR(page))
 				continue;
 
-			dst = kmap_atomic(page);
 			drm_clflush_virt_range(src, PAGE_SIZE);
-			memcpy(dst, src, PAGE_SIZE);
-			kunmap_atomic(dst);
+			memcpy_to_page(page, 0, src, PAGE_SIZE);
 
 			set_page_dirty(page);
 			if (obj->mm.madv == I915_MADV_WILLNEED)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 73a4a4eb29e0..38b72d86560f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -485,11 +485,13 @@ shmem_pwrite(struct drm_i915_gem_object *obj,
 		if (err < 0)
 			return err;
 
-		vaddr = kmap_atomic(page);
+		vaddr = kmap_local_page(page);
+		pagefault_disable();
 		unwritten = __copy_from_user_inatomic(vaddr + pg,
 						      user_data,
 						      len);
-		kunmap_atomic(vaddr);
+		pagefault_enable();
+		kunmap_local(vaddr);
 
 		err = aops->write_end(obj->base.filp, mapping, offset, len,
 				      len - unwritten, page, data);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 1a766d8e7cce..8c88075eeab2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -386,6 +386,27 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915,
 
 	drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val);
 
+	/* Wa_14019821291 */
+	if (MEDIA_VER_FULL(i915) == IP_VER(13, 0)) {
+		/*
+		 * This workaround is primarily implemented by the BIOS.  We
+		 * just need to figure out whether the BIOS has applied the
+		 * workaround (meaning the programmed address falls within
+		 * the DSM) and, if so, reserve that part of the DSM to
+		 * prevent accidental reuse.  The DSM location should be just
+		 * below the WOPCM.
+		 */
+		u64 gscpsmi_base = intel_uncore_read64_2x32(uncore,
+							    MTL_GSCPSMI_BASEADDR_LSB,
+							    MTL_GSCPSMI_BASEADDR_MSB);
+		if (gscpsmi_base >= i915->dsm.stolen.start &&
+		    gscpsmi_base < i915->dsm.stolen.end) {
+			*base = gscpsmi_base;
+			*size = i915->dsm.stolen.end - gscpsmi_base;
+			return;
+		}
+	}
+
 	switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) {
 	case GEN8_STOLEN_RESERVED_1M:
 		*size = 1024 * 1024;
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index 6b9f6cf50bf6..3ff3d8889c6c 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -115,7 +115,7 @@ static int get_huge_pages(struct drm_i915_gem_object *obj)
 		do {
 			struct page *page;
 
-			GEM_BUG_ON(order > MAX_ORDER);
+			GEM_BUG_ON(order > MAX_PAGE_ORDER);
 			page = alloc_pages(GFP | __GFP_ZERO, order);
 			if (!page)
 				goto err;
@@ -1082,7 +1082,7 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val)
 		goto err_unlock;
 
 	for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) {
-		u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n));
+		u32 *ptr = kmap_local_page(i915_gem_object_get_page(obj, n));
 
 		if (needs_flush & CLFLUSH_BEFORE)
 			drm_clflush_virt_range(ptr, PAGE_SIZE);
@@ -1090,12 +1090,12 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val)
 		if (ptr[dword] != val) {
 			pr_err("n=%lu ptr[%u]=%u, val=%u\n",
 			       n, dword, ptr[dword], val);
-			kunmap_atomic(ptr);
+			kunmap_local(ptr);
 			err = -EINVAL;
 			break;
 		}
 
-		kunmap_atomic(ptr);
+		kunmap_local(ptr);
 	}
 
 	i915_gem_object_finish_access(obj);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index 3bef1beec7cb..2a0c0634d446 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -24,7 +24,6 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
 {
 	unsigned int needs_clflush;
 	struct page *page;
-	void *map;
 	u32 *cpu;
 	int err;
 
@@ -34,8 +33,7 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
 		goto out;
 
 	page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
-	map = kmap_atomic(page);
-	cpu = map + offset_in_page(offset);
+	cpu = kmap_local_page(page) + offset_in_page(offset);
 
 	if (needs_clflush & CLFLUSH_BEFORE)
 		drm_clflush_virt_range(cpu, sizeof(*cpu));
@@ -45,7 +43,7 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
 	if (needs_clflush & CLFLUSH_AFTER)
 		drm_clflush_virt_range(cpu, sizeof(*cpu));
 
-	kunmap_atomic(map);
+	kunmap_local(cpu);
 	i915_gem_object_finish_access(ctx->obj);
 
 out:
@@ -57,7 +55,6 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
 {
 	unsigned int needs_clflush;
 	struct page *page;
-	void *map;
 	u32 *cpu;
 	int err;
 
@@ -67,15 +64,14 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
 		goto out;
 
 	page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
-	map = kmap_atomic(page);
-	cpu = map + offset_in_page(offset);
+	cpu = kmap_local_page(page) + offset_in_page(offset);
 
 	if (needs_clflush & CLFLUSH_BEFORE)
 		drm_clflush_virt_range(cpu, sizeof(*cpu));
 
 	*v = *cpu;
 
-	kunmap_atomic(map);
+	kunmap_local(cpu);
 	i915_gem_object_finish_access(ctx->obj);
 
 out:
@@ -85,6 +81,7 @@ out:
 
 static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
 {
+	intel_wakeref_t wakeref;
 	struct i915_vma *vma;
 	u32 __iomem *map;
 	int err = 0;
@@ -99,7 +96,7 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	intel_gt_pm_get(vma->vm->gt);
+	wakeref = intel_gt_pm_get(vma->vm->gt);
 
 	map = i915_vma_pin_iomap(vma);
 	i915_vma_unpin(vma);
@@ -112,12 +109,13 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
 	i915_vma_unpin_iomap(vma);
 
 out_rpm:
-	intel_gt_pm_put(vma->vm->gt);
+	intel_gt_pm_put(vma->vm->gt, wakeref);
 	return err;
 }
 
 static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
 {
+	intel_wakeref_t wakeref;
 	struct i915_vma *vma;
 	u32 __iomem *map;
 	int err = 0;
@@ -132,7 +130,7 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	intel_gt_pm_get(vma->vm->gt);
+	wakeref = intel_gt_pm_get(vma->vm->gt);
 
 	map = i915_vma_pin_iomap(vma);
 	i915_vma_unpin(vma);
@@ -145,7 +143,7 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
 	i915_vma_unpin_iomap(vma);
 
 out_rpm:
-	intel_gt_pm_put(vma->vm->gt);
+	intel_gt_pm_put(vma->vm->gt, wakeref);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 7021b6e9b219..89d4dc8b60c6 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -489,12 +489,12 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
 	for (n = 0; n < real_page_count(obj); n++) {
 		u32 *map;
 
-		map = kmap_atomic(i915_gem_object_get_page(obj, n));
+		map = kmap_local_page(i915_gem_object_get_page(obj, n));
 		for (m = 0; m < DW_PER_PAGE; m++)
 			map[m] = value;
 		if (!has_llc)
 			drm_clflush_virt_range(map, PAGE_SIZE);
-		kunmap_atomic(map);
+		kunmap_local(map);
 	}
 
 	i915_gem_object_finish_access(obj);
@@ -520,7 +520,7 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj,
 	for (n = 0; n < real_page_count(obj); n++) {
 		u32 *map, m;
 
-		map = kmap_atomic(i915_gem_object_get_page(obj, n));
+		map = kmap_local_page(i915_gem_object_get_page(obj, n));
 		if (needs_flush & CLFLUSH_BEFORE)
 			drm_clflush_virt_range(map, PAGE_SIZE);
 
@@ -546,7 +546,7 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj,
 		}
 
 out_unmap:
-		kunmap_atomic(map);
+		kunmap_local(map);
 		if (err)
 			break;
 	}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
index e57f9390076c..d684a70f2c04 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
@@ -504,7 +504,7 @@ static int igt_dmabuf_export_vmap(void *arg)
 	}
 
 	if (memchr_inv(ptr, 0, dmabuf->size)) {
-		pr_err("Exported object not initialiased to zero!\n");
+		pr_err("Exported object not initialised to zero!\n");
 		err = -EINVAL;
 		goto out;
 	}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 72957a36a36b..2c51a2c452fc 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -630,14 +630,14 @@ static bool assert_mmap_offset(struct drm_i915_private *i915,
 static void disable_retire_worker(struct drm_i915_private *i915)
 {
 	i915_gem_driver_unregister__shrinker(i915);
-	intel_gt_pm_get(to_gt(i915));
+	intel_gt_pm_get_untracked(to_gt(i915));
 	cancel_delayed_work_sync(&to_gt(i915)->requests.retire_work);
 }
 
 static void restore_retire_worker(struct drm_i915_private *i915)
 {
 	igt_flush_test(i915);
-	intel_gt_pm_put(to_gt(i915));
+	intel_gt_pm_put_untracked(to_gt(i915));
 	i915_gem_driver_register__shrinker(i915);
 }
 
@@ -778,6 +778,7 @@ err_obj:
 
 static int gtt_set(struct drm_i915_gem_object *obj)
 {
+	intel_wakeref_t wakeref;
 	struct i915_vma *vma;
 	void __iomem *map;
 	int err = 0;
@@ -786,7 +787,7 @@ static int gtt_set(struct drm_i915_gem_object *obj)
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	intel_gt_pm_get(vma->vm->gt);
+	wakeref = intel_gt_pm_get(vma->vm->gt);
 	map = i915_vma_pin_iomap(vma);
 	i915_vma_unpin(vma);
 	if (IS_ERR(map)) {
@@ -798,12 +799,13 @@ static int gtt_set(struct drm_i915_gem_object *obj)
 	i915_vma_unpin_iomap(vma);
 
 out:
-	intel_gt_pm_put(vma->vm->gt);
+	intel_gt_pm_put(vma->vm->gt, wakeref);
 	return err;
 }
 
 static int gtt_check(struct drm_i915_gem_object *obj)
 {
+	intel_wakeref_t wakeref;
 	struct i915_vma *vma;
 	void __iomem *map;
 	int err = 0;
@@ -812,7 +814,7 @@ static int gtt_check(struct drm_i915_gem_object *obj)
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	intel_gt_pm_get(vma->vm->gt);
+	wakeref = intel_gt_pm_get(vma->vm->gt);
 	map = i915_vma_pin_iomap(vma);
 	i915_vma_unpin(vma);
 	if (IS_ERR(map)) {
@@ -828,7 +830,7 @@ static int gtt_check(struct drm_i915_gem_object *obj)
 	i915_vma_unpin_iomap(vma);
 
 out:
-	intel_gt_pm_put(vma->vm->gt);
+	intel_gt_pm_put(vma->vm->gt, wakeref);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index e199d7dbb876..2b0327cc47c2 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -83,7 +83,7 @@ live_context(struct drm_i915_private *i915, struct file *file)
 	int err;
 	u32 id;
 
-	pc = proto_context_create(i915, 0);
+	pc = proto_context_create(fpriv, i915, 0);
 	if (IS_ERR(pc))
 		return ERR_CAST(pc);
 
@@ -152,7 +152,7 @@ kernel_context(struct drm_i915_private *i915,
 	struct i915_gem_context *ctx;
 	struct i915_gem_proto_context *pc;
 
-	pc = proto_context_create(i915, 0);
+	pc = proto_context_create(NULL, i915, 0);
 	if (IS_ERR(pc))
 		return ERR_CAST(pc);
 
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 9895e18df043..fa46d2308b0e 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -5,6 +5,7 @@
 
 #include <linux/log2.h>
 
+#include "gem/i915_gem_internal.h"
 #include "gem/i915_gem_lmem.h"
 
 #include "gen8_ppgtt.h"
@@ -222,6 +223,9 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
 {
 	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 
+	if (vm->rsvd.obj)
+		i915_gem_object_put(vm->rsvd.obj);
+
 	if (intel_vgpu_active(vm->i915))
 		gen8_ppgtt_notify_vgt(ppgtt, false);
 
@@ -950,6 +954,41 @@ err_pd:
 	return ERR_PTR(err);
 }
 
+static int gen8_init_rsvd(struct i915_address_space *vm)
+{
+	struct drm_i915_private *i915 = vm->i915;
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	int ret;
+
+	/* GPU-only memory: prefer lmem, fall back to an internal object. */
+	obj = i915_gem_object_create_lmem(i915, PAGE_SIZE,
+					  I915_BO_ALLOC_VOLATILE |
+					  I915_BO_ALLOC_GPU_ONLY);
+	if (IS_ERR(obj))
+		obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	vma = i915_vma_instance(obj, vm, NULL);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto unref;
+	}
+
+	ret = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
+	if (ret)
+		goto unref;
+
+	vm->rsvd.vma = i915_vma_make_unshrinkable(vma);
+	vm->rsvd.obj = obj;	/* put again in gen8_ppgtt_cleanup() */
+	vm->total -= vma->node.size;	/* shrink vm->total by the reserved node */
+	return 0;
+unref:
+	i915_gem_object_put(obj);
+	return ret;
+}
+
 /*
  * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
  * with a net effect resembling a 2-level page table in normal x86 terms. Each
@@ -1031,6 +1070,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 	if (intel_vgpu_active(gt->i915))
 		gen8_ppgtt_notify_vgt(ppgtt, true);
 
+	err = gen8_init_rsvd(&ppgtt->vm);
+	if (err)
+		goto err_put;
+
 	return ppgtt;
 
 err_put:
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index ecc990ec1b95..d650beb8ed22 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -28,11 +28,14 @@ static void irq_disable(struct intel_breadcrumbs *b)
 
 static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
 {
+	intel_wakeref_t wakeref;
+
 	/*
 	 * Since we are waiting on a request, the GPU should be busy
 	 * and should have its own rpm reference.
 	 */
-	if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt)))
+	wakeref = intel_gt_pm_get_if_awake(b->irq_engine->gt);
+	if (GEM_WARN_ON(!wakeref))
 		return;
 
 	/*
@@ -41,7 +44,7 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
 	 * which we can add a new waiter and avoid the cost of re-enabling
 	 * the irq.
 	 */
-	WRITE_ONCE(b->irq_armed, true);
+	WRITE_ONCE(b->irq_armed, wakeref);
 
 	/* Requests may have completed before we could enable the interrupt. */
 	if (!b->irq_enabled++ && b->irq_enable(b))
@@ -61,12 +64,14 @@ static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
 
 static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
 {
+	intel_wakeref_t wakeref = b->irq_armed;
+
 	GEM_BUG_ON(!b->irq_enabled);
 	if (!--b->irq_enabled)
 		b->irq_disable(b);
 
-	WRITE_ONCE(b->irq_armed, false);
-	intel_gt_pm_put_async(b->irq_engine->gt);
+	WRITE_ONCE(b->irq_armed, 0);
+	intel_gt_pm_put_async(b->irq_engine->gt, wakeref);
 }
 
 static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
index 72dfd3748c4c..bdf09fd67b6e 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
@@ -13,6 +13,7 @@
 #include <linux/types.h>
 
 #include "intel_engine_types.h"
+#include "intel_wakeref.h"
 
 /*
  * Rather than have every client wait upon all user interrupts,
@@ -43,7 +44,7 @@ struct intel_breadcrumbs {
 	spinlock_t irq_lock; /* protects the interrupt from hardirq context */
 	struct irq_work irq_work; /* for use from inside irq_lock */
 	unsigned int irq_enabled;
-	bool irq_armed;
+	intel_wakeref_t irq_armed;
 
 	/* Not all breadcrumbs are attached to physical HW */
 	intel_engine_mask_t	engine_mask;
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index a53b26178f0a..a2f1245741bb 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -6,6 +6,7 @@
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_pm.h"
 
+#include "i915_drm_client.h"
 #include "i915_drv.h"
 #include "i915_trace.h"
 
@@ -50,6 +51,7 @@ intel_context_create(struct intel_engine_cs *engine)
 
 int intel_context_alloc_state(struct intel_context *ce)
 {
+	struct i915_gem_context *ctx;
 	int err = 0;
 
 	if (mutex_lock_interruptible(&ce->pin_mutex))
@@ -66,6 +68,18 @@ int intel_context_alloc_state(struct intel_context *ce)
 			goto unlock;
 
 		set_bit(CONTEXT_ALLOC_BIT, &ce->flags);
+
+		rcu_read_lock();
+		ctx = rcu_dereference(ce->gem_context);
+		if (ctx && !kref_get_unless_zero(&ctx->ref))
+			ctx = NULL;
+		rcu_read_unlock();
+		if (ctx) {
+			if (ctx->client)
+				i915_drm_client_add_context_objects(ctx->client,
+								    ce);
+			i915_gem_context_put(ctx);
+		}
 	}
 
 unlock:
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index a80e3b7c24ff..25564c01507e 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -212,7 +212,7 @@ static inline void intel_context_enter(struct intel_context *ce)
 		return;
 
 	ce->ops->enter(ce);
-	intel_gt_pm_get(ce->vm->gt);
+	ce->wakeref = intel_gt_pm_get(ce->vm->gt);
 }
 
 static inline void intel_context_mark_active(struct intel_context *ce)
@@ -229,7 +229,7 @@ static inline void intel_context_exit(struct intel_context *ce)
 	if (--ce->active_count)
 		return;
 
-	intel_gt_pm_put_async(ce->vm->gt);
+	intel_gt_pm_put_async(ce->vm->gt, ce->wakeref);
 	ce->ops->exit(ce);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index aceaac28a33e..7eccbd70d89f 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -17,6 +17,7 @@
 #include "i915_utils.h"
 #include "intel_engine_types.h"
 #include "intel_sseu.h"
+#include "intel_wakeref.h"
 
 #include "uc/intel_guc_fwif.h"
 
@@ -112,6 +113,7 @@ struct intel_context {
 	u32 ring_size;
 	struct intel_ring *ring;
 	struct intel_timeline *timeline;
+	intel_wakeref_t wakeref;
 
 	unsigned long flags;
 #define CONTEXT_BARRIER_BIT		0
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 4a11219e560e..40687806d22a 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -47,7 +47,7 @@
 #define GEN9_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
 #define GEN11_LR_CONTEXT_RENDER_SIZE	(14 * PAGE_SIZE)
 
-#define GEN8_LR_CONTEXT_OTHER_SIZE	( 2 * PAGE_SIZE)
+#define GEN8_LR_CONTEXT_OTHER_SIZE	(2 * PAGE_SIZE)
 
 #define MAX_MMIO_BASES 3
 struct engine_info {
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 9a527e1f5be6..1a8e2b7db013 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -188,7 +188,7 @@ static void heartbeat(struct work_struct *wrk)
 			 * low latency and no jitter] the chance to naturally
 			 * complete before being preempted.
 			 */
-			attr.priority = 0;
+			attr.priority = I915_PRIORITY_NORMAL;
 			if (rq->sched.attr.priority >= attr.priority)
 				attr.priority = I915_PRIORITY_HEARTBEAT;
 			if (rq->sched.attr.priority >= attr.priority)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index e91fc881dbf1..96bdb93a948d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -63,7 +63,7 @@ static int __engine_unpark(struct intel_wakeref *wf)
 
 	ENGINE_TRACE(engine, "\n");
 
-	intel_gt_pm_get(engine->gt);
+	engine->wakeref_track = intel_gt_pm_get(engine->gt);
 
 	/* Discard stale context state from across idling */
 	ce = engine->kernel_context;
@@ -122,6 +122,7 @@ __queue_and_release_pm(struct i915_request *rq,
 	 */
 	GEM_BUG_ON(rq->context->active_count != 1);
 	__intel_gt_pm_get(engine->gt);
+	rq->context->wakeref = intel_wakeref_track(&engine->gt->wakeref);
 
 	/*
 	 * We have to serialise all potential retirement paths with our
@@ -285,7 +286,7 @@ static int __engine_park(struct intel_wakeref *wf)
 		engine->park(engine);
 
 	/* While gt calls i915_vma_parked(), we have to break the lock cycle */
-	intel_gt_pm_put_async(engine->gt);
+	intel_gt_pm_put_async(engine->gt, engine->wakeref_track);
 	return 0;
 }
 
@@ -296,7 +297,7 @@ static const struct intel_wakeref_ops wf_ops = {
 
 void intel_engine_init__pm(struct intel_engine_cs *engine)
 {
-	intel_wakeref_init(&engine->wakeref, engine->i915, &wf_ops);
+	intel_wakeref_init(&engine->wakeref, engine->i915, &wf_ops, engine->name);
 	intel_engine_init_heartbeat(engine);
 
 	intel_gsc_idle_msg_enable(engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
index fdd4ddd3a978..a8eac59e3779 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
@@ -118,9 +118,15 @@
 #define   CCID_EXTENDED_STATE_RESTORE		BIT(2)
 #define   CCID_EXTENDED_STATE_SAVE		BIT(3)
 #define RING_BB_PER_CTX_PTR(base)		_MMIO((base) + 0x1c0) /* gen8+ */
+#define   PER_CTX_BB_FORCE			BIT(2)
+#define   PER_CTX_BB_VALID			BIT(0)
+
 #define RING_INDIRECT_CTX(base)			_MMIO((base) + 0x1c4) /* gen8+ */
 #define RING_INDIRECT_CTX_OFFSET(base)		_MMIO((base) + 0x1c8) /* gen8+ */
 #define ECOSKPD(base)				_MMIO((base) + 0x1d0)
+#define   XEHP_BLITTER_SCHEDULING_MODE_MASK	REG_GENMASK(12, 11)
+#define   XEHP_BLITTER_ROUND_ROBIN_MODE		\
+		REG_FIELD_PREP(XEHP_BLITTER_SCHEDULING_MODE_MASK, 1)
 #define   ECO_CONSTANT_BUFFER_SR_DISABLE	REG_BIT(4)
 #define   ECO_GATING_CX_ONLY			REG_BIT(3)
 #define   GEN6_BLITTER_FBC_NOTIFY		REG_BIT(3)
@@ -257,5 +263,7 @@
 #define VDBOX_CGCTL3F18(base)			_MMIO((base) + 0x3f18)
 #define   ALNUNIT_CLKGATE_DIS			REG_BIT(13)
 
+#define VDBOX_CGCTL3F1C(base)			_MMIO((base) + 0x3f1c)
+#define   MFXPIPE_CLKGATE_DIS			REG_BIT(3)
 
 #endif /* __INTEL_ENGINE_REGS__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 8769760257fd..960e6be2042f 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -446,7 +446,9 @@ struct intel_engine_cs {
 	unsigned long serial;
 
 	unsigned long wakeref_serial;
+	intel_wakeref_t wakeref_track;
 	struct intel_wakeref wakeref;
+
 	struct file *default_state;
 
 	struct {
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 118164ddbb2e..833987015b8b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -41,12 +41,15 @@ void intel_engine_add_user(struct intel_engine_cs *engine)
 	llist_add(&engine->uabi_llist, &engine->i915->uabi_engines_llist);
 }
 
-static const u8 uabi_classes[] = {
+#define I915_NO_UABI_CLASS ((u16)(-1))
+
+static const u16 uabi_classes[] = {
 	[RENDER_CLASS] = I915_ENGINE_CLASS_RENDER,
 	[COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY,
 	[VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO,
 	[VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE,
 	[COMPUTE_CLASS] = I915_ENGINE_CLASS_COMPUTE,
+	[OTHER_CLASS] = I915_NO_UABI_CLASS, /* Not exposed to users, no uabi class. */
 };
 
 static int engine_cmp(void *priv, const struct list_head *A,
@@ -200,6 +203,7 @@ static void engine_rename(struct intel_engine_cs *engine, const char *name, u16
 
 void intel_engines_driver_register(struct drm_i915_private *i915)
 {
+	u16 name_instance, other_instance = 0;
 	struct legacy_ring ring = {};
 	struct list_head *it, *next;
 	struct rb_node **p, *prev;
@@ -216,27 +220,28 @@ void intel_engines_driver_register(struct drm_i915_private *i915)
 		if (intel_gt_has_unrecoverable_error(engine->gt))
 			continue; /* ignore incomplete engines */
 
-		/*
-		 * We don't want to expose the GSC engine to the users, but we
-		 * still rename it so it is easier to identify in the debug logs
-		 */
-		if (engine->id == GSC0) {
-			engine_rename(engine, "gsc", 0);
-			continue;
-		}
-
 		GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes));
 		engine->uabi_class = uabi_classes[engine->class];
+		if (engine->uabi_class == I915_NO_UABI_CLASS) {
+			name_instance = other_instance++;
+		} else {
+			GEM_BUG_ON(engine->uabi_class >=
+				   ARRAY_SIZE(i915->engine_uabi_class_count));
+			name_instance =
+				i915->engine_uabi_class_count[engine->uabi_class]++;
+		}
+		engine->uabi_instance = name_instance;
 
-		GEM_BUG_ON(engine->uabi_class >=
-			   ARRAY_SIZE(i915->engine_uabi_class_count));
-		engine->uabi_instance =
-			i915->engine_uabi_class_count[engine->uabi_class]++;
-
-		/* Replace the internal name with the final user facing name */
+		/*
+		 * Replace the internal name with the final user- and log-facing
+		 * name.
+		 */
 		engine_rename(engine,
 			      intel_engine_class_repr(engine->class),
-			      engine->uabi_instance);
+			      name_instance);
+
+		if (engine->uabi_class == I915_NO_UABI_CLASS)
+			continue;
 
 		rb_link_node(&engine->uabi_node, prev, p);
 		rb_insert_color(&engine->uabi_node, &i915->uabi_engines);
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index e8f42ec6b1b4..42aade0faf2d 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -630,7 +630,7 @@ static void __execlists_schedule_out(struct i915_request * const rq,
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
 	if (engine->fw_domain && !--engine->fw_active)
 		intel_uncore_forcewake_put(engine->uncore, engine->fw_domain);
-	intel_gt_pm_put_async(engine->gt);
+	intel_gt_pm_put_async_untracked(engine->gt);
 
 	/*
 	 * If this is part of a virtual engine, its next request may
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 15fc8e4703f4..21a7e3191c18 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -245,16 +245,15 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
 	gen8_ggtt_invalidate(ggtt);
 
 	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) {
-		if (intel_guc_tlb_invalidation_is_available(&gt->uc.guc)) {
+		if (intel_guc_tlb_invalidation_is_available(&gt->uc.guc))
 			guc_ggtt_ct_invalidate(gt);
-		} else if (GRAPHICS_VER(i915) >= 12) {
+		else if (GRAPHICS_VER(i915) >= 12)
 			intel_uncore_write_fw(gt->uncore,
 					      GEN12_GUC_TLB_INV_CR,
 					      GEN12_GUC_TLB_INV_CR_INVALIDATE);
-		} else {
+		else
 			intel_uncore_write_fw(gt->uncore,
 					      GEN8_GTCR, GEN8_GTCR_INVALIDATE);
-		}
 	}
 }
 
@@ -297,7 +296,7 @@ static bool should_update_ggtt_with_bind(struct i915_ggtt *ggtt)
 	return intel_gt_is_bind_context_ready(gt);
 }
 
-static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt)
+static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt, intel_wakeref_t *wakeref)
 {
 	struct intel_context *ce;
 	struct intel_gt *gt = ggtt->vm.gt;
@@ -314,7 +313,8 @@ static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt)
 	 * would conflict with fs_reclaim trying to allocate memory while
 	 * doing rpm_resume().
 	 */
-	if (!intel_gt_pm_get_if_awake(gt))
+	*wakeref = intel_gt_pm_get_if_awake(gt);
+	if (!*wakeref)
 		return NULL;
 
 	intel_engine_pm_get(ce->engine);
@@ -322,10 +322,10 @@ static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt)
 	return ce;
 }
 
-static void gen8_ggtt_bind_put_ce(struct intel_context *ce)
+static void gen8_ggtt_bind_put_ce(struct intel_context *ce, intel_wakeref_t wakeref)
 {
 	intel_engine_pm_put(ce->engine);
-	intel_gt_pm_put(ce->engine->gt);
+	intel_gt_pm_put(ce->engine->gt, wakeref);
 }
 
 static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset,
@@ -338,12 +338,13 @@ static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset,
 	struct sgt_iter iter;
 	struct i915_request *rq;
 	struct intel_context *ce;
+	intel_wakeref_t wakeref;
 	u32 *cs;
 
 	if (!num_entries)
 		return true;
 
-	ce = gen8_ggtt_bind_get_ce(ggtt);
+	ce = gen8_ggtt_bind_get_ce(ggtt, &wakeref);
 	if (!ce)
 		return false;
 
@@ -419,13 +420,13 @@ queue_err_rq:
 		offset += n_ptes;
 	}
 
-	gen8_ggtt_bind_put_ce(ce);
+	gen8_ggtt_bind_put_ce(ce, wakeref);
 	return true;
 
 err_rq:
 	i915_request_put(rq);
 put_ce:
-	gen8_ggtt_bind_put_ce(ce);
+	gen8_ggtt_bind_put_ce(ce, wakeref);
 	return false;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.h b/drivers/gpu/drm/i915/gt/intel_gsc.h
index 7ab3ca0f9f26..013c64251448 100644
--- a/drivers/gpu/drm/i915/gt/intel_gsc.h
+++ b/drivers/gpu/drm/i915/gt/intel_gsc.h
@@ -21,8 +21,11 @@ struct mei_aux_device;
 /**
  * struct intel_gsc - graphics security controller
  *
- * @gem_obj: scratch memory GSC operations
- * @intf : gsc interface
+ * @intf: gsc interface
+ * @intf.adev: MEI aux. device for this @intf
+ * @intf.gem_obj: scratch memory for GSC operations
+ * @intf.irq: IRQ for this device (%-1 for no IRQ)
+ * @intf.id: this interface's id number/index
  */
 struct intel_gsc {
 	struct intel_gsc_intf {
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index ba1186fc524f..a425db5ed3a2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -451,7 +451,7 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt)
 
 		spin_lock_irqsave(&uncore->lock, flags);
 		intel_uncore_posting_read_fw(uncore,
-					     RING_HEAD(RENDER_RING_BASE));
+					     RING_TAIL(RENDER_RING_BASE));
 		spin_unlock_irqrestore(&uncore->lock, flags);
 	}
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index e1f13735f530..608f5c872928 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -82,6 +82,10 @@ struct drm_printer;
 		  ##__VA_ARGS__);					\
 } while (0)
 
+#define NEEDS_FASTCOLOR_BLT_WABB(engine) ( /* copy engine 0, GT IP 12.55-12.71 */ \
+	IS_GFX_GT_IP_RANGE((engine)->gt, IP_VER(12, 55), IP_VER(12, 71)) && \
+	(engine)->class == COPY_ENGINE_CLASS && (engine)->instance == 0)
+
 static inline bool gt_is_root(struct intel_gt *gt)
 {
 	return !gt->info.id;
@@ -114,6 +118,11 @@ static inline struct intel_gt *gsc_to_gt(struct intel_gsc *gsc)
 	return container_of(gsc, struct intel_gt, gsc);
 }
 
+static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc)
+{
+	return guc_to_gt(guc)->i915;
+}
+
 void intel_gt_common_init_early(struct intel_gt *gt);
 int intel_root_gt_init_early(struct drm_i915_private *i915);
 int intel_gt_assign_ggtt(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index 34913912d8ae..e253750a51c5 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -388,8 +388,7 @@ void intel_gt_mcr_lock(struct intel_gt *gt, unsigned long *flags)
 		 * registers.  This wakeref will be released in the unlock
 		 * routine.
 		 *
-		 * This is expected to become a formally documented/numbered
-		 * workaround soon.
+		 * Wa_22018931422
 		 */
 		intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_GT);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index f5899d503e23..220ac4f92edf 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -28,19 +28,20 @@
 static void user_forcewake(struct intel_gt *gt, bool suspend)
 {
 	int count = atomic_read(&gt->user_wakeref);
+	intel_wakeref_t wakeref;
 
 	/* Inside suspend/resume so single threaded, no races to worry about. */
 	if (likely(!count))
 		return;
 
-	intel_gt_pm_get(gt);
+	wakeref = intel_gt_pm_get(gt);
 	if (suspend) {
 		GEM_BUG_ON(count > atomic_read(&gt->wakeref.count));
 		atomic_sub(count, &gt->wakeref.count);
 	} else {
 		atomic_add(count, &gt->wakeref.count);
 	}
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(gt, wakeref);
 }
 
 static void runtime_begin(struct intel_gt *gt)
@@ -138,7 +139,7 @@ void intel_gt_pm_init_early(struct intel_gt *gt)
 	 * runtime_pm is per-device rather than per-tile, so this is still the
 	 * correct structure.
 	 */
-	intel_wakeref_init(&gt->wakeref, gt->i915, &wf_ops);
+	intel_wakeref_init(&gt->wakeref, gt->i915, &wf_ops, "GT");
 	seqcount_mutex_init(&gt->stats.lock, &gt->wakeref.mutex);
 }
 
@@ -167,7 +168,7 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
 	enum intel_engine_id id;
 	intel_wakeref_t wakeref;
 
-	GT_TRACE(gt, "force:%s", str_yes_no(force));
+	GT_TRACE(gt, "force:%s\n", str_yes_no(force));
 
 	/* Use a raw wakeref to avoid calling intel_display_power_get early */
 	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
@@ -236,6 +237,7 @@ int intel_gt_resume(struct intel_gt *gt)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	int err;
 
 	err = intel_gt_has_unrecoverable_error(gt);
@@ -252,7 +254,7 @@ int intel_gt_resume(struct intel_gt *gt)
 	 */
 	gt_sanitize(gt, true);
 
-	intel_gt_pm_get(gt);
+	wakeref = intel_gt_pm_get(gt);
 
 	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
 	intel_rc6_sanitize(&gt->rc6);
@@ -295,7 +297,7 @@ int intel_gt_resume(struct intel_gt *gt)
 
 out_fw:
 	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(gt, wakeref);
 	intel_gt_bind_context_set_ready(gt);
 	return err;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index b1eeb5b33918..911fd0160221 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -16,19 +16,28 @@ static inline bool intel_gt_pm_is_awake(const struct intel_gt *gt)
 	return intel_wakeref_is_active(&gt->wakeref);
 }
 
-static inline void intel_gt_pm_get(struct intel_gt *gt)
+static inline void intel_gt_pm_get_untracked(struct intel_gt *gt)
 {
 	intel_wakeref_get(&gt->wakeref);
 }
 
+static inline intel_wakeref_t intel_gt_pm_get(struct intel_gt *gt)
+{
+	intel_gt_pm_get_untracked(gt);
+	return intel_wakeref_track(&gt->wakeref);
+}
+
 static inline void __intel_gt_pm_get(struct intel_gt *gt)
 {
 	__intel_wakeref_get(&gt->wakeref);
 }
 
-static inline bool intel_gt_pm_get_if_awake(struct intel_gt *gt)
+static inline intel_wakeref_t intel_gt_pm_get_if_awake(struct intel_gt *gt)
 {
-	return intel_wakeref_get_if_active(&gt->wakeref);
+	if (!intel_wakeref_get_if_active(&gt->wakeref))
+		return 0;
+
+	return intel_wakeref_track(&gt->wakeref);
 }
 
 static inline void intel_gt_pm_might_get(struct intel_gt *gt)
@@ -36,12 +45,18 @@ static inline void intel_gt_pm_might_get(struct intel_gt *gt)
 	intel_wakeref_might_get(&gt->wakeref);
 }
 
-static inline void intel_gt_pm_put(struct intel_gt *gt)
+static inline void intel_gt_pm_put_untracked(struct intel_gt *gt)
 {
 	intel_wakeref_put(&gt->wakeref);
 }
 
-static inline void intel_gt_pm_put_async(struct intel_gt *gt)
+static inline void intel_gt_pm_put(struct intel_gt *gt, intel_wakeref_t handle)
+{
+	intel_wakeref_untrack(&gt->wakeref, handle);
+	intel_gt_pm_put_untracked(gt);
+}
+
+static inline void intel_gt_pm_put_async_untracked(struct intel_gt *gt)
 {
 	intel_wakeref_put_async(&gt->wakeref);
 }
@@ -51,9 +66,14 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt)
 	intel_wakeref_might_put(&gt->wakeref);
 }
 
-#define with_intel_gt_pm(gt, tmp) \
-	for (tmp = 1, intel_gt_pm_get(gt); tmp; \
-	     intel_gt_pm_put(gt), tmp = 0)
+static inline void intel_gt_pm_put_async(struct intel_gt *gt, intel_wakeref_t handle)
+{
+	intel_wakeref_untrack(&gt->wakeref, handle);
+	intel_gt_pm_put_async_untracked(gt);
+}
+
+#define with_intel_gt_pm(gt, wf) \
+	for (wf = intel_gt_pm_get(gt); wf; intel_gt_pm_put(gt, wf), wf = 0)
 
 /**
  * with_intel_gt_pm_if_awake - if GT is PM awake, get a reference to prevent
@@ -64,7 +84,7 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt)
  * @wf: pointer to a temporary wakeref.
  */
 #define with_intel_gt_pm_if_awake(gt, wf) \
-	for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt), wf = 0)
+	for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt, wf), wf = 0)
 
 static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index f900cc68d6d9..7114c116e928 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -27,7 +27,7 @@
 void intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt)
 {
 	atomic_inc(&gt->user_wakeref);
-	intel_gt_pm_get(gt);
+	intel_gt_pm_get_untracked(gt);
 	if (GRAPHICS_VER(gt->i915) >= 6)
 		intel_uncore_forcewake_user_get(gt->uncore);
 }
@@ -36,7 +36,7 @@ void intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt)
 {
 	if (GRAPHICS_VER(gt->i915) >= 6)
 		intel_uncore_forcewake_user_put(gt->uncore);
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put_untracked(gt);
 	atomic_dec(&gt->user_wakeref);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index eecd0a87a647..50962cfd1353 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -469,6 +469,9 @@
 #define XEHP_PSS_MODE2				MCR_REG(0x703c)
 #define   SCOREBOARD_STALL_FLUSH_CONTROL	REG_BIT(5)
 
+#define XEHP_PSS_CHICKEN			MCR_REG(0x7044)
+#define   FD_END_COLLECT			REG_BIT(5)
+
 #define GEN7_SC_INSTDONE			_MMIO(0x7100)
 #define GEN12_SC_INSTDONE_EXTRA			_MMIO(0x7104)
 #define GEN12_SC_INSTDONE_EXTRA2		_MMIO(0x7108)
@@ -537,6 +540,9 @@
 #define XEHP_SQCM				MCR_REG(0x8724)
 #define   EN_32B_ACCESS				REG_BIT(30)
 
+#define MTL_GSCPSMI_BASEADDR_LSB		_MMIO(0x880c)
+#define MTL_GSCPSMI_BASEADDR_MSB		_MMIO(0x8810)
+
 #define HSW_IDICR				_MMIO(0x9008)
 #define   IDIHASHMSK(x)				(((x) & 0x3f) << 16)
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 4fbed27ef0ec..86f73fe558ca 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -63,6 +63,9 @@ struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz)
 	if (!IS_ERR(obj)) {
 		obj->base.resv = i915_vm_resv_get(vm);
 		obj->shares_resv_from = vm;
+
+		if (vm->fpriv)
+			i915_drm_client_add_object(vm->fpriv->client, obj);
 	}
 
 	return obj;
@@ -84,6 +87,9 @@ struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
 	if (!IS_ERR(obj)) {
 		obj->base.resv = i915_vm_resv_get(vm);
 		obj->shares_resv_from = vm;
+
+		if (vm->fpriv)
+			i915_drm_client_add_object(vm->fpriv->client, obj);
 	}
 
 	return obj;
@@ -95,6 +101,16 @@ int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
 	void *vaddr;
 
 	type = intel_gt_coherent_map_type(vm->gt, obj, true);
+	/*
+	 * FIXME: It is suspected that some Address Translation Service (ATS)
+	 * issue on IOMMU is causing CAT errors to occur on some MTL workloads.
+	 * Applying a write barrier to the ppgtt set entry functions appeared
+	 * to have no effect, so we must temporarily use I915_MAP_WC here on
+	 * MTL until a proper ATS solution is found.
+	 */
+	if (IS_METEORLAKE(vm->i915))
+		type = I915_MAP_WC;
+
 	vaddr = i915_gem_object_pin_map_unlocked(obj, type);
 	if (IS_ERR(vaddr))
 		return PTR_ERR(vaddr);
@@ -109,6 +125,16 @@ int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object
 	void *vaddr;
 
 	type = intel_gt_coherent_map_type(vm->gt, obj, true);
+	/*
+	 * FIXME: It is suspected that some Address Translation Service (ATS)
+	 * issue on IOMMU is causing CAT errors to occur on some MTL workloads.
+	 * Applying a write barrier to the ppgtt set entry functions appeared
+	 * to have no effect, so we must temporarily use I915_MAP_WC here on
+	 * MTL until a proper ATS solution is found.
+	 */
+	if (IS_METEORLAKE(vm->i915))
+		type = I915_MAP_WC;
+
 	vaddr = i915_gem_object_pin_map(obj, type);
 	if (IS_ERR(vaddr))
 		return PTR_ERR(vaddr);
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index b471edac2699..6b85222ee3ea 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -249,8 +249,13 @@ struct i915_address_space {
 	struct work_struct release_work;
 
 	struct drm_mm mm;
+	struct {
+		struct drm_i915_gem_object *obj;
+		struct i915_vma *vma;
+	} rsvd;
 	struct intel_gt *gt;
 	struct drm_i915_private *i915;
+	struct drm_i915_file_private *fpriv;
 	struct device *dma;
 	u64 total;		/* size addr space maps (ex. 2GB for ggtt) */
 	u64 reserved;		/* size addr space reserved */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index eaf66d903166..7c367ba8d9dc 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -829,6 +829,18 @@ lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
 }
 
 static void
+lrc_setup_bb_per_ctx(u32 *regs,
+		     const struct intel_engine_cs *engine,
+		     u32 ctx_bb_ggtt_addr)
+{
+	GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
+	regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
+		ctx_bb_ggtt_addr |
+		PER_CTX_BB_FORCE |
+		PER_CTX_BB_VALID;
+}
+
+static void
 lrc_setup_indirect_ctx(u32 *regs,
 		       const struct intel_engine_cs *engine,
 		       u32 ctx_bb_ggtt_addr,
@@ -1020,7 +1032,13 @@ static u32 context_wa_bb_offset(const struct intel_context *ce)
 	return PAGE_SIZE * ce->wa_bb_page;
 }
 
-static u32 *context_indirect_bb(const struct intel_context *ce)
+/*
+ * per_ctx below determines which WABB section is used.
+ * When true, the function returns the location of the
+ * PER_CTX_BB.  When false, the function returns the
+ * location of the INDIRECT_CTX.
+ */
+static u32 *context_wabb(const struct intel_context *ce, bool per_ctx)
 {
 	void *ptr;
 
@@ -1029,6 +1047,7 @@ static u32 *context_indirect_bb(const struct intel_context *ce)
 	ptr = ce->lrc_reg_state;
 	ptr -= LRC_STATE_OFFSET; /* back to start of context image */
 	ptr += context_wa_bb_offset(ce);
+	ptr += per_ctx ? PAGE_SIZE : 0;
 
 	return ptr;
 }
@@ -1105,7 +1124,8 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
 
 	if (GRAPHICS_VER(engine->i915) >= 12) {
 		ce->wa_bb_page = context_size / PAGE_SIZE;
-		context_size += PAGE_SIZE;
+		/* INDIRECT_CTX and PER_CTX_BB need separate pages. */
+		context_size += PAGE_SIZE * 2;
 	}
 
 	if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) {
@@ -1407,12 +1427,85 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
 	return gen12_emit_aux_table_inv(ce->engine, cs);
 }
 
+static u32 *xehp_emit_fastcolor_blt_wabb(const struct intel_context *ce, u32 *cs)
+{
+	struct intel_gt *gt = ce->engine->gt;
+	int mocs = gt->mocs.uc_index << 1;
+
+	/*
+	 * Wa_16018031267 / Wa_16018063123 requires that SW forces the
+	 * main copy engine arbitration into round robin mode.  We
+	 * additionally need to submit the following WABB blt command
+	 * to produce 4 subblits with each subblit generating 0 byte
+	 * write requests as WABB:
+	 *
+	 * XY_FASTCOLOR_BLT
+	 *  BG0    -> 5100000E
+	 *  BG1    -> 0000003F (Dest pitch)
+	 *  BG2    -> 00000000 (X1, Y1) = (0, 0)
+	 *  BG3    -> 00040001 (X2, Y2) = (1, 4)
+	 *  BG4    -> scratch
+	 *  BG5    -> scratch
+	 *  BG6-12 -> 00000000
+	 *  BG13   -> 20004004 (Surf. Width = 2, Surf. Height = 5)
+	 *  BG14   -> 00000010 (Qpitch = 4)
+	 *  BG15   -> 00000000
+	 */
+	*cs++ = XY_FAST_COLOR_BLT_CMD | (16 - 2);
+	*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) | 0x3f;
+	*cs++ = 0;
+	*cs++ = 4 << 16 | 1;
+	*cs++ = lower_32_bits(i915_vma_offset(ce->vm->rsvd.vma));
+	*cs++ = upper_32_bits(i915_vma_offset(ce->vm->rsvd.vma));
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0x20004004;
+	*cs++ = 0x10;
+	*cs++ = 0;
+
+	return cs;
+}
+
+static u32 *
+xehp_emit_per_ctx_bb(const struct intel_context *ce, u32 *cs)
+{
+	/* Wa_16018031267, Wa_16018063123 */
+	if (NEEDS_FASTCOLOR_BLT_WABB(ce->engine))
+		cs = xehp_emit_fastcolor_blt_wabb(ce, cs);
+
+	return cs;
+}
+
+static void
+setup_per_ctx_bb(const struct intel_context *ce,
+		 const struct intel_engine_cs *engine,
+		 u32 *(*emit)(const struct intel_context *, u32 *))
+{
+	/* Place PER_CTX_BB on next page after INDIRECT_CTX */
+	u32 * const start = context_wabb(ce, true);
+	u32 *cs;
+
+	cs = emit(ce, start);
+
+	/* PER_CTX_BB must manually terminate */
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
+	lrc_setup_bb_per_ctx(ce->lrc_reg_state, engine,
+			     lrc_indirect_bb(ce) + PAGE_SIZE);
+}
+
 static void
 setup_indirect_ctx_bb(const struct intel_context *ce,
 		      const struct intel_engine_cs *engine,
 		      u32 *(*emit)(const struct intel_context *, u32 *))
 {
-	u32 * const start = context_indirect_bb(ce);
+	u32 * const start = context_wabb(ce, false);
 	u32 *cs;
 
 	cs = emit(ce, start);
@@ -1511,6 +1604,7 @@ u32 lrc_update_regs(const struct intel_context *ce,
 		/* Mutually exclusive wrt to global indirect bb */
 		GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
 		setup_indirect_ctx_bb(ce, engine, fn);
+		setup_per_ctx_bb(ce, engine, xehp_emit_per_ctx_bb);
 	}
 
 	return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE;
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index d5ed904f355d..6801f8b95c53 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -1293,7 +1293,7 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg)
 	if (msg)
 		drm_notice(&engine->i915->drm,
 			   "Resetting %s for %s\n", engine->name, msg);
-	atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]);
+	i915_increase_reset_engine_count(&engine->i915->gpu_error, engine);
 
 	ret = intel_gt_reset_engine(engine);
 	if (ret) {
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index f602895f6d0d..6a3246240e81 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -849,13 +849,12 @@ void intel_sseu_print_topology(struct drm_i915_private *i915,
 			       const struct sseu_dev_info *sseu,
 			       struct drm_printer *p)
 {
-	if (sseu->max_slices == 0) {
+	if (sseu->max_slices == 0)
 		drm_printf(p, "Unavailable\n");
-	} else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+	else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
 		sseu_print_xehp_topology(sseu, p);
-	} else {
+	else
 		sseu_print_hsw_topology(sseu, p);
-	}
 }
 
 void intel_sseu_print_ss_info(const char *type,
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 192ac0e59afa..3eacbc50caf8 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -777,6 +777,9 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
 
 	/* Wa_18019271663:dg2 */
 	wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);
+
+	/* Wa_14019877138:dg2 */
+	wa_mcr_masked_en(wal, XEHP_PSS_CHICKEN, FD_END_COLLECT);
 }
 
 static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine,
@@ -1663,8 +1666,22 @@ xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 }
 
 static void
+wa_16021867713(struct intel_gt *gt, struct i915_wa_list *wal)
+{
+	struct intel_engine_cs *engine;
+	int id;
+
+	for_each_engine(engine, gt, id)
+		if (engine->class == VIDEO_DECODE_CLASS)
+			wa_write_or(wal, VDBOX_CGCTL3F1C(engine->mmio_base),
+				    MFXPIPE_CLKGATE_DIS);
+}
+
+static void
 xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 {
+	wa_16021867713(gt, wal);
+
 	/*
 	 * Wa_14018778641
 	 * Wa_18018781329
@@ -1674,6 +1691,9 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 	 */
 	wa_write_or(wal, XELPMP_GSC_MOD_CTRL, FORCE_MISS_FTLB);
 
+	/* Wa_22016670082 */
+	wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE);
+
 	debug_dump_steering(gt);
 }
 
@@ -2340,14 +2360,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 			   0, true);
 	}
 
-	if (IS_DG2_G11(i915) || IS_DG2_G10(i915)) {
-		/* Wa_22014600077:dg2 */
-		wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
-			   _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH),
-			   0 /* Wa_14012342262 write-only reg, so skip verification */,
-			   true);
-	}
-
 	if (IS_DG2(i915) || IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
 	    IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
 		/*
@@ -2782,6 +2794,11 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 			 RING_SEMA_WAIT_POLL(engine->mmio_base),
 			 1);
 	}
+	/* Wa_16018031267, Wa_16018063123 */
+	if (NEEDS_FASTCOLOR_BLT_WABB(engine))
+		wa_masked_field_set(wal, ECOSKPD(engine->mmio_base),
+				    XEHP_BLITTER_SCHEDULING_MODE_MASK,
+				    XEHP_BLITTER_ROUND_ROBIN_MODE);
 }
 
 static void
@@ -2915,6 +2932,9 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
 		 * Wa_22015475538:dg2
 		 */
 		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
+
+		/* Wa_18028616096 */
+		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3);
 	}
 
 	if (IS_DG2_G11(i915)) {
@@ -2943,11 +2963,6 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
 			   true);
 	}
 
-	if (IS_DG2_G10(i915) || IS_DG2_G12(i915)) {
-		/* Wa_18028616096 */
-		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3);
-	}
-
 	if (IS_XEHPSDV(i915)) {
 		/* Wa_1409954639 */
 		wa_mcr_masked_en(wal,
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
index 86cecf7a1105..5ffa5e30f419 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
@@ -21,20 +21,22 @@ static int cmp_u32(const void *A, const void *B)
 	return *a - *b;
 }
 
-static void perf_begin(struct intel_gt *gt)
+static intel_wakeref_t perf_begin(struct intel_gt *gt)
 {
-	intel_gt_pm_get(gt);
+	intel_wakeref_t wakeref = intel_gt_pm_get(gt);
 
 	/* Boost gpufreq to max [waitboost] and keep it fixed */
 	atomic_inc(&gt->rps.num_waiters);
 	queue_work(gt->i915->unordered_wq, &gt->rps.work);
 	flush_work(&gt->rps.work);
+
+	return wakeref;
 }
 
-static int perf_end(struct intel_gt *gt)
+static int perf_end(struct intel_gt *gt, intel_wakeref_t wakeref)
 {
 	atomic_dec(&gt->rps.num_waiters);
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(gt, wakeref);
 
 	return igt_flush_test(gt->i915);
 }
@@ -133,12 +135,13 @@ static int perf_mi_bb_start(void *arg)
 	struct intel_gt *gt = arg;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	int err = 0;
 
 	if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
 		return 0;
 
-	perf_begin(gt);
+	wakeref = perf_begin(gt);
 	for_each_engine(engine, gt, id) {
 		struct intel_context *ce = engine->kernel_context;
 		struct i915_vma *batch;
@@ -207,7 +210,7 @@ out:
 		pr_info("%s: MI_BB_START cycles: %u\n",
 			engine->name, trifilter(cycles));
 	}
-	if (perf_end(gt))
+	if (perf_end(gt, wakeref))
 		err = -EIO;
 
 	return err;
@@ -260,12 +263,13 @@ static int perf_mi_noop(void *arg)
 	struct intel_gt *gt = arg;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	int err = 0;
 
 	if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
 		return 0;
 
-	perf_begin(gt);
+	wakeref = perf_begin(gt);
 	for_each_engine(engine, gt, id) {
 		struct intel_context *ce = engine->kernel_context;
 		struct i915_vma *base, *nop;
@@ -364,7 +368,7 @@ out:
 		pr_info("%s: 16K MI_NOOP cycles: %u\n",
 			engine->name, trifilter(cycles));
 	}
-	if (perf_end(gt))
+	if (perf_end(gt, wakeref))
 		err = -EIO;
 
 	return err;
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
index 273d440a53e3..bc441ce7b380 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
@@ -84,7 +84,7 @@ static struct pulse *pulse_create(void)
 
 static void pulse_unlock_wait(struct pulse *p)
 {
-	i915_active_unlock_wait(&p->active);
+	wait_var_event_timeout(&p->active, i915_active_is_idle(&p->active), HZ);
 }
 
 static int __live_idle_pulse(struct intel_engine_cs *engine,
diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
index 0971241707ce..33351deeea4f 100644
--- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
@@ -81,6 +81,7 @@ static int live_gt_clocks(void *arg)
 	struct intel_gt *gt = arg;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	int err = 0;
 
 	if (!gt->clock_frequency) { /* unknown */
@@ -91,7 +92,7 @@ static int live_gt_clocks(void *arg)
 	if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
 		return 0;
 
-	intel_gt_pm_get(gt);
+	wakeref = intel_gt_pm_get(gt);
 	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
 
 	for_each_engine(engine, gt, id) {
@@ -128,7 +129,7 @@ static int live_gt_clocks(void *arg)
 	}
 
 	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(gt, wakeref);
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 5f826b6dcf5d..e17b8777d21d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1555,7 +1555,7 @@ static int live_lrc_isolation(void *arg)
 	return err;
 }
 
-static int indirect_ctx_submit_req(struct intel_context *ce)
+static int wabb_ctx_submit_req(struct intel_context *ce)
 {
 	struct i915_request *rq;
 	int err = 0;
@@ -1579,7 +1579,8 @@ static int indirect_ctx_submit_req(struct intel_context *ce)
 #define CTX_BB_CANARY_INDEX  (CTX_BB_CANARY_OFFSET / sizeof(u32))
 
 static u32 *
-emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
+emit_wabb_ctx_canary(const struct intel_context *ce,
+		     u32 *cs, bool per_ctx)
 {
 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 |
 		MI_SRM_LRM_GLOBAL_GTT |
@@ -1587,26 +1588,43 @@ emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
 	*cs++ = i915_mmio_reg_offset(RING_START(0));
 	*cs++ = i915_ggtt_offset(ce->state) +
 		context_wa_bb_offset(ce) +
-		CTX_BB_CANARY_OFFSET;
+		CTX_BB_CANARY_OFFSET +
+		(per_ctx ? PAGE_SIZE : 0);
 	*cs++ = 0;
 
 	return cs;
 }
 
+static u32 *
+emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
+{
+	return emit_wabb_ctx_canary(ce, cs, false);
+}
+
+static u32 *
+emit_per_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
+{
+	return emit_wabb_ctx_canary(ce, cs, true);
+}
+
 static void
-indirect_ctx_bb_setup(struct intel_context *ce)
+wabb_ctx_setup(struct intel_context *ce, bool per_ctx)
 {
-	u32 *cs = context_indirect_bb(ce);
+	u32 *cs = context_wabb(ce, per_ctx);
 
 	cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
 
-	setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
+	if (per_ctx)
+		setup_per_ctx_bb(ce, ce->engine, emit_per_ctx_bb_canary);
+	else
+		setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
 }
 
-static bool check_ring_start(struct intel_context *ce)
+static bool check_ring_start(struct intel_context *ce, bool per_ctx)
 {
 	const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
-		LRC_STATE_OFFSET + context_wa_bb_offset(ce);
+		LRC_STATE_OFFSET + context_wa_bb_offset(ce) +
+		(per_ctx ? PAGE_SIZE : 0);
 
 	if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
 		return true;
@@ -1618,21 +1636,21 @@ static bool check_ring_start(struct intel_context *ce)
 	return false;
 }
 
-static int indirect_ctx_bb_check(struct intel_context *ce)
+static int wabb_ctx_check(struct intel_context *ce, bool per_ctx)
 {
 	int err;
 
-	err = indirect_ctx_submit_req(ce);
+	err = wabb_ctx_submit_req(ce);
 	if (err)
 		return err;
 
-	if (!check_ring_start(ce))
+	if (!check_ring_start(ce, per_ctx))
 		return -EINVAL;
 
 	return 0;
 }
 
-static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
+static int __lrc_wabb_ctx(struct intel_engine_cs *engine, bool per_ctx)
 {
 	struct intel_context *a, *b;
 	int err;
@@ -1667,14 +1685,14 @@ static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
 	 * As ring start is restored apriori of starting the indirect ctx bb and
 	 * as it will be different for each context, it fits to this purpose.
 	 */
-	indirect_ctx_bb_setup(a);
-	indirect_ctx_bb_setup(b);
+	wabb_ctx_setup(a, per_ctx);
+	wabb_ctx_setup(b, per_ctx);
 
-	err = indirect_ctx_bb_check(a);
+	err = wabb_ctx_check(a, per_ctx);
 	if (err)
 		goto unpin_b;
 
-	err = indirect_ctx_bb_check(b);
+	err = wabb_ctx_check(b, per_ctx);
 
 unpin_b:
 	intel_context_unpin(b);
@@ -1688,7 +1706,7 @@ put_a:
 	return err;
 }
 
-static int live_lrc_indirect_ctx_bb(void *arg)
+static int lrc_wabb_ctx(void *arg, bool per_ctx)
 {
 	struct intel_gt *gt = arg;
 	struct intel_engine_cs *engine;
@@ -1697,7 +1715,7 @@ static int live_lrc_indirect_ctx_bb(void *arg)
 
 	for_each_engine(engine, gt, id) {
 		intel_engine_pm_get(engine);
-		err = __live_lrc_indirect_ctx_bb(engine);
+		err = __lrc_wabb_ctx(engine, per_ctx);
 		intel_engine_pm_put(engine);
 
 		if (igt_flush_test(gt->i915))
@@ -1710,6 +1728,16 @@ static int live_lrc_indirect_ctx_bb(void *arg)
 	return err;
 }
 
+static int live_lrc_indirect_ctx_bb(void *arg)
+{
+	return lrc_wabb_ctx(arg, false);
+}
+
+static int live_lrc_per_ctx_bb(void *arg)
+{
+	return lrc_wabb_ctx(arg, true);
+}
+
 static void garbage_reset(struct intel_engine_cs *engine,
 			  struct i915_request *rq)
 {
@@ -1947,6 +1975,7 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_lrc_garbage),
 		SUBTEST(live_pphwsp_runtime),
 		SUBTEST(live_lrc_indirect_ctx_bb),
+		SUBTEST(live_lrc_per_ctx_bb),
 	};
 
 	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 79aa6ac66ad2..f40de408cd3a 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -261,11 +261,12 @@ static int igt_atomic_reset(void *arg)
 {
 	struct intel_gt *gt = arg;
 	const typeof(*igt_atomic_phases) *p;
+	intel_wakeref_t wakeref;
 	int err = 0;
 
 	/* Check that the resets are usable from atomic context */
 
-	intel_gt_pm_get(gt);
+	wakeref = intel_gt_pm_get(gt);
 	igt_global_reset_lock(gt);
 
 	/* Flush any requests before we get started and check basics */
@@ -296,7 +297,7 @@ static int igt_atomic_reset(void *arg)
 
 unlock:
 	igt_global_reset_unlock(gt);
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(gt, wakeref);
 
 	return err;
 }
@@ -307,6 +308,7 @@ static int igt_atomic_engine_reset(void *arg)
 	const typeof(*igt_atomic_phases) *p;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	int err = 0;
 
 	/* Check that the resets are usable from atomic context */
@@ -317,7 +319,7 @@ static int igt_atomic_engine_reset(void *arg)
 	if (intel_uc_uses_guc_submission(&gt->uc))
 		return 0;
 
-	intel_gt_pm_get(gt);
+	wakeref = intel_gt_pm_get(gt);
 	igt_global_reset_lock(gt);
 
 	/* Flush any requests before we get started and check basics */
@@ -365,7 +367,7 @@ static int igt_atomic_engine_reset(void *arg)
 
 out_unlock:
 	igt_global_reset_unlock(gt);
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(gt, wakeref);
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index fb30f733b036..dcef8d498919 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -224,6 +224,7 @@ int live_rps_clock_interval(void *arg)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 	struct igt_spinner spin;
+	intel_wakeref_t wakeref;
 	int err = 0;
 
 	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
@@ -236,7 +237,7 @@ int live_rps_clock_interval(void *arg)
 	saved_work = rps->work.func;
 	rps->work.func = dummy_rps_work;
 
-	intel_gt_pm_get(gt);
+	wakeref = intel_gt_pm_get(gt);
 	intel_rps_disable(&gt->rps);
 
 	intel_gt_check_clock_frequency(gt);
@@ -355,7 +356,7 @@ int live_rps_clock_interval(void *arg)
 	}
 
 	intel_rps_enable(&gt->rps);
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(gt, wakeref);
 
 	igt_spinner_fini(&spin);
 
@@ -376,6 +377,7 @@ int live_rps_control(void *arg)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 	struct igt_spinner spin;
+	intel_wakeref_t wakeref;
 	int err = 0;
 
 	/*
@@ -398,7 +400,7 @@ int live_rps_control(void *arg)
 	saved_work = rps->work.func;
 	rps->work.func = dummy_rps_work;
 
-	intel_gt_pm_get(gt);
+	wakeref = intel_gt_pm_get(gt);
 	for_each_engine(engine, gt, id) {
 		struct i915_request *rq;
 		ktime_t min_dt, max_dt;
@@ -488,7 +490,7 @@ int live_rps_control(void *arg)
 			break;
 		}
 	}
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(gt, wakeref);
 
 	igt_spinner_fini(&spin);
 
@@ -1023,6 +1025,7 @@ int live_rps_interrupt(void *arg)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 	struct igt_spinner spin;
+	intel_wakeref_t wakeref;
 	u32 pm_events;
 	int err = 0;
 
@@ -1033,9 +1036,9 @@ int live_rps_interrupt(void *arg)
 	if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
 		return 0;
 
-	intel_gt_pm_get(gt);
-	pm_events = rps->pm_events;
-	intel_gt_pm_put(gt);
+	pm_events = 0;
+	with_intel_gt_pm(gt, wakeref)
+		pm_events = rps->pm_events;
 	if (!pm_events) {
 		pr_err("No RPS PM events registered, but RPS is enabled?\n");
 		return -ENODEV;
diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c
index 952c8d52d68a..302d0540295d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_slpc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c
@@ -266,6 +266,7 @@ static int run_test(struct intel_gt *gt, int test_type)
 	struct intel_rps *rps = &gt->rps;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	struct igt_spinner spin;
 	u32 slpc_min_freq, slpc_max_freq;
 	int err = 0;
@@ -311,7 +312,7 @@ static int run_test(struct intel_gt *gt, int test_type)
 	}
 
 	intel_gt_pm_wait_for_idle(gt);
-	intel_gt_pm_get(gt);
+	wakeref = intel_gt_pm_get(gt);
 	for_each_engine(engine, gt, id) {
 		struct i915_request *rq;
 		u32 max_act_freq;
@@ -397,7 +398,7 @@ static int run_test(struct intel_gt *gt, int test_type)
 	if (igt_flush_test(gt->i915))
 		err = -EIO;
 
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(gt, wakeref);
 	igt_spinner_fini(&spin);
 	intel_gt_pm_wait_for_idle(gt);
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
index 5f138de3c14f..40817ebcca71 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
@@ -322,6 +322,7 @@ static int i915_gsc_proxy_component_bind(struct device *i915_kdev,
 	gsc->proxy.component = data;
 	gsc->proxy.component->mei_dev = mei_kdev;
 	mutex_unlock(&gsc->proxy.mutex);
+	gt_dbg(gt, "GSC proxy mei component bound\n");
 
 	return 0;
 }
@@ -342,6 +343,7 @@ static void i915_gsc_proxy_component_unbind(struct device *i915_kdev,
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 		intel_uncore_rmw(gt->uncore, HECI_H_CSR(MTL_GSC_HECI2_BASE),
 				 HECI_H_CSR_IE | HECI_H_CSR_RST, 0);
+	gt_dbg(gt, "GSC proxy mei component unbound\n");
 }
 
 static const struct component_ops i915_gsc_proxy_component_ops = {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 3f3df1166b86..2b450c43bbd7 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -330,7 +330,7 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
 
 static u32 guc_ctl_devid(struct intel_guc *guc)
 {
-	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	struct drm_i915_private *i915 = guc_to_i915(guc);
 
 	return (INTEL_DEVID(i915) << 16) | INTEL_REVID(i915);
 }
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 2b6dfe62c8f2..813cc888e6fa 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -105,61 +105,67 @@ struct intel_guc {
 	 */
 	struct {
 		/**
-		 * @lock: protects everything in submission_state,
-		 * ce->guc_id.id, and ce->guc_id.ref when transitioning in and
-		 * out of zero
+		 * @submission_state.lock: protects everything in
+		 * submission_state, ce->guc_id.id, and ce->guc_id.ref
+		 * when transitioning in and out of zero
 		 */
 		spinlock_t lock;
 		/**
-		 * @guc_ids: used to allocate new guc_ids, single-lrc
+		 * @submission_state.guc_ids: used to allocate new
+		 * guc_ids, single-lrc
 		 */
 		struct ida guc_ids;
 		/**
-		 * @num_guc_ids: Number of guc_ids, selftest feature to be able
-		 * to reduce this number while testing.
+		 * @submission_state.num_guc_ids: Number of guc_ids, selftest
+		 * feature to be able to reduce this number while testing.
 		 */
 		int num_guc_ids;
 		/**
-		 * @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc
+		 * @submission_state.guc_ids_bitmap: used to allocate
+		 * new guc_ids, multi-lrc
 		 */
 		unsigned long *guc_ids_bitmap;
 		/**
-		 * @guc_id_list: list of intel_context with valid guc_ids but no
-		 * refs
+		 * @submission_state.guc_id_list: list of intel_context
+		 * with valid guc_ids but no refs
 		 */
 		struct list_head guc_id_list;
 		/**
-		 * @guc_ids_in_use: Number single-lrc guc_ids in use
+		 * @submission_state.guc_ids_in_use: Number of single-lrc
+		 * guc_ids in use
 		 */
 		unsigned int guc_ids_in_use;
 		/**
-		 * @destroyed_contexts: list of contexts waiting to be destroyed
-		 * (deregistered with the GuC)
+		 * @submission_state.destroyed_contexts: list of contexts
+		 * waiting to be destroyed (deregistered with the GuC)
 		 */
 		struct list_head destroyed_contexts;
 		/**
-		 * @destroyed_worker: worker to deregister contexts, need as we
-		 * need to take a GT PM reference and can't from destroy
-		 * function as it might be in an atomic context (no sleeping)
+		 * @submission_state.destroyed_worker: worker to deregister
+		 * contexts, needed as we need to take a GT PM reference and
+		 * can't do so from the destroy function as it might be in an
+		 * atomic context (no sleeping)
 		 */
 		struct work_struct destroyed_worker;
 		/**
-		 * @reset_fail_worker: worker to trigger a GT reset after an
-		 * engine reset fails
+		 * @submission_state.reset_fail_worker: worker to trigger
+		 * a GT reset after an engine reset fails
 		 */
 		struct work_struct reset_fail_worker;
 		/**
-		 * @reset_fail_mask: mask of engines that failed to reset
+		 * @submission_state.reset_fail_mask: mask of engines that
+		 * failed to reset
 		 */
 		intel_engine_mask_t reset_fail_mask;
 		/**
-		 * @sched_disable_delay_ms: schedule disable delay, in ms, for
-		 * contexts
+		 * @submission_state.sched_disable_delay_ms: schedule
+		 * disable delay, in ms, for contexts
 		 */
 		unsigned int sched_disable_delay_ms;
 		/**
-		 * @sched_disable_gucid_threshold: threshold of min remaining available
-		 * guc_ids before we start bypassing the schedule disable delay
+		 * @submission_state.sched_disable_gucid_threshold:
+		 * threshold of min remaining available guc_ids before
+		 * we start bypassing the schedule disable delay
 		 */
 		unsigned int sched_disable_gucid_threshold;
 	} submission_state;
@@ -243,37 +249,40 @@ struct intel_guc {
 	 */
 	struct {
 		/**
-		 * @lock: Lock protecting the below fields and the engine stats.
+		 * @timestamp.lock: Lock protecting the below fields and
+		 * the engine stats.
 		 */
 		spinlock_t lock;
 
 		/**
-		 * @gt_stamp: 64 bit extended value of the GT timestamp.
+		 * @timestamp.gt_stamp: 64-bit extended value of the GT
+		 * timestamp.
 		 */
 		u64 gt_stamp;
 
 		/**
-		 * @ping_delay: Period for polling the GT timestamp for
-		 * overflow.
+		 * @timestamp.ping_delay: Period for polling the GT
+		 * timestamp for overflow.
 		 */
 		unsigned long ping_delay;
 
 		/**
-		 * @work: Periodic work to adjust GT timestamp, engine and
-		 * context usage for overflows.
+		 * @timestamp.work: Periodic work to adjust GT timestamp,
+		 * engine and context usage for overflows.
 		 */
 		struct delayed_work work;
 
 		/**
-		 * @shift: Right shift value for the gpm timestamp
+		 * @timestamp.shift: Right shift value for the gpm timestamp
 		 */
 		u32 shift;
 
 		/**
-		 * @last_stat_jiffies: jiffies at last actual stats collection time
-		 * We use this timestamp to ensure we don't oversample the
-		 * stats because runtime power management events can trigger
-		 * stats collection at much higher rates than required.
+		 * @timestamp.last_stat_jiffies: jiffies at last actual
+		 * stats collection time. We use this timestamp to ensure
+		 * we don't oversample the stats because runtime power
+		 * management events can trigger stats collection at much
+		 * higher rates than required.
 		 */
 		unsigned long last_stat_jiffies;
 	} timestamp;
@@ -297,6 +306,10 @@ struct intel_guc {
 	 * @number_guc_id_stolen: The number of guc_ids that have been stolen
 	 */
 	int number_guc_id_stolen;
+	/**
+	 * @fast_response_selftest: Backdoor to CT handler for fast response selftest
+	 */
+	u32 fast_response_selftest;
 #endif
 };
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
index a4da0208c883..a1cd40d80517 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
@@ -355,7 +355,7 @@ guc_capture_alloc_steered_lists(struct intel_guc *guc,
 static const struct __guc_mmio_reg_descr_group *
 guc_capture_get_device_reglist(struct intel_guc *guc)
 {
-	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	struct drm_i915_private *i915 = guc_to_i915(guc);
 	const struct __guc_mmio_reg_descr_group *lists;
 
 	if (GRAPHICS_VER(i915) >= 12)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 89e314b3756b..0d5197c0824a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -265,7 +265,7 @@ int intel_guc_ct_init(struct intel_guc_ct *ct)
 	u32 *cmds;
 	int err;
 
-	err = i915_inject_probe_error(guc_to_gt(guc)->i915, -ENXIO);
+	err = i915_inject_probe_error(guc_to_i915(guc), -ENXIO);
 	if (err)
 		return err;
 
@@ -1076,6 +1076,15 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r
 		found = true;
 		break;
 	}
+
+#ifdef CONFIG_DRM_I915_SELFTEST
+	if (!found && ct_to_guc(ct)->fast_response_selftest) {
+		CT_DEBUG(ct, "Assuming unsolicited response due to FAST_REQUEST selftest\n");
+		ct_to_guc(ct)->fast_response_selftest++;
+		found = true;
+	}
+#endif
+
 	if (!found) {
 		CT_ERROR(ct, "Unsolicited response message: len %u, data %#x (fence %u, last %u)\n",
 			 len, hxg[0], fence, ct->requests.last_fence);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index 55bc8b55fbc0..bf16351c9349 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -520,7 +520,7 @@ void intel_guc_log_init_early(struct intel_guc_log *log)
 static int guc_log_relay_create(struct intel_guc_log *log)
 {
 	struct intel_guc *guc = log_to_guc(log);
-	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	struct drm_i915_private *i915 = guc_to_i915(guc);
 	struct rchan *guc_log_relay_chan;
 	size_t n_subbufs, subbuf_size;
 	int ret;
@@ -573,7 +573,7 @@ static void guc_log_relay_destroy(struct intel_guc_log *log)
 static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log)
 {
 	struct intel_guc *guc = log_to_guc(log);
-	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	struct drm_i915_private *i915 = guc_to_i915(guc);
 	intel_wakeref_t wakeref;
 
 	_guc_log_copy_debuglogs_for_relay(log);
@@ -589,7 +589,7 @@ static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log)
 static u32 __get_default_log_level(struct intel_guc_log *log)
 {
 	struct intel_guc *guc = log_to_guc(log);
-	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	struct drm_i915_private *i915 = guc_to_i915(guc);
 
 	/* A negative value means "use platform/config default" */
 	if (i915->params.guc_log_level < 0) {
@@ -664,7 +664,7 @@ void intel_guc_log_destroy(struct intel_guc_log *log)
 int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
 {
 	struct intel_guc *guc = log_to_guc(log);
-	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	struct drm_i915_private *i915 = guc_to_i915(guc);
 	intel_wakeref_t wakeref;
 	int ret = 0;
 
@@ -796,7 +796,7 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log)
 static void guc_log_relay_stop(struct intel_guc_log *log)
 {
 	struct intel_guc *guc = log_to_guc(log);
-	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	struct drm_i915_private *i915 = guc_to_i915(guc);
 
 	if (!log->relay.started)
 		return;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
index 1adec6de223c..9df7927304ae 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
@@ -14,7 +14,7 @@ static bool __guc_rc_supported(struct intel_guc *guc)
 {
 	/* GuC RC is unavailable for pre-Gen12 */
 	return guc->submission_supported &&
-		GRAPHICS_VER(guc_to_gt(guc)->i915) >= 12;
+		GRAPHICS_VER(guc_to_i915(guc)) >= 12;
 }
 
 static bool __guc_rc_selected(struct intel_guc *guc)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 2dfb07cc4b33..3e681ab6fbf9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -34,7 +34,7 @@ static bool __detect_slpc_supported(struct intel_guc *guc)
 {
 	/* GuC SLPC is unavailable for pre-Gen12 */
 	return guc->submission_supported &&
-		GRAPHICS_VER(guc_to_gt(guc)->i915) >= 12;
+		GRAPHICS_VER(guc_to_i915(guc)) >= 12;
 }
 
 static bool __guc_slpc_selected(struct intel_guc *guc)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index d37698bd6b91..a259f1118c5a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1107,7 +1107,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
 			if (deregister)
 				guc_signal_context_fence(ce);
 			if (destroyed) {
-				intel_gt_pm_put_async(guc_to_gt(guc));
+				intel_gt_pm_put_async_untracked(guc_to_gt(guc));
 				release_guc_id(guc, ce);
 				__guc_context_destroy(ce);
 			}
@@ -1303,6 +1303,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
 	unsigned long flags;
 	u32 reset_count;
 	bool in_reset;
+	intel_wakeref_t wakeref;
 
 	spin_lock_irqsave(&guc->timestamp.lock, flags);
 
@@ -1325,7 +1326,8 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
 	 * start_gt_clk is derived from GuC state. To get a consistent
 	 * view of activity, we query the GuC state only if gt is awake.
 	 */
-	if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
+	wakeref = in_reset ? 0 : intel_gt_pm_get_if_awake(gt);
+	if (wakeref) {
 		stats_saved = *stats;
 		gt_stamp_saved = guc->timestamp.gt_stamp;
 		/*
@@ -1334,7 +1336,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
 		 */
 		guc_update_engine_gt_clks(engine);
 		guc_update_pm_timestamp(guc, now);
-		intel_gt_pm_put_async(gt);
+		intel_gt_pm_put_async(gt, wakeref);
 		if (i915_reset_count(gpu_error) != reset_count) {
 			*stats = stats_saved;
 			guc->timestamp.gt_stamp = gt_stamp_saved;
@@ -3385,9 +3387,9 @@ static void destroyed_worker_func(struct work_struct *w)
 	struct intel_guc *guc = container_of(w, struct intel_guc,
 					     submission_state.destroyed_worker);
 	struct intel_gt *gt = guc_to_gt(guc);
-	int tmp;
+	intel_wakeref_t wakeref;
 
-	with_intel_gt_pm(gt, tmp)
+	with_intel_gt_pm(gt, wakeref)
 		deregister_destroyed_contexts(guc);
 }
 
@@ -4624,12 +4626,12 @@ static bool __guc_submission_supported(struct intel_guc *guc)
 {
 	/* GuC submission is unavailable for pre-Gen11 */
 	return intel_guc_is_supported(guc) &&
-	       GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11;
+	       GRAPHICS_VER(guc_to_i915(guc)) >= 11;
 }
 
 static bool __guc_submission_selected(struct intel_guc *guc)
 {
-	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	struct drm_i915_private *i915 = guc_to_i915(guc);
 
 	if (!intel_guc_submission_is_supported(guc))
 		return false;
@@ -4894,7 +4896,7 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
 		intel_context_put(ce);
 	} else if (context_destroyed(ce)) {
 		/* Context has been destroyed */
-		intel_gt_pm_put_async(guc_to_gt(guc));
+		intel_gt_pm_put_async_untracked(guc_to_gt(guc));
 		release_guc_id(guc, ce);
 		__guc_context_destroy(ce);
 	}
@@ -5001,7 +5003,8 @@ static void capture_error_state(struct intel_guc *guc,
 			if (match) {
 				intel_engine_set_hung_context(e, ce);
 				engine_mask |= e->mask;
-				atomic_inc(&i915->gpu_error.reset_engine_count[e->uabi_class]);
+				i915_increase_reset_engine_count(&i915->gpu_error,
+								 e);
 			}
 		}
 
@@ -5013,7 +5016,7 @@ static void capture_error_state(struct intel_guc *guc,
 	} else {
 		intel_engine_set_hung_context(ce->engine, ce);
 		engine_mask = ce->engine->mask;
-		atomic_inc(&i915->gpu_error.reset_engine_count[ce->engine->uabi_class]);
+		i915_increase_reset_engine_count(&i915->gpu_error, ce->engine);
 	}
 
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 27f6561dd731..3872d309ed31 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -106,11 +106,6 @@ static void __confirm_options(struct intel_uc *uc)
 		gt_info(gt,  "Incompatible option enable_guc=%d - %s\n",
 			i915->params.enable_guc, "GuC is not supported!");
 
-	if (i915->params.enable_guc & ENABLE_GUC_LOAD_HUC &&
-	    !intel_uc_supports_huc(uc))
-		gt_info(gt, "Incompatible option enable_guc=%d - %s\n",
-			i915->params.enable_guc, "HuC is not supported!");
-
 	if (i915->params.enable_guc & ENABLE_GUC_SUBMISSION &&
 	    !intel_uc_supports_guc_submission(uc))
 		gt_info(gt, "Incompatible option enable_guc=%d - %s\n",
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 362639162ed6..756093eaf2ad 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -1343,16 +1343,13 @@ size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len)
 
 		for_each_sgt_page(page, iter, uc_fw->obj->mm.pages) {
 			u32 len = min_t(u32, size, PAGE_SIZE - offset);
-			void *vaddr;
 
 			if (idx > 0) {
 				idx--;
 				continue;
 			}
 
-			vaddr = kmap_atomic(page);
-			memcpy(dst, vaddr + offset, len);
-			kunmap_atomic(vaddr);
+			memcpy_from_page(dst, page, offset, len);
 
 			offset = 0;
 			dst += len;
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
index bfb72143566f..c900aac85adb 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
@@ -286,11 +286,137 @@ err_wakeref:
 	return ret;
 }
 
+/*
+ * Send a context schedule H2G message with an invalid context id.
+ * This should generate a GUC_RESULT_INVALID_CONTEXT response.
+ */
+static int bad_h2g(struct intel_guc *guc)
+{
+	u32 action[] = {
+	   INTEL_GUC_ACTION_SCHED_CONTEXT,
+	   0x12345678,
+	};
+
+	return intel_guc_send_nb(guc, action, ARRAY_SIZE(action), 0);
+}
+
+/*
+ * Set a spinner running to make sure the system is alive and active,
+ * then send a bad but asynchronous H2G command and wait to see if an
+ * error response is returned. If no response is received or if the
+ * spinner dies then the test will fail.
+ *
+ * Returns 0 on success, or if no engine is available to run on, and a
+ * negative error code otherwise.
+ */
+#define FAST_RESPONSE_TIMEOUT_MS	1000
+static int intel_guc_fast_request(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_context *ce;
+	struct igt_spinner spin;
+	struct i915_request *rq;
+	intel_wakeref_t wakeref;
+	struct intel_engine_cs *engine = intel_selftest_find_any_engine(gt);
+	bool spinning = false;
+	int ret = 0;
+
+	if (!engine)
+		return 0;
+
+	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce)) {
+		ret = PTR_ERR(ce);
+		gt_err(gt, "Failed to create context: %pe\n", ce);
+		goto err_pm;
+	}
+
+	ret = igt_spinner_init(&spin, engine->gt);
+	if (ret) {
+		gt_err(gt, "Failed to create spinner: %pe\n", ERR_PTR(ret));
+		intel_context_put(ce);
+		goto err_pm;
+	}
+	spinning = true;
+
+	rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
+	intel_context_put(ce);
+	if (IS_ERR(rq)) {
+		ret = PTR_ERR(rq);
+		gt_err(gt, "Failed to create spinner request: %pe\n", rq);
+		goto err_spin;
+	}
+
+	ret = request_add_spin(rq, &spin);
+	if (ret) {
+		gt_err(gt, "Failed to add Spinner request: %pe\n", ERR_PTR(ret));
+		goto err_rq;
+	}
+
+	gt->uc.guc.fast_response_selftest = 1;
+
+	ret = bad_h2g(&gt->uc.guc);
+	if (ret) {
+		gt_err(gt, "Failed to send H2G: %pe\n", ERR_PTR(ret));
+		goto err_rq;
+	}
+
+	ret = wait_for(gt->uc.guc.fast_response_selftest != 1 || i915_request_completed(rq),
+		       FAST_RESPONSE_TIMEOUT_MS);
+	if (ret) {
+		gt_err(gt, "Request wait failed: %pe\n", ERR_PTR(ret));
+		goto err_rq;
+	}
+
+	if (i915_request_completed(rq)) {
+		gt_err(gt, "Spinner died waiting for fast request error!\n");
+		ret = -EIO;
+		goto err_rq;
+	}
+
+	/*
+	 * The CT handler increments the counter for each unsolicited
+	 * response, so exactly one error reply must leave it at 2.
+	 */
+	if (gt->uc.guc.fast_response_selftest != 2) {
+		gt_err(gt, "Unexpected fast response count: %u\n",
+		       gt->uc.guc.fast_response_selftest);
+		ret = -EINVAL;
+		goto err_rq;
+	}
+
+	igt_spinner_end(&spin);
+	spinning = false;
+
+	ret = intel_selftest_wait_for_rq(rq);
+	if (ret) {
+		gt_err(gt, "Request failed to complete: %pe\n", ERR_PTR(ret));
+		goto err_rq;
+	}
+
+err_rq:
+	/* Disarm the CT handler backdoor so later tests see real errors. */
+	gt->uc.guc.fast_response_selftest = 0;
+	i915_request_put(rq);
+
+err_spin:
+	if (spinning)
+		igt_spinner_end(&spin);
+	igt_spinner_fini(&spin);
+
+err_pm:
+	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+	return ret;
+}
+
 int intel_guc_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(intel_guc_scrub_ctbs),
 		SUBTEST(intel_guc_steal_guc_ids),
+		SUBTEST(intel_guc_fast_request),
 	};
 	struct intel_gt *gt = to_gt(i915);
 
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c
index 34b5d952e2bc..26fdc392fce6 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c
@@ -74,7 +74,7 @@ static int intel_hang_guc(void *arg)
 		goto err;
 	}
 
-	rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
+	rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
 	intel_context_put(ce);
 	if (IS_ERR(rq)) {
 		ret = PTR_ERR(rq);
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 90f6c1ece57d..efcb00472be2 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -2849,8 +2849,7 @@ static int handle_mmio(struct intel_gvt_mmio_table_iter *iter, u32 offset,
 	for (i = start; i < end; i += 4) {
 		p = intel_gvt_find_mmio_info(gvt, i);
 		if (p) {
-			WARN(1, "dup mmio definition offset %x\n",
-				info->offset);
+			WARN(1, "dup mmio definition offset %x\n", i);
 
 			/* We return -EEXIST here to make GVT-g load fail.
 			 * So duplicated MMIO can be found as soon as
diff --git a/drivers/gpu/drm/i915/gvt/interrupt.c b/drivers/gpu/drm/i915/gvt/interrupt.c
index de3f5903d1a7..c8e7dfc9f791 100644
--- a/drivers/gpu/drm/i915/gvt/interrupt.c
+++ b/drivers/gpu/drm/i915/gvt/interrupt.c
@@ -422,7 +422,7 @@ static void init_irq_map(struct intel_gvt_irq *irq)
 #define MSI_CAP_DATA(offset) (offset + 8)
 #define MSI_CAP_EN 0x1
 
-static int inject_virtual_interrupt(struct intel_vgpu *vgpu)
+static void inject_virtual_interrupt(struct intel_vgpu *vgpu)
 {
 	unsigned long offset = vgpu->gvt->device_info.msi_cap_offset;
 	u16 control, data;
@@ -434,10 +434,10 @@ static int inject_virtual_interrupt(struct intel_vgpu *vgpu)
 
 	/* Do not generate MSI if MSIEN is disabled */
 	if (!(control & MSI_CAP_EN))
-		return 0;
+		return;
 
 	if (WARN(control & GENMASK(15, 1), "only support one MSI format\n"))
-		return -EINVAL;
+		return;
 
 	trace_inject_msi(vgpu->id, addr, data);
 
@@ -451,10 +451,9 @@ static int inject_virtual_interrupt(struct intel_vgpu *vgpu)
 	 * returned and don't inject interrupt into guest.
 	 */
 	if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
-		return -ESRCH;
-	if (vgpu->msi_trigger && eventfd_signal(vgpu->msi_trigger, 1) != 1)
-		return -EFAULT;
-	return 0;
+		return;
+	if (vgpu->msi_trigger)
+		eventfd_signal(vgpu->msi_trigger);
 }
 
 static void propagate_event(struct intel_gvt_irq *irq,
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index ddf49c2dbb91..2905df83e180 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1211,11 +1211,11 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 		for (n = offset >> PAGE_SHIFT; remain; n++) {
 			int len = min(remain, PAGE_SIZE - x);
 
-			src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
+			src = kmap_local_page(i915_gem_object_get_page(src_obj, n));
 			if (src_needs_clflush)
 				drm_clflush_virt_range(src + x, len);
 			memcpy(ptr, src + x, len);
-			kunmap_atomic(src);
+			kunmap_local(src);
 
 			ptr += len;
 			remain -= len;
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index bfe92d2402ea..db99c2ef66db 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -51,6 +51,7 @@
 #include "i915_debugfs.h"
 #include "i915_debugfs_params.h"
 #include "i915_driver.h"
+#include "i915_gpu_error.h"
 #include "i915_irq.h"
 #include "i915_reg.h"
 #include "i915_scheduler.h"
@@ -299,107 +300,6 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 	return 0;
 }
 
-#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
-static ssize_t gpu_state_read(struct file *file, char __user *ubuf,
-			      size_t count, loff_t *pos)
-{
-	struct i915_gpu_coredump *error;
-	ssize_t ret;
-	void *buf;
-
-	error = file->private_data;
-	if (!error)
-		return 0;
-
-	/* Bounce buffer required because of kernfs __user API convenience. */
-	buf = kmalloc(count, GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-
-	ret = i915_gpu_coredump_copy_to_buffer(error, buf, *pos, count);
-	if (ret <= 0)
-		goto out;
-
-	if (!copy_to_user(ubuf, buf, ret))
-		*pos += ret;
-	else
-		ret = -EFAULT;
-
-out:
-	kfree(buf);
-	return ret;
-}
-
-static int gpu_state_release(struct inode *inode, struct file *file)
-{
-	i915_gpu_coredump_put(file->private_data);
-	return 0;
-}
-
-static int i915_gpu_info_open(struct inode *inode, struct file *file)
-{
-	struct drm_i915_private *i915 = inode->i_private;
-	struct i915_gpu_coredump *gpu;
-	intel_wakeref_t wakeref;
-
-	gpu = NULL;
-	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
-		gpu = i915_gpu_coredump(to_gt(i915), ALL_ENGINES, CORE_DUMP_FLAG_NONE);
-
-	if (IS_ERR(gpu))
-		return PTR_ERR(gpu);
-
-	file->private_data = gpu;
-	return 0;
-}
-
-static const struct file_operations i915_gpu_info_fops = {
-	.owner = THIS_MODULE,
-	.open = i915_gpu_info_open,
-	.read = gpu_state_read,
-	.llseek = default_llseek,
-	.release = gpu_state_release,
-};
-
-static ssize_t
-i915_error_state_write(struct file *filp,
-		       const char __user *ubuf,
-		       size_t cnt,
-		       loff_t *ppos)
-{
-	struct i915_gpu_coredump *error = filp->private_data;
-
-	if (!error)
-		return 0;
-
-	drm_dbg(&error->i915->drm, "Resetting error state\n");
-	i915_reset_error_state(error->i915);
-
-	return cnt;
-}
-
-static int i915_error_state_open(struct inode *inode, struct file *file)
-{
-	struct i915_gpu_coredump *error;
-
-	error = i915_first_error_state(inode->i_private);
-	if (IS_ERR(error))
-		return PTR_ERR(error);
-
-	file->private_data  = error;
-	return 0;
-}
-
-static const struct file_operations i915_error_state_fops = {
-	.owner = THIS_MODULE,
-	.open = i915_error_state_open,
-	.read = gpu_state_read,
-	.write = i915_error_state_write,
-	.llseek = default_llseek,
-	.release = gpu_state_release,
-};
-#endif
-
 static int i915_frequency_info(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *i915 = node_to_i915(m->private);
@@ -839,10 +739,6 @@ static const struct i915_debugfs_files {
 	{"i915_perf_noa_delay", &i915_perf_noa_delay_fops},
 	{"i915_wedged", &i915_wedged_fops},
 	{"i915_gem_drop_caches", &i915_drop_caches_fops},
-#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
-	{"i915_error_state", &i915_error_state_fops},
-	{"i915_gpu_info", &i915_gpu_info_fops},
-#endif
 };
 
 void i915_debugfs_register(struct drm_i915_private *dev_priv)
@@ -865,4 +761,6 @@ void i915_debugfs_register(struct drm_i915_private *dev_priv)
 	drm_debugfs_create_files(i915_debugfs_list,
 				 ARRAY_SIZE(i915_debugfs_list),
 				 minor->debugfs_root, minor);
+
+	i915_gpu_error_debugfs_register(dev_priv);
 }
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index 2a1faf403965..c7d7c3b7ecc6 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -798,7 +798,9 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (ret)
 		goto out_cleanup_modeset2;
 
-	intel_pxp_init(i915);
+	ret = intel_pxp_init(i915);
+	if (ret && ret != -ENODEV) /* stay quiet on success and on -ENODEV */
+		drm_dbg(&i915->drm, "pxp init failed with %d\n", ret);
 
 	ret = intel_display_driver_probe(i915);
 	if (ret)
@@ -1033,7 +1035,7 @@ void i915_driver_shutdown(struct drm_i915_private *i915)
 	intel_power_domains_driver_remove(i915);
 	enable_rpm_wakeref_asserts(&i915->runtime_pm);
 
-	intel_runtime_pm_driver_release(&i915->runtime_pm);
+	intel_runtime_pm_driver_last_release(&i915->runtime_pm);
 }
 
 static bool suspend_to_idle(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c
index 2a44b3876cb5..fa6852713bee 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.c
+++ b/drivers/gpu/drm/i915/i915_drm_client.c
@@ -28,6 +28,10 @@ struct i915_drm_client *i915_drm_client_alloc(void)
 	kref_init(&client->kref);
 	spin_lock_init(&client->ctx_lock);
 	INIT_LIST_HEAD(&client->ctx_list);
+#ifdef CONFIG_PROC_FS
+	spin_lock_init(&client->objects_lock);
+	INIT_LIST_HEAD(&client->objects_list);
+#endif
 
 	return client;
 }
@@ -41,6 +45,68 @@ void __i915_drm_client_free(struct kref *kref)
 }
 
 #ifdef CONFIG_PROC_FS
+static void
+obj_meminfo(struct drm_i915_gem_object *obj,
+	    struct drm_memory_stats stats[INTEL_REGION_UNKNOWN])
+{
+	const enum intel_region_id id = obj->mm.region ?
+					obj->mm.region->id : INTEL_REGION_SMEM;
+	const u64 sz = obj->base.size;
+
+	if (obj->base.handle_count > 1)
+		stats[id].shared += sz;
+	else
+		stats[id].private += sz;
+
+	if (i915_gem_object_has_pages(obj)) {
+		stats[id].resident += sz;
+
+		if (!dma_resv_test_signaled(obj->base.resv,
+					    DMA_RESV_USAGE_BOOKKEEP))
+			stats[id].active += sz;
+		else if (i915_gem_object_is_shrinkable(obj) &&
+			 obj->mm.madv == I915_MADV_DONTNEED)
+			stats[id].purgeable += sz;
+	}
+}
+
+static void show_meminfo(struct drm_printer *p, struct drm_file *file)
+{
+	struct drm_memory_stats stats[INTEL_REGION_UNKNOWN] = {};
+	struct drm_i915_file_private *fpriv = file->driver_priv;
+	struct i915_drm_client *client = fpriv->client;
+	struct drm_i915_private *i915 = fpriv->i915;
+	struct drm_i915_gem_object *obj;
+	struct intel_memory_region *mr;
+	struct list_head __rcu *pos;
+	unsigned int id;
+
+	/* Public objects. */
+	spin_lock(&file->table_lock);
+	idr_for_each_entry(&file->object_idr, obj, id)
+		obj_meminfo(obj, stats);
+	spin_unlock(&file->table_lock);
+
+	/* Internal objects. */
+	rcu_read_lock();
+	list_for_each_rcu(pos, &client->objects_list) {
+		obj = i915_gem_object_get_rcu(list_entry(pos, typeof(*obj),
+							 client_link));
+		if (!obj)
+			continue;
+		obj_meminfo(obj, stats);
+		i915_gem_object_put(obj);
+	}
+	rcu_read_unlock();
+
+	for_each_memory_region(mr, i915, id)
+		drm_print_memory_stats(p,
+				       &stats[id],
+				       DRM_GEM_OBJECT_RESIDENT |
+				       DRM_GEM_OBJECT_PURGEABLE,
+				       mr->uabi_name);
+}
+
 static const char * const uabi_class_names[] = {
 	[I915_ENGINE_CLASS_RENDER] = "render",
 	[I915_ENGINE_CLASS_COPY] = "copy",
@@ -102,10 +168,52 @@ void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file)
 	 * ******************************************************************
 	 */
 
+	show_meminfo(p, file);
+
 	if (GRAPHICS_VER(i915) < 8)
 		return;
 
 	for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++)
 		show_client_class(p, i915, file_priv->client, i);
 }
+
+void i915_drm_client_add_object(struct i915_drm_client *client,
+				struct drm_i915_gem_object *obj)
+{
+	unsigned long flags;
+
+	GEM_WARN_ON(obj->client);
+	GEM_WARN_ON(!list_empty(&obj->client_link));
+
+	spin_lock_irqsave(&client->objects_lock, flags);
+	obj->client = i915_drm_client_get(client);
+	list_add_tail_rcu(&obj->client_link, &client->objects_list);
+	spin_unlock_irqrestore(&client->objects_lock, flags);
+}
+
+void i915_drm_client_remove_object(struct drm_i915_gem_object *obj)
+{
+	struct i915_drm_client *client = fetch_and_zero(&obj->client);
+	unsigned long flags;
+
+	/* Object may not be associated with a client. */
+	if (!client)
+		return;
+
+	spin_lock_irqsave(&client->objects_lock, flags);
+	list_del_rcu(&obj->client_link);
+	spin_unlock_irqrestore(&client->objects_lock, flags);
+
+	i915_drm_client_put(client);
+}
+
+void i915_drm_client_add_context_objects(struct i915_drm_client *client,
+					 struct intel_context *ce)
+{
+	if (ce->state)
+		i915_drm_client_add_object(client, ce->state->obj);
+
+	if (ce->ring != ce->engine->legacy.ring && ce->ring->vma)
+		i915_drm_client_add_object(client, ce->ring->vma->obj);
+}
 #endif
diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
index 67816c912bca..a439dd789936 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.h
+++ b/drivers/gpu/drm/i915/i915_drm_client.h
@@ -12,6 +12,10 @@
 
 #include <uapi/drm/i915_drm.h>
 
+#include "i915_file_private.h"
+#include "gem/i915_gem_object_types.h"
+#include "gt/intel_context_types.h"
+
 #define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
 
 struct drm_file;
@@ -25,6 +29,20 @@ struct i915_drm_client {
 	spinlock_t ctx_lock; /* For add/remove from ctx_list. */
 	struct list_head ctx_list; /* List of contexts belonging to client. */
 
+#ifdef CONFIG_PROC_FS
+	/**
+	 * @objects_lock: lock protecting @objects_list
+	 */
+	spinlock_t objects_lock;
+
+	/**
+	 * @objects_list: list of objects created by this client
+	 *
+	 * Protected by @objects_lock.
+	 */
+	struct list_head objects_list;
+#endif
+
 	/**
 	 * @past_runtime: Accumulation of pphwsp runtimes from closed contexts.
 	 */
@@ -49,4 +67,28 @@ struct i915_drm_client *i915_drm_client_alloc(void);
 
 void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file);
 
+#ifdef CONFIG_PROC_FS
+void i915_drm_client_add_object(struct i915_drm_client *client,
+				struct drm_i915_gem_object *obj);
+void i915_drm_client_remove_object(struct drm_i915_gem_object *obj);
+void i915_drm_client_add_context_objects(struct i915_drm_client *client,
+					 struct intel_context *ce);
+#else
+static inline void i915_drm_client_add_object(struct i915_drm_client *client,
+					      struct drm_i915_gem_object *obj)
+{
+}
+
+static inline void
+i915_drm_client_remove_object(struct drm_i915_gem_object *obj)
+{
+}
+
+static inline void
+i915_drm_client_add_context_objects(struct i915_drm_client *client,
+				    struct intel_context *ce)
+{
+}
+#endif
+
 #endif /* !__I915_DRM_CLIENT_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 0971f4976324..d04660b60046 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -57,6 +57,7 @@
 #include "i915_memcpy.h"
 #include "i915_reg.h"
 #include "i915_scatterlist.h"
+#include "i915_sysfs.h"
 #include "i915_utils.h"
 
 #define ALLOW_FAIL (__GFP_KSWAPD_RECLAIM | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
@@ -520,7 +521,7 @@ __find_vma(struct i915_vma_coredump *vma, const char *name)
 	return NULL;
 }
 
-struct i915_vma_coredump *
+static struct i915_vma_coredump *
 intel_gpu_error_find_batch(const struct intel_engine_coredump *ee)
 {
 	return __find_vma(ee->vma, "batch");
@@ -609,9 +610,9 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
 	va_end(args);
 }
 
-void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m,
-			       const struct intel_engine_cs *engine,
-			       const struct i915_vma_coredump *vma)
+static void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m,
+				      const struct intel_engine_cs *engine,
+				      const struct i915_vma_coredump *vma)
 {
 	char out[ASCII85_BUFSZ];
 	struct page *page;
@@ -2140,7 +2141,7 @@ __i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 du
 	return error;
 }
 
-struct i915_gpu_coredump *
+static struct i915_gpu_coredump *
 i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump_flags)
 {
 	static DEFINE_MUTEX(capture_mutex);
@@ -2211,7 +2212,7 @@ void i915_capture_error_state(struct intel_gt *gt,
 	i915_gpu_coredump_put(error);
 }
 
-struct i915_gpu_coredump *
+static struct i915_gpu_coredump *
 i915_first_error_state(struct drm_i915_private *i915)
 {
 	struct i915_gpu_coredump *error;
@@ -2378,3 +2379,184 @@ void intel_klog_error_capture(struct intel_gt *gt,
 	drm_info(&i915->drm, "[Capture/%d.%d] Dumped %zd bytes\n", l_count, line++, pos_err);
 }
 #endif
+
+static ssize_t gpu_state_read(struct file *file, char __user *ubuf,
+			      size_t count, loff_t *pos)
+{
+	struct i915_gpu_coredump *error;
+	ssize_t ret;
+	void *buf;
+
+	error = file->private_data;
+	if (!error)
+		return 0;
+
+	/* Bounce buffer required because of kernfs __user API convenience. */
+	buf = kmalloc(count, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = i915_gpu_coredump_copy_to_buffer(error, buf, *pos, count);
+	if (ret <= 0)
+		goto out;
+
+	if (!copy_to_user(ubuf, buf, ret))
+		*pos += ret;
+	else
+		ret = -EFAULT;
+
+out:
+	kfree(buf);
+	return ret;
+}
+
+static int gpu_state_release(struct inode *inode, struct file *file)
+{
+	i915_gpu_coredump_put(file->private_data);
+	return 0;
+}
+
+static int i915_gpu_info_open(struct inode *inode, struct file *file)
+{
+	struct drm_i915_private *i915 = inode->i_private;
+	struct i915_gpu_coredump *gpu;
+	intel_wakeref_t wakeref;
+
+	gpu = NULL;
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+		gpu = i915_gpu_coredump(to_gt(i915), ALL_ENGINES, CORE_DUMP_FLAG_NONE);
+
+	if (IS_ERR(gpu))
+		return PTR_ERR(gpu);
+
+	file->private_data = gpu;
+	return 0;
+}
+
+static const struct file_operations i915_gpu_info_fops = {
+	.owner = THIS_MODULE,
+	.open = i915_gpu_info_open,
+	.read = gpu_state_read,
+	.llseek = default_llseek,
+	.release = gpu_state_release,
+};
+
+static ssize_t
+i915_error_state_write(struct file *filp,
+		       const char __user *ubuf,
+		       size_t cnt,
+		       loff_t *ppos)
+{
+	struct i915_gpu_coredump *error = filp->private_data;
+
+	if (!error)
+		return 0;
+
+	drm_dbg(&error->i915->drm, "Resetting error state\n");
+	i915_reset_error_state(error->i915);
+
+	return cnt;
+}
+
+static int i915_error_state_open(struct inode *inode, struct file *file)
+{
+	struct i915_gpu_coredump *error;
+
+	error = i915_first_error_state(inode->i_private);
+	if (IS_ERR(error))
+		return PTR_ERR(error);
+
+	file->private_data  = error;
+	return 0;
+}
+
+static const struct file_operations i915_error_state_fops = {
+	.owner = THIS_MODULE,
+	.open = i915_error_state_open,
+	.read = gpu_state_read,
+	.write = i915_error_state_write,
+	.llseek = default_llseek,
+	.release = gpu_state_release,
+};
+
+void i915_gpu_error_debugfs_register(struct drm_i915_private *i915)
+{
+	struct drm_minor *minor = i915->drm.primary;
+
+	debugfs_create_file("i915_error_state", 0644, minor->debugfs_root, i915,
+			    &i915_error_state_fops);
+	debugfs_create_file("i915_gpu_info", 0644, minor->debugfs_root, i915,
+			    &i915_gpu_info_fops);
+}
+
+static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
+				struct bin_attribute *attr, char *buf,
+				loff_t off, size_t count)
+{
+
+	struct device *kdev = kobj_to_dev(kobj);
+	struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
+	struct i915_gpu_coredump *gpu;
+	ssize_t ret = 0;
+
+	/*
+	 * FIXME: Concurrent clients triggering resets and reading + clearing
+	 * dumps can cause inconsistent sysfs reads when a user calls in with a
+	 * non-zero offset to complete a prior partial read but the
+	 * gpu_coredump has been cleared or replaced.
+	 */
+
+	gpu = i915_first_error_state(i915);
+	if (IS_ERR(gpu)) {
+		ret = PTR_ERR(gpu);
+	} else if (gpu) {
+		ret = i915_gpu_coredump_copy_to_buffer(gpu, buf, off, count);
+		i915_gpu_coredump_put(gpu);
+	} else {
+		const char *str = "No error state collected\n";
+		size_t len = strlen(str);
+
+		if (off < len) {
+			ret = min_t(size_t, count, len - off);
+			memcpy(buf, str + off, ret);
+		}
+	}
+
+	return ret;
+}
+
+static ssize_t error_state_write(struct file *file, struct kobject *kobj,
+				 struct bin_attribute *attr, char *buf,
+				 loff_t off, size_t count)
+{
+	struct device *kdev = kobj_to_dev(kobj);
+	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
+
+	drm_dbg(&dev_priv->drm, "Resetting error state\n");
+	i915_reset_error_state(dev_priv);
+
+	return count;
+}
+
+static const struct bin_attribute error_state_attr = {
+	.attr.name = "error",
+	.attr.mode = S_IRUSR | S_IWUSR,
+	.size = 0,
+	.read = error_state_read,
+	.write = error_state_write,
+};
+
+void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915)
+{
+	struct device *kdev = i915->drm.primary->kdev;
+
+	if (sysfs_create_bin_file(&kdev->kobj, &error_state_attr))
+		drm_err(&i915->drm, "error_state sysfs setup failed\n");
+}
+
+void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915)
+{
+	struct device *kdev = i915->drm.primary->kdev;
+
+	sysfs_remove_bin_file(&kdev->kobj, &error_state_attr);
+}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 4ce227f7e1e1..7c255bb1c319 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -17,6 +17,7 @@
 #include "display/intel_display_device.h"
 #include "display/intel_display_params.h"
 #include "gt/intel_engine.h"
+#include "gt/intel_engine_types.h"
 #include "gt/intel_gt_types.h"
 #include "gt/uc/intel_uc_fw.h"
 
@@ -234,7 +235,7 @@ struct i915_gpu_error {
 	atomic_t reset_count;
 
 	/** Number of times an engine has been reset */
-	atomic_t reset_engine_count[I915_NUM_ENGINES];
+	atomic_t reset_engine_count[MAX_ENGINE_CLASS];
 };
 
 struct drm_i915_error_state_buf {
@@ -257,7 +258,14 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
 static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
 					  const struct intel_engine_cs *engine)
 {
-	return atomic_read(&error->reset_engine_count[engine->uabi_class]);
+	return atomic_read(&error->reset_engine_count[engine->class]);
+}
+
+static inline void
+i915_increase_reset_engine_count(struct i915_gpu_error *error,
+				 const struct intel_engine_cs *engine)
+{
+	atomic_inc(&error->reset_engine_count[engine->class]);
 }
 
 #define CORE_DUMP_FLAG_NONE           0x0
@@ -277,14 +285,7 @@ static inline void intel_klog_error_capture(struct intel_gt *gt,
 
 __printf(2, 3)
 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
-void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m,
-			       const struct intel_engine_cs *engine,
-			       const struct i915_vma_coredump *vma);
-struct i915_vma_coredump *
-intel_gpu_error_find_batch(const struct intel_engine_coredump *ee);
-
-struct i915_gpu_coredump *i915_gpu_coredump(struct intel_gt *gt,
-					    intel_engine_mask_t engine_mask, u32 dump_flags);
+
 void i915_capture_error_state(struct intel_gt *gt,
 			      intel_engine_mask_t engine_mask, u32 dump_flags);
 
@@ -332,10 +333,13 @@ static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
 		kref_put(&gpu->ref, __i915_gpu_coredump_free);
 }
 
-struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private *i915);
 void i915_reset_error_state(struct drm_i915_private *i915);
 void i915_disable_error_state(struct drm_i915_private *i915, int err);
 
+void i915_gpu_error_debugfs_register(struct drm_i915_private *i915);
+void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915);
+void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915);
+
 #else
 
 __printf(2, 3)
@@ -403,12 +407,6 @@ static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
 {
 }
 
-static inline struct i915_gpu_coredump *
-i915_first_error_state(struct drm_i915_private *i915)
-{
-	return ERR_PTR(-ENODEV);
-}
-
 static inline void i915_reset_error_state(struct drm_i915_private *i915)
 {
 }
@@ -418,6 +416,18 @@ static inline void i915_disable_error_state(struct drm_i915_private *i915,
 {
 }
 
+static inline void i915_gpu_error_debugfs_register(struct drm_i915_private *i915)
+{
+}
+
+static inline void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915)
+{
+}
+
+static inline void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915)
+{
+}
+
 #endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */
 
 #endif /* _I915_GPU_ERROR_H_ */
diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
index 975da8e7f2a9..8c3f443c8347 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -175,7 +175,7 @@ hwm_power1_max_interval_show(struct device *dev, struct device_attribute *attr,
 	 *     tau4 = (4 | x) << y
 	 * but add 2 when doing the final right shift to account for units
 	 */
-	tau4 = ((1 << x_w) | x) << y;
+	tau4 = (u64)((1 << x_w) | x) << y;
 	/* val in hwmon interface units (millisec) */
 	out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
 
@@ -211,7 +211,7 @@ hwm_power1_max_interval_store(struct device *dev,
 	r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT);
 	x = REG_FIELD_GET(PKG_MAX_WIN_X, r);
 	y = REG_FIELD_GET(PKG_MAX_WIN_Y, r);
-	tau4 = ((1 << x_w) | x) << y;
+	tau4 = (u64)((1 << x_w) | x) << y;
 	max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
 
 	if (val > max_win)
diff --git a/drivers/gpu/drm/i915/i915_memcpy.c b/drivers/gpu/drm/i915/i915_memcpy.c
index 1b021a4902de..ba82277254b7 100644
--- a/drivers/gpu/drm/i915/i915_memcpy.c
+++ b/drivers/gpu/drm/i915/i915_memcpy.c
@@ -23,6 +23,8 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/cpufeature.h>
 #include <asm/fpu/api.h>
 
 #include "i915_memcpy.h"
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 7b1c8de2f9cb..2d695818f006 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -772,10 +772,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 		 * The reason field includes flags identifying what
 		 * triggered this specific report (mostly timer
 		 * triggered or e.g. due to a context switch).
-		 *
-		 * In MMIO triggered reports, some platforms do not set the
-		 * reason bit in this field and it is valid to have a reason
-		 * field of zero.
 		 */
 		reason = oa_report_reason(stream, report);
 		ctx_id = oa_context_id(stream, report32);
@@ -787,8 +783,41 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 		 *
 		 * Note: that we don't clear the valid_ctx_bit so userspace can
 		 * understand that the ID has been squashed by the kernel.
+		 *
+		 * Update:
+		 *
+		 * On XEHP platforms the behavior of the context-valid-bit has
+		 * changed compared to prior platforms. To describe this, we
+		 * define a few terms:
+		 *
+		 * context-switch-report: This is a report with the reason type
+		 * being context-switch. It is generated when a context switches
+		 * out.
+		 *
+		 * context-valid-bit: A bit that is set in the report ID field
+		 * to indicate that a valid context has been loaded.
+		 *
+		 * gpu-idle: A condition characterized by a
+		 * context-switch-report with context-valid-bit set to 0.
+		 *
+		 * On prior platforms, context-valid-bit is set to 0 only
+		 * when GPU goes idle. In all other reports, it is set to 1.
+		 *
+		 * On XEHP platforms, context-valid-bit is set to 1 in a context
+		 * switch report if a new context switched in. For all other
+		 * reports it is set to 0.
+		 *
+		 * This change in behavior causes an issue with MMIO triggered
+		 * reports. MMIO triggered reports have the markers in the
+		 * context ID field and the context-valid-bit is 0. The logic
+		 * below to squash the context ID would render the report
+		 * useless since the user will not be able to find it in the OA
+		 * buffer. Since MMIO triggered reports exist only on XEHP,
+		 * we should avoid squashing these for XEHP platforms.
 		 */
-		if (oa_report_ctx_invalid(stream, report)) {
+
+		if (oa_report_ctx_invalid(stream, report) &&
+		    GRAPHICS_VER_FULL(stream->engine->i915) < IP_VER(12, 50)) {
 			ctx_id = INVALID_CTX_ID;
 			oa_context_id_squash(stream, report32);
 		}
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
index 13b1ae9b96c7..46445248d193 100644
--- a/drivers/gpu/drm/i915/i915_perf_types.h
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -291,7 +291,8 @@ struct i915_perf_stream {
 		int size_exponent;
 
 		/**
-		 * @ptr_lock: Locks reads and writes to all head/tail state
+		 * @oa_buffer.ptr_lock: Locks reads and writes to all
+		 * head/tail state
 		 *
 		 * Consider: the head and tail pointer state needs to be read
 		 * consistently from a hrtimer callback (atomic context) and
@@ -313,7 +314,8 @@ struct i915_perf_stream {
 		spinlock_t ptr_lock;
 
 		/**
-		 * @head: Although we can always read back the head pointer register,
+		 * @oa_buffer.head: Although we can always read back
+		 * the head pointer register,
 		 * we prefer to avoid trusting the HW state, just to avoid any
 		 * risk that some hardware condition could * somehow bump the
 		 * head pointer unpredictably and cause us to forward the wrong
@@ -322,7 +324,8 @@ struct i915_perf_stream {
 		u32 head;
 
 		/**
-		 * @tail: The last verified tail that can be read by userspace.
+		 * @oa_buffer.tail: The last verified tail that can be
+		 * read by userspace.
 		 */
 		u32 tail;
 	} oa_buffer;
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index f861863eb7c1..21eb0c5b320d 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -31,6 +31,16 @@
 static cpumask_t i915_pmu_cpumask;
 static unsigned int i915_pmu_target_cpu = -1;
 
+static struct i915_pmu *event_to_pmu(struct perf_event *event)
+{
+	return container_of(event->pmu, struct i915_pmu, base);
+}
+
+static struct drm_i915_private *pmu_to_i915(struct i915_pmu *pmu)
+{
+	return container_of(pmu, struct drm_i915_private, pmu);
+}
+
 static u8 engine_config_sample(u64 config)
 {
 	return config & I915_PMU_SAMPLE_MASK;
@@ -141,7 +151,7 @@ static u32 frequency_enabled_mask(void)
 
 static bool pmu_needs_timer(struct i915_pmu *pmu)
 {
-	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 	u32 enable;
 
 	/*
@@ -213,19 +223,19 @@ static u64 get_rc6(struct intel_gt *gt)
 	struct drm_i915_private *i915 = gt->i915;
 	const unsigned int gt_id = gt->info.id;
 	struct i915_pmu *pmu = &i915->pmu;
+	intel_wakeref_t wakeref;
 	unsigned long flags;
-	bool awake = false;
 	u64 val;
 
-	if (intel_gt_pm_get_if_awake(gt)) {
+	wakeref = intel_gt_pm_get_if_awake(gt);
+	if (wakeref) {
 		val = __get_rc6(gt);
-		intel_gt_pm_put_async(gt);
-		awake = true;
+		intel_gt_pm_put_async(gt, wakeref);
 	}
 
 	spin_lock_irqsave(&pmu->lock, flags);
 
-	if (awake) {
+	if (wakeref) {
 		store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
 	} else {
 		/*
@@ -251,7 +261,7 @@ static u64 get_rc6(struct intel_gt *gt)
 
 static void init_rc6(struct i915_pmu *pmu)
 {
-	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 	struct intel_gt *gt;
 	unsigned int i;
 
@@ -429,12 +439,14 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
 	const unsigned int gt_id = gt->info.id;
 	struct i915_pmu *pmu = &i915->pmu;
 	struct intel_rps *rps = &gt->rps;
+	intel_wakeref_t wakeref;
 
 	if (!frequency_sampling_enabled(pmu, gt_id))
 		return;
 
 	/* Report 0/0 (actual/requested) frequency while parked. */
-	if (!intel_gt_pm_get_if_awake(gt))
+	wakeref = intel_gt_pm_get_if_awake(gt);
+	if (!wakeref)
 		return;
 
 	if (pmu->enable & config_mask(__I915_PMU_ACTUAL_FREQUENCY(gt_id))) {
@@ -463,14 +475,13 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
 				period_ns / 1000);
 	}
 
-	intel_gt_pm_put_async(gt);
+	intel_gt_pm_put_async(gt, wakeref);
 }
 
 static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
 {
-	struct drm_i915_private *i915 =
-		container_of(hrtimer, struct drm_i915_private, pmu.timer);
-	struct i915_pmu *pmu = &i915->pmu;
+	struct i915_pmu *pmu = container_of(hrtimer, struct i915_pmu, timer);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 	unsigned int period_ns;
 	struct intel_gt *gt;
 	unsigned int i;
@@ -505,8 +516,8 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
 
 static void i915_pmu_event_destroy(struct perf_event *event)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct i915_pmu *pmu = event_to_pmu(event);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 
 	drm_WARN_ON(&i915->drm, event->parent);
 
@@ -572,8 +583,8 @@ config_status(struct drm_i915_private *i915, u64 config)
 
 static int engine_event_init(struct perf_event *event)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct i915_pmu *pmu = event_to_pmu(event);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 	struct intel_engine_cs *engine;
 
 	engine = intel_engine_lookup_user(i915, engine_event_class(event),
@@ -586,9 +597,8 @@ static int engine_event_init(struct perf_event *event)
 
 static int i915_pmu_event_init(struct perf_event *event)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
-	struct i915_pmu *pmu = &i915->pmu;
+	struct i915_pmu *pmu = event_to_pmu(event);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 	int ret;
 
 	if (pmu->closed)
@@ -628,9 +638,8 @@ static int i915_pmu_event_init(struct perf_event *event)
 
 static u64 __i915_pmu_event_read(struct perf_event *event)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
-	struct i915_pmu *pmu = &i915->pmu;
+	struct i915_pmu *pmu = event_to_pmu(event);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 	u64 val = 0;
 
 	if (is_engine_event(event)) {
@@ -686,10 +695,8 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
 
 static void i915_pmu_event_read(struct perf_event *event)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct i915_pmu *pmu = event_to_pmu(event);
 	struct hw_perf_event *hwc = &event->hw;
-	struct i915_pmu *pmu = &i915->pmu;
 	u64 prev, new;
 
 	if (pmu->closed) {
@@ -707,10 +714,9 @@ static void i915_pmu_event_read(struct perf_event *event)
 
 static void i915_pmu_enable(struct perf_event *event)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct i915_pmu *pmu = event_to_pmu(event);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 	const unsigned int bit = event_bit(event);
-	struct i915_pmu *pmu = &i915->pmu;
 	unsigned long flags;
 
 	if (bit == -1)
@@ -771,10 +777,9 @@ update:
 
 static void i915_pmu_disable(struct perf_event *event)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct i915_pmu *pmu = event_to_pmu(event);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 	const unsigned int bit = event_bit(event);
-	struct i915_pmu *pmu = &i915->pmu;
 	unsigned long flags;
 
 	if (bit == -1)
@@ -818,9 +823,7 @@ static void i915_pmu_disable(struct perf_event *event)
 
 static void i915_pmu_event_start(struct perf_event *event, int flags)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
-	struct i915_pmu *pmu = &i915->pmu;
+	struct i915_pmu *pmu = event_to_pmu(event);
 
 	if (pmu->closed)
 		return;
@@ -848,9 +851,7 @@ out:
 
 static int i915_pmu_event_add(struct perf_event *event, int flags)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
-	struct i915_pmu *pmu = &i915->pmu;
+	struct i915_pmu *pmu = event_to_pmu(event);
 
 	if (pmu->closed)
 		return -ENODEV;
@@ -982,7 +983,7 @@ add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
 static struct attribute **
 create_event_attributes(struct i915_pmu *pmu)
 {
-	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 	static const struct {
 		unsigned int counter;
 		const char *name;
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index e88bb4f04305..613decd47760 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -155,81 +155,6 @@ static const struct bin_attribute dpf_attrs_1 = {
 	.private = (void *)1
 };
 
-#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
-
-static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
-				struct bin_attribute *attr, char *buf,
-				loff_t off, size_t count)
-{
-
-	struct device *kdev = kobj_to_dev(kobj);
-	struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
-	struct i915_gpu_coredump *gpu;
-	ssize_t ret = 0;
-
-	/*
-	 * FIXME: Concurrent clients triggering resets and reading + clearing
-	 * dumps can cause inconsistent sysfs reads when a user calls in with a
-	 * non-zero offset to complete a prior partial read but the
-	 * gpu_coredump has been cleared or replaced.
-	 */
-
-	gpu = i915_first_error_state(i915);
-	if (IS_ERR(gpu)) {
-		ret = PTR_ERR(gpu);
-	} else if (gpu) {
-		ret = i915_gpu_coredump_copy_to_buffer(gpu, buf, off, count);
-		i915_gpu_coredump_put(gpu);
-	} else {
-		const char *str = "No error state collected\n";
-		size_t len = strlen(str);
-
-		if (off < len) {
-			ret = min_t(size_t, count, len - off);
-			memcpy(buf, str + off, ret);
-		}
-	}
-
-	return ret;
-}
-
-static ssize_t error_state_write(struct file *file, struct kobject *kobj,
-				 struct bin_attribute *attr, char *buf,
-				 loff_t off, size_t count)
-{
-	struct device *kdev = kobj_to_dev(kobj);
-	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-
-	drm_dbg(&dev_priv->drm, "Resetting error state\n");
-	i915_reset_error_state(dev_priv);
-
-	return count;
-}
-
-static const struct bin_attribute error_state_attr = {
-	.attr.name = "error",
-	.attr.mode = S_IRUSR | S_IWUSR,
-	.size = 0,
-	.read = error_state_read,
-	.write = error_state_write,
-};
-
-static void i915_setup_error_capture(struct device *kdev)
-{
-	if (sysfs_create_bin_file(&kdev->kobj, &error_state_attr))
-		drm_err(&kdev_minor_to_i915(kdev)->drm,
-			"error_state sysfs setup failed\n");
-}
-
-static void i915_teardown_error_capture(struct device *kdev)
-{
-	sysfs_remove_bin_file(&kdev->kobj, &error_state_attr);
-}
-#else
-static void i915_setup_error_capture(struct device *kdev) {}
-static void i915_teardown_error_capture(struct device *kdev) {}
-#endif
-
 void i915_setup_sysfs(struct drm_i915_private *dev_priv)
 {
 	struct device *kdev = dev_priv->drm.primary->kdev;
@@ -255,7 +180,7 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv)
 		drm_warn(&dev_priv->drm,
 			 "failed to register GT sysfs directory\n");
 
-	i915_setup_error_capture(kdev);
+	i915_gpu_error_sysfs_setup(dev_priv);
 
 	intel_engines_add_sysfs(dev_priv);
 }
@@ -264,7 +189,7 @@ void i915_teardown_sysfs(struct drm_i915_private *dev_priv)
 {
 	struct device *kdev = dev_priv->drm.primary->kdev;
 
-	i915_teardown_error_capture(kdev);
+	i915_gpu_error_sysfs_teardown(dev_priv);
 
 	device_remove_bin_file(kdev,  &dpf_attrs_1);
 	device_remove_bin_file(kdev,  &dpf_attrs);
diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c
index e98b6d69a91a..9b6d87c8b583 100644
--- a/drivers/gpu/drm/i915/intel_gvt.c
+++ b/drivers/gpu/drm/i915/intel_gvt.c
@@ -41,7 +41,7 @@
  * To virtualize GPU resources GVT-g driver depends on hypervisor technology
  * e.g KVM/VFIO/mdev, Xen, etc. to provide resource access trapping capability
  * and be virtualized within GVT-g device module. More architectural design
- * doc is available on https://01.org/group/2230/documentation-list.
+ * doc is available on https://github.com/intel/gvt-linux/wiki.
  */
 
 static LIST_HEAD(intel_gvt_devices);
diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c
index 3d1fdea9811d..60a03340bbd4 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/intel_memory_region.c
@@ -216,6 +216,22 @@ static int intel_memory_region_memtest(struct intel_memory_region *mem,
 	return err;
 }
 
+static const char *region_type_str(u16 type)
+{
+	switch (type) {
+	case INTEL_MEMORY_SYSTEM:
+		return "system";
+	case INTEL_MEMORY_LOCAL:
+		return "local";
+	case INTEL_MEMORY_STOLEN_LOCAL:
+		return "stolen-local";
+	case INTEL_MEMORY_STOLEN_SYSTEM:
+		return "stolen-system";
+	default:
+		return "unknown";
+	}
+}
+
 struct intel_memory_region *
 intel_memory_region_create(struct drm_i915_private *i915,
 			   resource_size_t start,
@@ -244,6 +260,9 @@ intel_memory_region_create(struct drm_i915_private *i915,
 	mem->type = type;
 	mem->instance = instance;
 
+	snprintf(mem->uabi_name, sizeof(mem->uabi_name), "%s%u",
+		 region_type_str(type), instance);
+
 	mutex_init(&mem->objects.lock);
 	INIT_LIST_HEAD(&mem->objects.list);
 
diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h
index 2953ed5c3248..9ba36454e51b 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.h
+++ b/drivers/gpu/drm/i915/intel_memory_region.h
@@ -80,6 +80,7 @@ struct intel_memory_region {
 	u16 instance;
 	enum intel_region_id id;
 	char name[16];
+	char uabi_name[16];
 	bool private; /* not for userspace */
 
 	struct {
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 8743153fad87..860b51b56a92 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -50,184 +50,44 @@
  * present for a given platform.
  */
 
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
-
-#include <linux/sort.h>
-
-#define STACKDEPTH 8
-
-static noinline depot_stack_handle_t __save_depot_stack(void)
+static struct drm_i915_private *rpm_to_i915(struct intel_runtime_pm *rpm)
 {
-	unsigned long entries[STACKDEPTH];
-	unsigned int n;
-
-	n = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
-	return stack_depot_save(entries, n, GFP_NOWAIT | __GFP_NOWARN);
+	return container_of(rpm, struct drm_i915_private, runtime_pm);
 }
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
+
 static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
 {
-	spin_lock_init(&rpm->debug.lock);
-	stack_depot_init();
+	ref_tracker_dir_init(&rpm->debug, INTEL_REFTRACK_DEAD_COUNT, dev_name(rpm->kdev));
 }
 
-static noinline depot_stack_handle_t
+static intel_wakeref_t
 track_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
 {
-	depot_stack_handle_t stack, *stacks;
-	unsigned long flags;
-
-	if (rpm->no_wakeref_tracking)
+	if (!rpm->available || rpm->no_wakeref_tracking)
 		return -1;
 
-	stack = __save_depot_stack();
-	if (!stack)
-		return -1;
-
-	spin_lock_irqsave(&rpm->debug.lock, flags);
-
-	if (!rpm->debug.count)
-		rpm->debug.last_acquire = stack;
-
-	stacks = krealloc(rpm->debug.owners,
-			  (rpm->debug.count + 1) * sizeof(*stacks),
-			  GFP_NOWAIT | __GFP_NOWARN);
-	if (stacks) {
-		stacks[rpm->debug.count++] = stack;
-		rpm->debug.owners = stacks;
-	} else {
-		stack = -1;
-	}
-
-	spin_unlock_irqrestore(&rpm->debug.lock, flags);
-
-	return stack;
+	return intel_ref_tracker_alloc(&rpm->debug);
 }
 
 static void untrack_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
-					     depot_stack_handle_t stack)
+					     intel_wakeref_t wakeref)
 {
-	struct drm_i915_private *i915 = container_of(rpm,
-						     struct drm_i915_private,
-						     runtime_pm);
-	unsigned long flags, n;
-	bool found = false;
-
-	if (unlikely(stack == -1))
+	if (!rpm->available || rpm->no_wakeref_tracking)
 		return;
 
-	spin_lock_irqsave(&rpm->debug.lock, flags);
-	for (n = rpm->debug.count; n--; ) {
-		if (rpm->debug.owners[n] == stack) {
-			memmove(rpm->debug.owners + n,
-				rpm->debug.owners + n + 1,
-				(--rpm->debug.count - n) * sizeof(stack));
-			found = true;
-			break;
-		}
-	}
-	spin_unlock_irqrestore(&rpm->debug.lock, flags);
-
-	if (drm_WARN(&i915->drm, !found,
-		     "Unmatched wakeref (tracking %lu), count %u\n",
-		     rpm->debug.count, atomic_read(&rpm->wakeref_count))) {
-		char *buf;
-
-		buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN);
-		if (!buf)
-			return;
-
-		stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
-		DRM_DEBUG_DRIVER("wakeref %x from\n%s", stack, buf);
-
-		stack = READ_ONCE(rpm->debug.last_release);
-		if (stack) {
-			stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
-			DRM_DEBUG_DRIVER("wakeref last released at\n%s", buf);
-		}
-
-		kfree(buf);
-	}
+	intel_ref_tracker_free(&rpm->debug, wakeref);
 }
 
-static int cmphandle(const void *_a, const void *_b)
+static void untrack_all_intel_runtime_pm_wakerefs(struct intel_runtime_pm *rpm)
 {
-	const depot_stack_handle_t * const a = _a, * const b = _b;
-
-	if (*a < *b)
-		return -1;
-	else if (*a > *b)
-		return 1;
-	else
-		return 0;
-}
-
-static void
-__print_intel_runtime_pm_wakeref(struct drm_printer *p,
-				 const struct intel_runtime_pm_debug *dbg)
-{
-	unsigned long i;
-	char *buf;
-
-	buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN);
-	if (!buf)
-		return;
-
-	if (dbg->last_acquire) {
-		stack_depot_snprint(dbg->last_acquire, buf, PAGE_SIZE, 2);
-		drm_printf(p, "Wakeref last acquired:\n%s", buf);
-	}
-
-	if (dbg->last_release) {
-		stack_depot_snprint(dbg->last_release, buf, PAGE_SIZE, 2);
-		drm_printf(p, "Wakeref last released:\n%s", buf);
-	}
-
-	drm_printf(p, "Wakeref count: %lu\n", dbg->count);
-
-	sort(dbg->owners, dbg->count, sizeof(*dbg->owners), cmphandle, NULL);
-
-	for (i = 0; i < dbg->count; i++) {
-		depot_stack_handle_t stack = dbg->owners[i];
-		unsigned long rep;
-
-		rep = 1;
-		while (i + 1 < dbg->count && dbg->owners[i + 1] == stack)
-			rep++, i++;
-		stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
-		drm_printf(p, "Wakeref x%lu taken at:\n%s", rep, buf);
-	}
-
-	kfree(buf);
-}
-
-static noinline void
-__untrack_all_wakerefs(struct intel_runtime_pm_debug *debug,
-		       struct intel_runtime_pm_debug *saved)
-{
-	*saved = *debug;
-
-	debug->owners = NULL;
-	debug->count = 0;
-	debug->last_release = __save_depot_stack();
-}
-
-static void
-dump_and_free_wakeref_tracking(struct intel_runtime_pm_debug *debug)
-{
-	if (debug->count) {
-		struct drm_printer p = drm_debug_printer("i915");
-
-		__print_intel_runtime_pm_wakeref(&p, debug);
-	}
-
-	kfree(debug->owners);
+	ref_tracker_dir_exit(&rpm->debug);
 }
 
 static noinline void
 __intel_wakeref_dec_and_check_tracking(struct intel_runtime_pm *rpm)
 {
-	struct intel_runtime_pm_debug dbg = {};
 	unsigned long flags;
 
 	if (!atomic_dec_and_lock_irqsave(&rpm->wakeref_count,
@@ -235,60 +95,14 @@ __intel_wakeref_dec_and_check_tracking(struct intel_runtime_pm *rpm)
 					 flags))
 		return;
 
-	__untrack_all_wakerefs(&rpm->debug, &dbg);
+	ref_tracker_dir_print_locked(&rpm->debug, INTEL_REFTRACK_PRINT_LIMIT);
 	spin_unlock_irqrestore(&rpm->debug.lock, flags);
-
-	dump_and_free_wakeref_tracking(&dbg);
-}
-
-static noinline void
-untrack_all_intel_runtime_pm_wakerefs(struct intel_runtime_pm *rpm)
-{
-	struct intel_runtime_pm_debug dbg = {};
-	unsigned long flags;
-
-	spin_lock_irqsave(&rpm->debug.lock, flags);
-	__untrack_all_wakerefs(&rpm->debug, &dbg);
-	spin_unlock_irqrestore(&rpm->debug.lock, flags);
-
-	dump_and_free_wakeref_tracking(&dbg);
 }
 
 void print_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
 				    struct drm_printer *p)
 {
-	struct intel_runtime_pm_debug dbg = {};
-
-	do {
-		unsigned long alloc = dbg.count;
-		depot_stack_handle_t *s;
-
-		spin_lock_irq(&rpm->debug.lock);
-		dbg.count = rpm->debug.count;
-		if (dbg.count <= alloc) {
-			memcpy(dbg.owners,
-			       rpm->debug.owners,
-			       dbg.count * sizeof(*s));
-		}
-		dbg.last_acquire = rpm->debug.last_acquire;
-		dbg.last_release = rpm->debug.last_release;
-		spin_unlock_irq(&rpm->debug.lock);
-		if (dbg.count <= alloc)
-			break;
-
-		s = krealloc(dbg.owners,
-			     dbg.count * sizeof(*s),
-			     GFP_NOWAIT | __GFP_NOWARN);
-		if (!s)
-			goto out;
-
-		dbg.owners = s;
-	} while (1);
-
-	__print_intel_runtime_pm_wakeref(p, &dbg);
-
-out:
-	kfree(dbg.owners);
+	intel_ref_tracker_show(&rpm->debug, p);
 }
 
 #else
@@ -297,14 +111,14 @@ static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
 {
 }
 
-static depot_stack_handle_t
+static intel_wakeref_t
 track_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
 {
 	return -1;
 }
 
 static void untrack_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
-					     intel_wakeref_t wref)
+					     intel_wakeref_t wakeref)
 {
 }
 
@@ -349,9 +163,7 @@ intel_runtime_pm_release(struct intel_runtime_pm *rpm, int wakelock)
 static intel_wakeref_t __intel_runtime_pm_get(struct intel_runtime_pm *rpm,
 					      bool wakelock)
 {
-	struct drm_i915_private *i915 = container_of(rpm,
-						     struct drm_i915_private,
-						     runtime_pm);
+	struct drm_i915_private *i915 = rpm_to_i915(rpm);
 	int ret;
 
 	ret = pm_runtime_get_sync(rpm->kdev);
@@ -556,9 +368,7 @@ void intel_runtime_pm_put(struct intel_runtime_pm *rpm, intel_wakeref_t wref)
  */
 void intel_runtime_pm_enable(struct intel_runtime_pm *rpm)
 {
-	struct drm_i915_private *i915 = container_of(rpm,
-						     struct drm_i915_private,
-						     runtime_pm);
+	struct drm_i915_private *i915 = rpm_to_i915(rpm);
 	struct device *kdev = rpm->kdev;
 
 	/*
@@ -611,9 +421,7 @@ void intel_runtime_pm_enable(struct intel_runtime_pm *rpm)
 
 void intel_runtime_pm_disable(struct intel_runtime_pm *rpm)
 {
-	struct drm_i915_private *i915 = container_of(rpm,
-						     struct drm_i915_private,
-						     runtime_pm);
+	struct drm_i915_private *i915 = rpm_to_i915(rpm);
 	struct device *kdev = rpm->kdev;
 
 	/* Transfer rpm ownership back to core */
@@ -628,9 +436,7 @@ void intel_runtime_pm_disable(struct intel_runtime_pm *rpm)
 
 void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm)
 {
-	struct drm_i915_private *i915 = container_of(rpm,
-						     struct drm_i915_private,
-						     runtime_pm);
+	struct drm_i915_private *i915 = rpm_to_i915(rpm);
 	int count = atomic_read(&rpm->wakeref_count);
 
 	intel_wakeref_auto_fini(&rpm->userfault_wakeref);
@@ -639,14 +445,17 @@ void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm)
 		 "i915 raw-wakerefs=%d wakelocks=%d on cleanup\n",
 		 intel_rpm_raw_wakeref_count(count),
 		 intel_rpm_wakelock_count(count));
+}
 
+void intel_runtime_pm_driver_last_release(struct intel_runtime_pm *rpm)
+{
+	intel_runtime_pm_driver_release(rpm);
 	untrack_all_intel_runtime_pm_wakerefs(rpm);
 }
 
 void intel_runtime_pm_init_early(struct intel_runtime_pm *rpm)
 {
-	struct drm_i915_private *i915 =
-			container_of(rpm, struct drm_i915_private, runtime_pm);
+	struct drm_i915_private *i915 = rpm_to_i915(rpm);
 	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
 	struct device *kdev = &pdev->dev;
 
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.h b/drivers/gpu/drm/i915/intel_runtime_pm.h
index be43614c73fd..de3579d399e1 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.h
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.h
@@ -75,15 +75,7 @@ struct intel_runtime_pm {
 	 * paired rpm_put) we can remove corresponding pairs of and keep
 	 * the array trimmed to active wakerefs.
 	 */
-	struct intel_runtime_pm_debug {
-		spinlock_t lock;
-
-		depot_stack_handle_t last_acquire;
-		depot_stack_handle_t last_release;
-
-		depot_stack_handle_t *owners;
-		unsigned long count;
-	} debug;
+	struct ref_tracker_dir debug;
 #endif
 };
 
@@ -187,6 +179,7 @@ void intel_runtime_pm_init_early(struct intel_runtime_pm *rpm);
 void intel_runtime_pm_enable(struct intel_runtime_pm *rpm);
 void intel_runtime_pm_disable(struct intel_runtime_pm *rpm);
 void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm);
+void intel_runtime_pm_driver_last_release(struct intel_runtime_pm *rpm);
 
 intel_wakeref_t intel_runtime_pm_get(struct intel_runtime_pm *rpm);
 intel_wakeref_t intel_runtime_pm_get_if_in_use(struct intel_runtime_pm *rpm);
diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c
index 623a69089386..dea2f63184f8 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.c
+++ b/drivers/gpu/drm/i915/intel_wakeref.c
@@ -99,7 +99,8 @@ static void __intel_wakeref_put_work(struct work_struct *wrk)
 void __intel_wakeref_init(struct intel_wakeref *wf,
 			  struct drm_i915_private *i915,
 			  const struct intel_wakeref_ops *ops,
-			  struct intel_wakeref_lockclass *key)
+			  struct intel_wakeref_lockclass *key,
+			  const char *name)
 {
 	wf->i915 = i915;
 	wf->ops = ops;
@@ -111,6 +112,10 @@ void __intel_wakeref_init(struct intel_wakeref *wf,
 	INIT_DELAYED_WORK(&wf->work, __intel_wakeref_put_work);
 	lockdep_init_map(&wf->work.work.lockdep_map,
 			 "wakeref.work", &key->work, 0);
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_WAKEREF)
+	ref_tracker_dir_init(&wf->debug, INTEL_REFTRACK_DEAD_COUNT, name);
+#endif
 }
 
 int intel_wakeref_wait_for_idle(struct intel_wakeref *wf)
@@ -191,3 +196,40 @@ void intel_wakeref_auto_fini(struct intel_wakeref_auto *wf)
 	intel_wakeref_auto(wf, 0);
 	INTEL_WAKEREF_BUG_ON(wf->wakeref);
 }
+
+/**
+ * intel_ref_tracker_show - print a ref_tracker directory report
+ * @dir: ref_tracker directory to report on
+ * @p: drm_printer that receives the report
+ *
+ * Formats the report into a PAGE_SIZE scratch buffer and feeds it to @p one
+ * line at a time. Prints nothing if the buffer cannot be allocated or if
+ * ref_tracker_dir_snprint() returns 0, and appends a note with the number of
+ * dropped bytes when the returned length does not fit in the buffer.
+ */
+void intel_ref_tracker_show(struct ref_tracker_dir *dir,
+			    struct drm_printer *p)
+{
+	const size_t buf_size = PAGE_SIZE;
+	char *buf, *sb, *se;
+	size_t count;
+
+	buf = kmalloc(buf_size, GFP_NOWAIT);
+	if (!buf)
+		return;
+
+	count = ref_tracker_dir_snprint(dir, buf, buf_size);
+	if (!count)
+		goto free;
+	/* printk does not like big buffers, so we split it */
+	for (sb = buf; *sb; sb = se + 1) {
+		se = strchrnul(sb, '\n');
+		drm_printf(p, "%.*s", (int)(se - sb + 1), sb);
+		if (!*se)
+			break;
+	}
+	if (count >= buf_size)
+		drm_printf(p, "\n...dropped %zu extra bytes of leak report.\n",
+			   count + 1 - buf_size);
+free:
+	kfree(buf);
+}
diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h
index ec881b097368..68aa3be48251 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.h
+++ b/drivers/gpu/drm/i915/intel_wakeref.h
@@ -7,16 +7,25 @@
 #ifndef INTEL_WAKEREF_H
 #define INTEL_WAKEREF_H
 
+#include <drm/drm_print.h>
+
 #include <linux/atomic.h>
 #include <linux/bitfield.h>
 #include <linux/bits.h>
 #include <linux/lockdep.h>
 #include <linux/mutex.h>
 #include <linux/refcount.h>
+#include <linux/ref_tracker.h>
+#include <linux/slab.h>
 #include <linux/stackdepot.h>
 #include <linux/timer.h>
 #include <linux/workqueue.h>
 
+typedef unsigned long intel_wakeref_t;
+
+#define INTEL_REFTRACK_DEAD_COUNT 16
+#define INTEL_REFTRACK_PRINT_LIMIT 16
+
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
 #define INTEL_WAKEREF_BUG_ON(expr) BUG_ON(expr)
 #else
@@ -26,8 +35,6 @@
 struct intel_runtime_pm;
 struct intel_wakeref;
 
-typedef depot_stack_handle_t intel_wakeref_t;
-
 struct intel_wakeref_ops {
 	int (*get)(struct intel_wakeref *wf);
 	int (*put)(struct intel_wakeref *wf);
@@ -43,6 +50,10 @@ struct intel_wakeref {
 	const struct intel_wakeref_ops *ops;
 
 	struct delayed_work work;
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_WAKEREF)
+	struct ref_tracker_dir debug;
+#endif
 };
 
 struct intel_wakeref_lockclass {
@@ -53,11 +64,12 @@ struct intel_wakeref_lockclass {
 void __intel_wakeref_init(struct intel_wakeref *wf,
 			  struct drm_i915_private *i915,
 			  const struct intel_wakeref_ops *ops,
-			  struct intel_wakeref_lockclass *key);
-#define intel_wakeref_init(wf, i915, ops) do {				\
+			  struct intel_wakeref_lockclass *key,
+			  const char *name);
+#define intel_wakeref_init(wf, i915, ops, name) do {			\
 	static struct intel_wakeref_lockclass __key;			\
 									\
-	__intel_wakeref_init((wf), (i915), (ops), &__key);		\
+	__intel_wakeref_init((wf), (i915), (ops), &__key, name);	\
 } while (0)
 
 int __intel_wakeref_get_first(struct intel_wakeref *wf);
@@ -261,6 +273,57 @@ __intel_wakeref_defer_park(struct intel_wakeref *wf)
  */
 int intel_wakeref_wait_for_idle(struct intel_wakeref *wf);
 
+#define INTEL_WAKEREF_DEF ((intel_wakeref_t)(-1))
+
+static inline intel_wakeref_t intel_ref_tracker_alloc(struct ref_tracker_dir *dir)
+{
+	struct ref_tracker *user = NULL;
+
+	ref_tracker_alloc(dir, &user, GFP_NOWAIT);
+
+	return (intel_wakeref_t)user ?: INTEL_WAKEREF_DEF;
+}
+
+static inline void intel_ref_tracker_free(struct ref_tracker_dir *dir,
+					  intel_wakeref_t handle)
+{
+	struct ref_tracker *user;
+
+	user = (handle == INTEL_WAKEREF_DEF) ? NULL : (void *)handle;
+
+	ref_tracker_free(dir, &user);
+}
+
+void intel_ref_tracker_show(struct ref_tracker_dir *dir,
+			    struct drm_printer *p);
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_WAKEREF)
+
+static inline intel_wakeref_t intel_wakeref_track(struct intel_wakeref *wf)
+{
+	return intel_ref_tracker_alloc(&wf->debug);
+}
+
+static inline void intel_wakeref_untrack(struct intel_wakeref *wf,
+					 intel_wakeref_t handle)
+{
+	intel_ref_tracker_free(&wf->debug, handle);
+}
+
+#else
+
+static inline intel_wakeref_t intel_wakeref_track(struct intel_wakeref *wf)
+{
+	return -1;
+}
+
+static inline void intel_wakeref_untrack(struct intel_wakeref *wf,
+					 intel_wakeref_t handle)
+{
+}
+
+#endif
+
 struct intel_wakeref_auto {
 	struct drm_i915_private *i915;
 	struct timer_list timer;
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c
index dc327cf40b5a..75278e78ca90 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp.c
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c
@@ -199,6 +199,9 @@ int intel_pxp_init(struct drm_i915_private *i915)
 	struct intel_gt *gt;
 	bool is_full_feature = false;
 
+	if (intel_gt_is_wedged(to_gt(i915)))
+		return -ENOTCONN;
+
 	/*
 	 * NOTE: Get the ctrl_gt before checking intel_pxp_is_supported since
 	 * we still need it if PXP's backend tee transport is needed.
@@ -303,6 +306,8 @@ static int __pxp_global_teardown_final(struct intel_pxp *pxp)
 
 	if (!pxp->arb_is_valid)
 		return 0;
+
+	drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: teardown for suspend/fini");
 	/*
 	 * To ensure synchronous and coherent session teardown completion
 	 * in response to suspend or shutdown triggers, don't use a worker.
@@ -324,6 +329,8 @@ static int __pxp_global_teardown_restart(struct intel_pxp *pxp)
 
 	if (pxp->arb_is_valid)
 		return 0;
+
+	drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: teardown for restart");
 	/*
 	 * The arb-session is currently inactive and we are doing a reset and restart
 	 * due to a runtime event. Use the worker that was designed for this.
@@ -332,8 +339,11 @@ static int __pxp_global_teardown_restart(struct intel_pxp *pxp)
 
 	timeout = intel_pxp_get_backend_timeout_ms(pxp);
 
-	if (!wait_for_completion_timeout(&pxp->termination, msecs_to_jiffies(timeout)))
+	if (!wait_for_completion_timeout(&pxp->termination, msecs_to_jiffies(timeout))) {
+		drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: restart backend timed out (%d ms)",
+			timeout);
 		return -ETIMEDOUT;
+	}
 
 	return 0;
 }
@@ -414,10 +424,12 @@ int intel_pxp_start(struct intel_pxp *pxp)
 	int ret = 0;
 
 	ret = intel_pxp_get_readiness_status(pxp, PXP_READINESS_TIMEOUT);
-	if (ret < 0)
+	if (ret < 0) {
+		drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: tried but not-avail (%d)", ret);
 		return ret;
-	else if (ret > 1)
+	} else if (ret > 1) {
 		return -EIO; /* per UAPI spec, user may retry later */
+	}
 
 	mutex_lock(&pxp->arb_mutex);
 
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c
index 91e9622c07d0..d81750b9bdda 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c
@@ -40,11 +40,12 @@ void intel_pxp_irq_handler(struct intel_pxp *pxp, u16 iir)
 		   GEN12_DISPLAY_APP_TERMINATED_PER_FW_REQ_INTERRUPT)) {
 		/* immediately mark PXP as inactive on termination */
 		intel_pxp_mark_termination_in_progress(pxp);
-		pxp->session_events |= PXP_TERMINATION_REQUEST | PXP_INVAL_REQUIRED;
+		pxp->session_events |= PXP_TERMINATION_REQUEST | PXP_INVAL_REQUIRED |
+				       PXP_EVENT_TYPE_IRQ;
 	}
 
 	if (iir & GEN12_DISPLAY_STATE_RESET_COMPLETE_INTERRUPT)
-		pxp->session_events |= PXP_TERMINATION_COMPLETE;
+		pxp->session_events |= PXP_TERMINATION_COMPLETE | PXP_EVENT_TYPE_IRQ;
 
 	if (pxp->session_events)
 		queue_work(system_unbound_wq, &pxp->session_work);
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c
index 0a3e66b0265e..091c86e03d1a 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c
@@ -137,8 +137,10 @@ void intel_pxp_terminate(struct intel_pxp *pxp, bool post_invalidation_needs_res
 static void pxp_terminate_complete(struct intel_pxp *pxp)
 {
 	/* Re-create the arb session after teardown handle complete */
-	if (fetch_and_zero(&pxp->hw_state_invalidated))
+	if (fetch_and_zero(&pxp->hw_state_invalidated)) {
+		drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: creating arb_session after invalidation");
 		pxp_create_arb_session(pxp);
+	}
 
 	complete_all(&pxp->termination);
 }
@@ -157,6 +159,8 @@ static void pxp_session_work(struct work_struct *work)
 	if (!events)
 		return;
 
+	drm_dbg(&gt->i915->drm, "PXP: processing event-flags 0x%08x", events);
+
 	if (events & PXP_INVAL_REQUIRED)
 		intel_pxp_invalidate(pxp);
 
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
index 7e11fa8034b2..07864b584cf4 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
@@ -124,6 +124,7 @@ struct intel_pxp {
 #define PXP_TERMINATION_REQUEST  BIT(0)
 #define PXP_TERMINATION_COMPLETE BIT(1)
 #define PXP_INVAL_REQUIRED       BIT(2)
+#define PXP_EVENT_TYPE_IRQ       BIT(3)
 };
 
 #endif /* __INTEL_PXP_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/selftests/i915_syncmap.c b/drivers/gpu/drm/i915/selftests/i915_syncmap.c
index 47f4ae18a1ef..88fa845e9f4a 100644
--- a/drivers/gpu/drm/i915/selftests/i915_syncmap.c
+++ b/drivers/gpu/drm/i915/selftests/i915_syncmap.c
@@ -77,7 +77,7 @@ __sync_print(struct i915_syncmap *p,
 		for_each_set_bit(i, (unsigned long *)&p->bitmap, KSYNCMAP) {
 			buf = __sync_print(__sync_child(p)[i], buf, sz,
 					   depth + 1,
-					   last << 1 | !!(p->bitmap >> (i + 1)),
+					   last << 1 | ((p->bitmap >> (i + 1)) ? 1 : 0),
 					   i);
 		}
 	}
diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c
index 4ddc6d902752..7d41874a49c5 100644
--- a/drivers/gpu/drm/i915/selftests/igt_live_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c
@@ -37,8 +37,9 @@ int igt_live_test_begin(struct igt_live_test *t,
 		}
 
 		for_each_engine(engine, gt, id)
-			t->reset_engine[id] =
-			i915_reset_engine_count(&i915->gpu_error, engine);
+			t->reset_engine[i][id] =
+				i915_reset_engine_count(&i915->gpu_error,
+							engine);
 	}
 
 	t->reset_global = i915_reset_count(&i915->gpu_error);
@@ -66,14 +67,14 @@ int igt_live_test_end(struct igt_live_test *t)
 
 	for_each_gt(gt, i915, i) {
 		for_each_engine(engine, gt, id) {
-			if (t->reset_engine[id] ==
+			if (t->reset_engine[i][id] ==
 			    i915_reset_engine_count(&i915->gpu_error, engine))
 				continue;
 
 			gt_err(gt, "%s(%s): engine '%s' was reset %d times!\n",
 			       t->func, t->name, engine->name,
 			       i915_reset_engine_count(&i915->gpu_error, engine) -
-			       t->reset_engine[id]);
+			       t->reset_engine[i][id]);
 			return -EIO;
 		}
 	}
diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.h b/drivers/gpu/drm/i915/selftests/igt_live_test.h
index 36ed42736c52..83e3ad430922 100644
--- a/drivers/gpu/drm/i915/selftests/igt_live_test.h
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.h
@@ -7,6 +7,7 @@
 #ifndef IGT_LIVE_TEST_H
 #define IGT_LIVE_TEST_H
 
+#include "gt/intel_gt_defines.h" /* for I915_MAX_GT */
 #include "gt/intel_engine.h" /* for I915_NUM_ENGINES */
 
 struct drm_i915_private;
@@ -17,7 +18,7 @@ struct igt_live_test {
 	const char *name;
 
 	unsigned int reset_global;
-	unsigned int reset_engine[I915_NUM_ENGINES];
+	unsigned int reset_engine[I915_MAX_GT][I915_NUM_ENGINES];
 };
 
 /*
diff --git a/drivers/gpu/drm/imagination/pvr_device.h b/drivers/gpu/drm/imagination/pvr_device.h
index 2ca7e535799f..ecdd5767d8ef 100644
--- a/drivers/gpu/drm/imagination/pvr_device.h
+++ b/drivers/gpu/drm/imagination/pvr_device.h
@@ -193,13 +193,14 @@ struct pvr_device {
 	 * @queues: Queue-related fields.
 	 */
 	struct {
-		/** @active: Active queue list. */
+		/** @queues.active: Active queue list. */
 		struct list_head active;
 
-		/** @idle: Idle queue list. */
+		/** @queues.idle: Idle queue list. */
 		struct list_head idle;
 
-		/** @lock: Lock protecting access to the active/idle lists. */
+		/** @queues.lock: Lock protecting access to the active/idle
+		 *  lists. */
 		struct mutex lock;
 	} queues;
 
@@ -207,18 +208,18 @@ struct pvr_device {
 	 * @watchdog: Watchdog for communications with firmware.
 	 */
 	struct {
-		/** @work: Work item for watchdog callback. */
+		/** @watchdog.work: Work item for watchdog callback. */
 		struct delayed_work work;
 
 		/**
-		 * @old_kccb_cmds_executed: KCCB command execution count at last
-		 * watchdog poll.
+		 * @watchdog.old_kccb_cmds_executed: KCCB command execution
+		 * count at last watchdog poll.
 		 */
 		u32 old_kccb_cmds_executed;
 
 		/**
-		 * @kccb_stall_count: Number of watchdog polls KCCB has been
-		 * stalled for.
+		 * @watchdog.kccb_stall_count: Number of watchdog polls
+		 * KCCB has been stalled for.
 		 */
 		u32 kccb_stall_count;
 	} watchdog;
@@ -227,43 +228,46 @@ struct pvr_device {
 	 * @kccb: Circular buffer for communications with firmware.
 	 */
 	struct {
-		/** @ccb: Kernel CCB. */
+		/** @kccb.ccb: Kernel CCB. */
 		struct pvr_ccb ccb;
 
-		/** @rtn_q: Waitqueue for KCCB command return waiters. */
+		/** @kccb.rtn_q: Waitqueue for KCCB command return waiters. */
 		wait_queue_head_t rtn_q;
 
-		/** @rtn_obj: Object representing KCCB return slots. */
+		/** @kccb.rtn_obj: Object representing KCCB return slots. */
 		struct pvr_fw_object *rtn_obj;
 
 		/**
-		 * @rtn: Pointer to CPU mapping of KCCB return slots. Must be
-		 * accessed by READ_ONCE()/WRITE_ONCE().
+		 * @kccb.rtn: Pointer to CPU mapping of KCCB return slots.
+		 * Must be accessed by READ_ONCE()/WRITE_ONCE().
 		 */
 		u32 *rtn;
 
-		/** @slot_count: Total number of KCCB slots available. */
+		/** @kccb.slot_count: Total number of KCCB slots available. */
 		u32 slot_count;
 
-		/** @reserved_count: Number of KCCB slots reserved for future use. */
+		/** @kccb.reserved_count: Number of KCCB slots reserved for
+		 *  future use. */
 		u32 reserved_count;
 
 		/**
-		 * @waiters: List of KCCB slot waiters.
+		 * @kccb.waiters: List of KCCB slot waiters.
 		 */
 		struct list_head waiters;
 
-		/** @fence_ctx: KCCB fence context. */
+		/** @kccb.fence_ctx: KCCB fence context. */
 		struct {
-			/** @id: KCCB fence context ID allocated with dma_fence_context_alloc(). */
+			/** @kccb.fence_ctx.id: KCCB fence context ID
+			 *  allocated with dma_fence_context_alloc(). */
 			u64 id;
 
-			/** @seqno: Sequence number incremented each time a fence is created. */
+			/** @kccb.fence_ctx.seqno: Sequence number incremented
+			 *  each time a fence is created. */
 			atomic_t seqno;
 
 			/**
-			 * @lock: Lock used to synchronize access to fences allocated by this
-			 * context.
+			 * @kccb.fence_ctx.lock: Lock used to synchronize
+			 * access to fences allocated by this context.
 			 */
 			spinlock_t lock;
 		} fence_ctx;
diff --git a/drivers/gpu/drm/imagination/pvr_hwrt.c b/drivers/gpu/drm/imagination/pvr_hwrt.c
index c4213c18489e..54f88d6c01e5 100644
--- a/drivers/gpu/drm/imagination/pvr_hwrt.c
+++ b/drivers/gpu/drm/imagination/pvr_hwrt.c
@@ -458,7 +458,7 @@ pvr_hwrt_dataset_create(struct pvr_file *pvr_file,
 			struct drm_pvr_ioctl_create_hwrt_dataset_args *args)
 {
 	struct pvr_hwrt_dataset *hwrt;
-	int err;
+	int err, i = 0;
 
 	/* Create and fill out the kernel structure */
 	hwrt = kzalloc(sizeof(*hwrt), GFP_KERNEL);
@@ -466,35 +466,36 @@ pvr_hwrt_dataset_create(struct pvr_file *pvr_file,
 	if (!hwrt)
 		return ERR_PTR(-ENOMEM);
 
-	kref_init(&hwrt->ref_count);
-
 	err = hwrt_init_kernel_structure(pvr_file, args, hwrt);
 	if (err < 0)
 		goto err_free;
 
 	err = hwrt_init_common_fw_structure(pvr_file, args, hwrt);
 	if (err < 0)
-		goto err_free;
+		goto err_fini_kernel_structure;
 
-	for (int i = 0; i < ARRAY_SIZE(hwrt->data); i++) {
+	for (; i < ARRAY_SIZE(hwrt->data); i++) {
 		err = hwrt_data_init_fw_structure(pvr_file, hwrt, args,
 						  &args->rt_data_args[i],
 						  &hwrt->data[i]);
-		if (err < 0) {
-			i--;
-			/* Destroy already created structures. */
-			for (; i >= 0; i--)
-				hwrt_data_fini_fw_structure(hwrt, i);
-			goto err_free;
-		}
+		if (err < 0)
+			goto err_fini_data_structures;
 
 		hwrt->data[i].hwrt_dataset = hwrt;
 	}
 
+	kref_init(&hwrt->ref_count);
 	return hwrt;
 
+err_fini_data_structures:
+	while (--i >= 0)
+		hwrt_data_fini_fw_structure(hwrt, i);
+
+err_fini_kernel_structure:
+	hwrt_fini_kernel_structure(hwrt);
+
 err_free:
-	pvr_hwrt_dataset_put(hwrt);
+	kfree(hwrt);
 
 	return ERR_PTR(err);
 }
diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c
index f42345fbe4bf..e59517ba039e 100644
--- a/drivers/gpu/drm/imagination/pvr_vm.c
+++ b/drivers/gpu/drm/imagination/pvr_vm.c
@@ -225,7 +225,7 @@ pvr_vm_bind_op_map_init(struct pvr_vm_bind_op *bind_op,
 			u64 device_addr, u64 size)
 {
 	struct drm_gem_object *obj = gem_from_pvr_gem(pvr_obj);
-	const bool is_user = vm_ctx == vm_ctx->pvr_dev->kernel_vm_ctx;
+	const bool is_user = vm_ctx != vm_ctx->pvr_dev->kernel_vm_ctx;
 	const u64 pvr_obj_size = pvr_gem_object_size(pvr_obj);
 	struct sg_table *sgt;
 	u64 offset_plus_size;
@@ -556,23 +556,12 @@ pvr_vm_create_context(struct pvr_device *pvr_dev, bool is_userspace_context)
 	if (!vm_ctx)
 		return ERR_PTR(-ENOMEM);
 
-	drm_gem_private_object_init(&pvr_dev->base, &vm_ctx->dummy_gem, 0);
-
 	vm_ctx->pvr_dev = pvr_dev;
-	kref_init(&vm_ctx->ref_count);
-	mutex_init(&vm_ctx->lock);
-
-	drm_gpuvm_init(&vm_ctx->gpuvm_mgr,
-		       is_userspace_context ? "PowerVR-user-VM" : "PowerVR-FW-VM",
-		       0, &pvr_dev->base, &vm_ctx->dummy_gem,
-		       0, 1ULL << device_addr_bits, 0, 0, &pvr_vm_gpuva_ops);
 
 	vm_ctx->mmu_ctx = pvr_mmu_context_create(pvr_dev);
-	err = PTR_ERR_OR_ZERO(&vm_ctx->mmu_ctx);
-	if (err) {
-		vm_ctx->mmu_ctx = NULL;
-		goto err_put_ctx;
-	}
+	err = PTR_ERR_OR_ZERO(vm_ctx->mmu_ctx);
+	if (err)
+		goto err_free;
 
 	if (is_userspace_context) {
 		err = pvr_fw_object_create(pvr_dev, sizeof(struct rogue_fwif_fwmemcontext),
@@ -583,13 +572,22 @@ pvr_vm_create_context(struct pvr_device *pvr_dev, bool is_userspace_context)
 			goto err_page_table_destroy;
 	}
 
+	drm_gem_private_object_init(&pvr_dev->base, &vm_ctx->dummy_gem, 0);
+	drm_gpuvm_init(&vm_ctx->gpuvm_mgr,
+		       is_userspace_context ? "PowerVR-user-VM" : "PowerVR-FW-VM",
+		       0, &pvr_dev->base, &vm_ctx->dummy_gem,
+		       0, 1ULL << device_addr_bits, 0, 0, &pvr_vm_gpuva_ops);
+
+	mutex_init(&vm_ctx->lock);
+	kref_init(&vm_ctx->ref_count);
+
 	return vm_ctx;
 
 err_page_table_destroy:
 	pvr_mmu_context_destroy(vm_ctx->mmu_ctx);
 
-err_put_ctx:
-	pvr_vm_context_put(vm_ctx);
+err_free:
+	kfree(vm_ctx);
 
 	return ERR_PTR(err);
 }
diff --git a/drivers/gpu/drm/imagination/pvr_vm_mips.c b/drivers/gpu/drm/imagination/pvr_vm_mips.c
index 2bc7181a4c3e..b7fef3c797e6 100644
--- a/drivers/gpu/drm/imagination/pvr_vm_mips.c
+++ b/drivers/gpu/drm/imagination/pvr_vm_mips.c
@@ -152,8 +152,8 @@ pvr_vm_mips_map(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj)
 	u64 end;
 	u32 cache_policy;
 	u32 pte_flags;
-	u32 start_pfn;
-	u32 end_pfn;
+	s32 start_pfn;
+	s32 end_pfn;
 	s32 pfn;
 	int err;
 
@@ -201,7 +201,7 @@ pvr_vm_mips_map(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj)
 	return 0;
 
 err_unmap_pages:
-	for (; pfn >= start_pfn; pfn--)
+	while (--pfn >= start_pfn)
 		WRITE_ONCE(mips_data->pt[pfn], 0);
 
 	pvr_mmu_flush_request_all(pvr_dev);
diff --git a/drivers/gpu/drm/lima/lima_ctx.c b/drivers/gpu/drm/lima/lima_ctx.c
index 891d5cd5019a..8389f2d7d021 100644
--- a/drivers/gpu/drm/lima/lima_ctx.c
+++ b/drivers/gpu/drm/lima/lima_ctx.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 OR MIT
 /* Copyright 2018-2019 Qiang Yu <yuq825@gmail.com> */
 
+#include <linux/pid.h>
 #include <linux/slab.h>
 
 #include "lima_device.h"
diff --git a/drivers/gpu/drm/loongson/lsdc_i2c.c b/drivers/gpu/drm/loongson/lsdc_i2c.c
index 9625d0b1d0b4..ce90c25536d2 100644
--- a/drivers/gpu/drm/loongson/lsdc_i2c.c
+++ b/drivers/gpu/drm/loongson/lsdc_i2c.c
@@ -154,7 +154,6 @@ int lsdc_create_i2c_chan(struct drm_device *ddev,
 	adapter = &li2c->adapter;
 	adapter->algo_data = &li2c->bit;
 	adapter->owner = THIS_MODULE;
-	adapter->class = I2C_CLASS_DDC;
 	adapter->dev.parent = ddev->dev;
 	adapter->nr = -1;
 
diff --git a/drivers/gpu/drm/mediatek/Makefile b/drivers/gpu/drm/mediatek/Makefile
index d4d193f60271..5e4436403b8d 100644
--- a/drivers/gpu/drm/mediatek/Makefile
+++ b/drivers/gpu/drm/mediatek/Makefile
@@ -16,7 +16,8 @@ mediatek-drm-y := mtk_disp_aal.o \
 		  mtk_dsi.o \
 		  mtk_dpi.o \
 		  mtk_ethdr.o \
-		  mtk_mdp_rdma.o
+		  mtk_mdp_rdma.o \
+		  mtk_padding.o
 
 obj-$(CONFIG_DRM_MEDIATEK) += mediatek-drm.o
 
diff --git a/drivers/gpu/drm/mediatek/mtk_cec.c b/drivers/gpu/drm/mediatek/mtk_cec.c
index f47f417d8ba6..8519e9bade36 100644
--- a/drivers/gpu/drm/mediatek/mtk_cec.c
+++ b/drivers/gpu/drm/mediatek/mtk_cec.c
@@ -185,7 +185,6 @@ static int mtk_cec_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct mtk_cec *cec;
-	struct resource *res;
 	int ret;
 
 	cec = devm_kzalloc(dev, sizeof(*cec), GFP_KERNEL);
@@ -195,8 +194,7 @@ static int mtk_cec_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, cec);
 	spin_lock_init(&cec->lock);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	cec->regs = devm_ioremap_resource(dev, res);
+	cec->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(cec->regs)) {
 		ret = PTR_ERR(cec->regs);
 		dev_err(dev, "Failed to ioremap cec: %d\n", ret);
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_aal.c b/drivers/gpu/drm/mediatek/mtk_disp_aal.c
index 2209159d8855..40fe403086c3 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_aal.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_aal.c
@@ -168,7 +168,6 @@ static int mtk_disp_aal_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct mtk_disp_aal *priv;
-	struct resource *res;
 	int ret;
 
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
@@ -181,8 +180,7 @@ static int mtk_disp_aal_probe(struct platform_device *pdev)
 		return PTR_ERR(priv->clk);
 	}
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	priv->regs = devm_ioremap_resource(dev, res);
+	priv->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(priv->regs)) {
 		dev_err(dev, "failed to ioremap aal\n");
 		return PTR_ERR(priv->regs);
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ccorr.c b/drivers/gpu/drm/mediatek/mtk_disp_ccorr.c
index 4234ff7485e8..465cddce0d32 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_ccorr.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_ccorr.c
@@ -153,7 +153,6 @@ static int mtk_disp_ccorr_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct mtk_disp_ccorr *priv;
-	struct resource *res;
 	int ret;
 
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
@@ -166,8 +165,7 @@ static int mtk_disp_ccorr_probe(struct platform_device *pdev)
 		return PTR_ERR(priv->clk);
 	}
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	priv->regs = devm_ioremap_resource(dev, res);
+	priv->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(priv->regs)) {
 		dev_err(dev, "failed to ioremap ccorr\n");
 		return PTR_ERR(priv->regs);
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_drv.h b/drivers/gpu/drm/mediatek/mtk_disp_drv.h
index 1311562d25cc..74fa56339383 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_drv.h
+++ b/drivers/gpu/drm/mediatek/mtk_disp_drv.h
@@ -110,6 +110,8 @@ void mtk_ovl_adaptor_connect(struct device *dev, struct device *mmsys_dev,
 			     unsigned int next);
 void mtk_ovl_adaptor_disconnect(struct device *dev, struct device *mmsys_dev,
 				unsigned int next);
+int mtk_ovl_adaptor_power_on(struct device *dev);
+void mtk_ovl_adaptor_power_off(struct device *dev);
 int mtk_ovl_adaptor_clk_enable(struct device *dev);
 void mtk_ovl_adaptor_clk_disable(struct device *dev);
 void mtk_ovl_adaptor_config(struct device *dev, unsigned int w,
@@ -151,6 +153,8 @@ void mtk_rdma_disable_vblank(struct device *dev);
 const u32 *mtk_rdma_get_formats(struct device *dev);
 size_t mtk_rdma_get_num_formats(struct device *dev);
 
+int mtk_mdp_rdma_power_on(struct device *dev);
+void mtk_mdp_rdma_power_off(struct device *dev);
 int mtk_mdp_rdma_clk_enable(struct device *dev);
 void mtk_mdp_rdma_clk_disable(struct device *dev);
 void mtk_mdp_rdma_start(struct device *dev, struct cmdq_pkt *cmdq_pkt);
@@ -160,4 +164,8 @@ void mtk_mdp_rdma_config(struct device *dev, struct mtk_mdp_rdma_cfg *cfg,
 const u32 *mtk_mdp_rdma_get_formats(struct device *dev);
 size_t mtk_mdp_rdma_get_num_formats(struct device *dev);
 
+int mtk_padding_clk_enable(struct device *dev);
+void mtk_padding_clk_disable(struct device *dev);
+void mtk_padding_start(struct device *dev);
+void mtk_padding_stop(struct device *dev);
 #endif
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_gamma.c b/drivers/gpu/drm/mediatek/mtk_disp_gamma.c
index f81dc34c9c3e..c1bc8b00d938 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_gamma.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_gamma.c
@@ -203,7 +203,7 @@ void mtk_gamma_set(struct device *dev, struct drm_crtc_state *state)
 	/* Disable RELAY mode to pass the processed image */
 	cfg_val &= ~GAMMA_RELAY_MODE;
 
-	cfg_val = readl(gamma->regs + DISP_GAMMA_CFG);
+	writel(cfg_val, gamma->regs + DISP_GAMMA_CFG);
 }
 
 void mtk_gamma_config(struct device *dev, unsigned int w,
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_merge.c b/drivers/gpu/drm/mediatek/mtk_disp_merge.c
index e525a6b9e5b0..22f768d923d5 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_merge.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_merge.c
@@ -103,7 +103,7 @@ void mtk_merge_stop_cmdq(struct device *dev, struct cmdq_pkt *cmdq_pkt)
 	mtk_ddp_write(cmdq_pkt, 0, &priv->cmdq_reg, priv->regs,
 		      DISP_REG_MERGE_CTRL);
 
-	if (priv->async_clk)
+	if (!cmdq_pkt && priv->async_clk)
 		reset_control_reset(priv->reset_ctl);
 }
 
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
index 3fdef3ad4ffd..12a37f740bf4 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
@@ -27,13 +27,14 @@
 #define MTK_OVL_ADAPTOR_LAYER_NUM 4
 
 enum mtk_ovl_adaptor_comp_type {
-	OVL_ADAPTOR_TYPE_RDMA = 0,
-	OVL_ADAPTOR_TYPE_MERGE,
 	OVL_ADAPTOR_TYPE_ETHDR,
+	OVL_ADAPTOR_TYPE_MDP_RDMA,
+	OVL_ADAPTOR_TYPE_MERGE,
 	OVL_ADAPTOR_TYPE_NUM,
 };
 
 enum mtk_ovl_adaptor_comp_id {
+	OVL_ADAPTOR_ETHDR0,
 	OVL_ADAPTOR_MDP_RDMA0,
 	OVL_ADAPTOR_MDP_RDMA1,
 	OVL_ADAPTOR_MDP_RDMA2,
@@ -46,13 +47,14 @@ enum mtk_ovl_adaptor_comp_id {
 	OVL_ADAPTOR_MERGE1,
 	OVL_ADAPTOR_MERGE2,
 	OVL_ADAPTOR_MERGE3,
-	OVL_ADAPTOR_ETHDR0,
 	OVL_ADAPTOR_ID_MAX
 };
 
 struct ovl_adaptor_comp_match {
 	enum mtk_ovl_adaptor_comp_type type;
+	enum mtk_ddp_comp_id comp_id;
 	int alias_id;
+	const struct mtk_ddp_comp_funcs *funcs;
 };
 
 struct mtk_disp_ovl_adaptor {
@@ -62,25 +64,44 @@ struct mtk_disp_ovl_adaptor {
 };
 
 static const char * const private_comp_stem[OVL_ADAPTOR_TYPE_NUM] = {
-	[OVL_ADAPTOR_TYPE_RDMA]		= "vdo1-rdma",
-	[OVL_ADAPTOR_TYPE_MERGE]	= "merge",
 	[OVL_ADAPTOR_TYPE_ETHDR]	= "ethdr",
+	[OVL_ADAPTOR_TYPE_MDP_RDMA]	= "vdo1-rdma",
+	[OVL_ADAPTOR_TYPE_MERGE]	= "merge",
+};
+
+static const struct mtk_ddp_comp_funcs ethdr = {
+	.clk_enable = mtk_ethdr_clk_enable,
+	.clk_disable = mtk_ethdr_clk_disable,
+	.start = mtk_ethdr_start,
+	.stop = mtk_ethdr_stop,
+};
+
+static const struct mtk_ddp_comp_funcs merge = {
+	.clk_enable = mtk_merge_clk_enable,
+	.clk_disable = mtk_merge_clk_disable,
+};
+
+static const struct mtk_ddp_comp_funcs rdma = {
+	.power_on = mtk_mdp_rdma_power_on,
+	.power_off = mtk_mdp_rdma_power_off,
+	.clk_enable = mtk_mdp_rdma_clk_enable,
+	.clk_disable = mtk_mdp_rdma_clk_disable,
 };
 
 static const struct ovl_adaptor_comp_match comp_matches[OVL_ADAPTOR_ID_MAX] = {
-	[OVL_ADAPTOR_MDP_RDMA0]	= { OVL_ADAPTOR_TYPE_RDMA, 0 },
-	[OVL_ADAPTOR_MDP_RDMA1]	= { OVL_ADAPTOR_TYPE_RDMA, 1 },
-	[OVL_ADAPTOR_MDP_RDMA2]	= { OVL_ADAPTOR_TYPE_RDMA, 2 },
-	[OVL_ADAPTOR_MDP_RDMA3]	= { OVL_ADAPTOR_TYPE_RDMA, 3 },
-	[OVL_ADAPTOR_MDP_RDMA4]	= { OVL_ADAPTOR_TYPE_RDMA, 4 },
-	[OVL_ADAPTOR_MDP_RDMA5]	= { OVL_ADAPTOR_TYPE_RDMA, 5 },
-	[OVL_ADAPTOR_MDP_RDMA6]	= { OVL_ADAPTOR_TYPE_RDMA, 6 },
-	[OVL_ADAPTOR_MDP_RDMA7]	= { OVL_ADAPTOR_TYPE_RDMA, 7 },
-	[OVL_ADAPTOR_MERGE0]	= { OVL_ADAPTOR_TYPE_MERGE, 1 },
-	[OVL_ADAPTOR_MERGE1]	= { OVL_ADAPTOR_TYPE_MERGE, 2 },
-	[OVL_ADAPTOR_MERGE2]	= { OVL_ADAPTOR_TYPE_MERGE, 3 },
-	[OVL_ADAPTOR_MERGE3]	= { OVL_ADAPTOR_TYPE_MERGE, 4 },
-	[OVL_ADAPTOR_ETHDR0]	= { OVL_ADAPTOR_TYPE_ETHDR, 0 },
+	[OVL_ADAPTOR_ETHDR0] = { OVL_ADAPTOR_TYPE_ETHDR, DDP_COMPONENT_ETHDR_MIXER, 0, &ethdr },
+	[OVL_ADAPTOR_MDP_RDMA0] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA0, 0, &rdma },
+	[OVL_ADAPTOR_MDP_RDMA1] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA1, 1, &rdma },
+	[OVL_ADAPTOR_MDP_RDMA2] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA2, 2, &rdma },
+	[OVL_ADAPTOR_MDP_RDMA3] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA3, 3, &rdma },
+	[OVL_ADAPTOR_MDP_RDMA4] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA4, 4, &rdma },
+	[OVL_ADAPTOR_MDP_RDMA5] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA5, 5, &rdma },
+	[OVL_ADAPTOR_MDP_RDMA6] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA6, 6, &rdma },
+	[OVL_ADAPTOR_MDP_RDMA7] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA7, 7, &rdma },
+	[OVL_ADAPTOR_MERGE0] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE1, 1, &merge },
+	[OVL_ADAPTOR_MERGE1] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE2, 2, &merge },
+	[OVL_ADAPTOR_MERGE2] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE3, 3, &merge },
+	[OVL_ADAPTOR_MERGE3] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE4, 4, &merge },
 };
 
 void mtk_ovl_adaptor_layer_config(struct device *dev, unsigned int idx,
@@ -172,68 +193,117 @@ void mtk_ovl_adaptor_config(struct device *dev, unsigned int w,
 
 void mtk_ovl_adaptor_start(struct device *dev)
 {
+	int i;
 	struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev);
 
-	mtk_ethdr_start(ovl_adaptor->ovl_adaptor_comp[OVL_ADAPTOR_ETHDR0]);
+	for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) {
+		if (!ovl_adaptor->ovl_adaptor_comp[i] ||
+		    !comp_matches[i].funcs->start)
+			continue;
+
+		comp_matches[i].funcs->start(ovl_adaptor->ovl_adaptor_comp[i]);
+	}
 }
 
 void mtk_ovl_adaptor_stop(struct device *dev)
 {
+	int i;
 	struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev);
 
-	mtk_ethdr_stop(ovl_adaptor->ovl_adaptor_comp[OVL_ADAPTOR_ETHDR0]);
+	for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) {
+		if (!ovl_adaptor->ovl_adaptor_comp[i] ||
+		    !comp_matches[i].funcs->stop)
+			continue;
+
+		comp_matches[i].funcs->stop(ovl_adaptor->ovl_adaptor_comp[i]);
+	}
 }
 
-int mtk_ovl_adaptor_clk_enable(struct device *dev)
+/**
+ * power_off - Power off the devices in OVL adaptor
+ * @dev: Device to be powered off
+ * @num: Number of the devices to be powered off
+ *
+ * Calls the .power_off() ovl_adaptor component callback if it is present.
+ */
+static inline void power_off(struct device *dev, int num)
 {
 	struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev);
-	struct device *comp;
-	int ret;
 	int i;
 
-	for (i = 0; i < OVL_ADAPTOR_MERGE0; i++) {
-		comp = ovl_adaptor->ovl_adaptor_comp[i];
-		ret = pm_runtime_get_sync(comp);
+	if (num > OVL_ADAPTOR_ID_MAX)
+		num = OVL_ADAPTOR_ID_MAX;
+
+	for (i = num - 1; i >= 0; i--) {
+		if (!ovl_adaptor->ovl_adaptor_comp[i] ||
+		    !comp_matches[i].funcs->power_off)
+			continue;
+
+		comp_matches[i].funcs->power_off(ovl_adaptor->ovl_adaptor_comp[i]);
+	}
+}
+
+/**
+ * mtk_ovl_adaptor_power_on - Power on the devices in OVL adaptor
+ * @dev: Device to be powered on
+ *
+ * Unlike OVL, the OVL adaptor is a pseudo device that is not defined in
+ * the device tree, so the pm_runtime_resume_and_get() called by
+ * .atomic_enable() powers on none of the devices in the OVL adaptor;
+ * this function does that job instead.
+ *
+ * Return: Zero for success or negative number for failure.
+ */
+int mtk_ovl_adaptor_power_on(struct device *dev)
+{
+	struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev);
+	int i, ret;
+
+	for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) {
+		if (!ovl_adaptor->ovl_adaptor_comp[i] ||
+		    !comp_matches[i].funcs->power_on)
+			continue;
+
+		ret = comp_matches[i].funcs->power_on(ovl_adaptor->ovl_adaptor_comp[i]);
 		if (ret < 0) {
 			dev_err(dev, "Failed to enable power domain %d, err %d\n", i, ret);
-			goto pwr_err;
+			power_off(dev, i);
+			return ret;
 		}
 	}
+	return 0;
+}
+
+void mtk_ovl_adaptor_power_off(struct device *dev)
+{
+	power_off(dev, OVL_ADAPTOR_ID_MAX);
+}
+
+int mtk_ovl_adaptor_clk_enable(struct device *dev)
+{
+	struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev);
+	struct device *comp;
+	int ret;
+	int i;
 
 	for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) {
 		comp = ovl_adaptor->ovl_adaptor_comp[i];
-
-		if (i < OVL_ADAPTOR_MERGE0)
-			ret = mtk_mdp_rdma_clk_enable(comp);
-		else if (i < OVL_ADAPTOR_ETHDR0)
-			ret = mtk_merge_clk_enable(comp);
-		else
-			ret = mtk_ethdr_clk_enable(comp);
+		if (!comp || !comp_matches[i].funcs->clk_enable)
+			continue;
+		ret = comp_matches[i].funcs->clk_enable(comp);
 		if (ret) {
 			dev_err(dev, "Failed to enable clock %d, err %d\n", i, ret);
-			goto clk_err;
+			/* Unwind earlier slots, each on its own component device. */
+			while (--i >= 0) {
+				comp = ovl_adaptor->ovl_adaptor_comp[i];
+				if (!comp || !comp_matches[i].funcs->clk_disable)
+					continue;
+				comp_matches[i].funcs->clk_disable(comp);
+			}
+			return ret;
 		}
 	}
-
-	return ret;
-
-clk_err:
-	while (--i >= 0) {
-		comp = ovl_adaptor->ovl_adaptor_comp[i];
-		if (i < OVL_ADAPTOR_MERGE0)
-			mtk_mdp_rdma_clk_disable(comp);
-		else if (i < OVL_ADAPTOR_ETHDR0)
-			mtk_merge_clk_disable(comp);
-		else
-			mtk_ethdr_clk_disable(comp);
-	}
-	i = OVL_ADAPTOR_MERGE0;
-
-pwr_err:
-	while (--i >= 0)
-		pm_runtime_put(ovl_adaptor->ovl_adaptor_comp[i]);
-
-	return ret;
+	return 0;
 }
 
 void mtk_ovl_adaptor_clk_disable(struct device *dev)
@@ -244,15 +309,11 @@ void mtk_ovl_adaptor_clk_disable(struct device *dev)
 
 	for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) {
 		comp = ovl_adaptor->ovl_adaptor_comp[i];
-
-		if (i < OVL_ADAPTOR_MERGE0) {
-			mtk_mdp_rdma_clk_disable(comp);
+		if (!comp || !comp_matches[i].funcs->clk_disable)
+			continue;
+		comp_matches[i].funcs->clk_disable(comp);
+		if (i < OVL_ADAPTOR_MERGE0)
 			pm_runtime_put(comp);
-		} else if (i < OVL_ADAPTOR_ETHDR0) {
-			mtk_merge_clk_disable(comp);
-		} else {
-			mtk_ethdr_clk_disable(comp);
-		}
 	}
 }
 
@@ -314,40 +375,31 @@ size_t mtk_ovl_adaptor_get_num_formats(struct device *dev)
 
 void mtk_ovl_adaptor_add_comp(struct device *dev, struct mtk_mutex *mutex)
 {
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA0);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA1);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA2);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA3);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA4);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA5);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA6);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA7);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MERGE1);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MERGE2);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MERGE3);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MERGE4);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_ETHDR_MIXER);
+	struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev);
+	int i;
+
+	/* Add to the mutex only the sub-components whose slot is non-NULL. */
+	for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) {
+		if (ovl_adaptor->ovl_adaptor_comp[i])
+			mtk_mutex_add_comp(mutex, comp_matches[i].comp_id);
+	}
 }
 
 void mtk_ovl_adaptor_remove_comp(struct device *dev, struct mtk_mutex *mutex)
 {
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA0);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA1);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA2);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA3);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA4);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA5);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA6);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA7);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MERGE1);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MERGE2);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MERGE3);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MERGE4);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_ETHDR_MIXER);
+	struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev);
+	int i;
+
+	/* Remove from the mutex only the sub-components whose slot is non-NULL. */
+	for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) {
+		if (ovl_adaptor->ovl_adaptor_comp[i])
+			mtk_mutex_remove_comp(mutex, comp_matches[i].comp_id);
+	}
 }
 
 void mtk_ovl_adaptor_connect(struct device *dev, struct device *mmsys_dev, unsigned int next)
 {
+	mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_ETHDR_MIXER, next);
 	mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MDP_RDMA0, DDP_COMPONENT_MERGE1);
 	mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MDP_RDMA1, DDP_COMPONENT_MERGE1);
 	mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MDP_RDMA2, DDP_COMPONENT_MERGE2);
@@ -355,11 +407,11 @@ void mtk_ovl_adaptor_connect(struct device *dev, struct device *mmsys_dev, unsig
 	mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MERGE2, DDP_COMPONENT_ETHDR_MIXER);
 	mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MERGE3, DDP_COMPONENT_ETHDR_MIXER);
 	mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MERGE4, DDP_COMPONENT_ETHDR_MIXER);
-	mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_ETHDR_MIXER, next);
 }
 
 void mtk_ovl_adaptor_disconnect(struct device *dev, struct device *mmsys_dev, unsigned int next)
 {
+	mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_ETHDR_MIXER, next);
 	mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MDP_RDMA0, DDP_COMPONENT_MERGE1);
 	mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MDP_RDMA1, DDP_COMPONENT_MERGE1);
 	mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MDP_RDMA2, DDP_COMPONENT_MERGE2);
@@ -367,7 +419,6 @@ void mtk_ovl_adaptor_disconnect(struct device *dev, struct device *mmsys_dev, un
 	mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MERGE2, DDP_COMPONENT_ETHDR_MIXER);
 	mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MERGE3, DDP_COMPONENT_ETHDR_MIXER);
 	mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MERGE4, DDP_COMPONENT_ETHDR_MIXER);
-	mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_ETHDR_MIXER, next);
 }
 
 static int ovl_adaptor_comp_get_id(struct device *dev, struct device_node *node,
@@ -386,17 +437,10 @@ static int ovl_adaptor_comp_get_id(struct device *dev, struct device_node *node,
 }
 
 static const struct of_device_id mtk_ovl_adaptor_comp_dt_ids[] = {
-	{
-		.compatible = "mediatek,mt8195-vdo1-rdma",
-		.data = (void *)OVL_ADAPTOR_TYPE_RDMA,
-	}, {
-		.compatible = "mediatek,mt8195-disp-merge",
-		.data = (void *)OVL_ADAPTOR_TYPE_MERGE,
-	}, {
-		.compatible = "mediatek,mt8195-disp-ethdr",
-		.data = (void *)OVL_ADAPTOR_TYPE_ETHDR,
-	},
-	{},
+	{ .compatible = "mediatek,mt8195-disp-ethdr", .data = (void *)OVL_ADAPTOR_TYPE_ETHDR },
+	{ .compatible = "mediatek,mt8195-disp-merge", .data = (void *)OVL_ADAPTOR_TYPE_MERGE },
+	{ .compatible = "mediatek,mt8195-vdo1-rdma", .data = (void *)OVL_ADAPTOR_TYPE_MDP_RDMA },
+	{ /* sentinel */ }
 };
 
 static int compare_of(struct device *dev, void *data)
diff --git a/drivers/gpu/drm/mediatek/mtk_dp.c b/drivers/gpu/drm/mediatek/mtk_dp.c
index e4c16ba9902d..2136a596efa1 100644
--- a/drivers/gpu/drm/mediatek/mtk_dp.c
+++ b/drivers/gpu/drm/mediatek/mtk_dp.c
@@ -2818,3 +2818,4 @@ MODULE_AUTHOR("Markus Schneider-Pargmann <msp@baylibre.com>");
 MODULE_AUTHOR("Bo-Chen Chen <rex-bc.chen@mediatek.com>");
 MODULE_DESCRIPTION("MediaTek DisplayPort Driver");
 MODULE_LICENSE("GPL");
+MODULE_SOFTDEP("pre: phy_mtk_dp");
diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c
index 4e3d9f7b4d8c..beb7d9d08e97 100644
--- a/drivers/gpu/drm/mediatek/mtk_dpi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dpi.c
@@ -966,20 +966,6 @@ static const struct mtk_dpi_conf mt8186_conf = {
 	.csc_enable_bit = CSC_ENABLE,
 };
 
-static const struct mtk_dpi_conf mt8188_dpintf_conf = {
-	.cal_factor = mt8195_dpintf_calculate_factor,
-	.max_clock_khz = 600000,
-	.output_fmts = mt8195_output_fmts,
-	.num_output_fmts = ARRAY_SIZE(mt8195_output_fmts),
-	.pixels_per_iter = 4,
-	.input_2pixel = false,
-	.dimension_mask = DPINTF_HPW_MASK,
-	.hvsize_mask = DPINTF_HSIZE_MASK,
-	.channel_swap_shift = DPINTF_CH_SWAP,
-	.yuv422_en_bit = DPINTF_YUV422_EN,
-	.csc_enable_bit = DPINTF_CSC_ENABLE,
-};
-
 static const struct mtk_dpi_conf mt8192_conf = {
 	.cal_factor = mt8183_calculate_factor,
 	.reg_h_fre_con = 0xe0,
@@ -1103,7 +1089,7 @@ static const struct of_device_id mtk_dpi_of_ids[] = {
 	{ .compatible = "mediatek,mt8173-dpi", .data = &mt8173_conf },
 	{ .compatible = "mediatek,mt8183-dpi", .data = &mt8183_conf },
 	{ .compatible = "mediatek,mt8186-dpi", .data = &mt8186_conf },
-	{ .compatible = "mediatek,mt8188-dp-intf", .data = &mt8188_dpintf_conf },
+	{ .compatible = "mediatek,mt8188-dp-intf", .data = &mt8195_dpintf_conf },
 	{ .compatible = "mediatek,mt8192-dpi", .data = &mt8192_conf },
 	{ .compatible = "mediatek,mt8195-dp-intf", .data = &mt8195_dpintf_conf },
 	{ /* sentinel */ },
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
index c277b9fae950..c729af3b9822 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
@@ -721,7 +721,7 @@ static void mtk_drm_crtc_atomic_enable(struct drm_crtc *crtc,
 
 	DRM_DEBUG_DRIVER("%s %d\n", __func__, crtc->base.id);
 
-	ret = pm_runtime_resume_and_get(comp->dev);
+	ret = mtk_ddp_comp_power_on(comp);
 	if (ret < 0) {
 		DRM_DEV_ERROR(comp->dev, "Failed to enable power domain: %d\n", ret);
 		return;
@@ -731,7 +731,7 @@ static void mtk_drm_crtc_atomic_enable(struct drm_crtc *crtc,
 
 	ret = mtk_crtc_ddp_hw_init(mtk_crtc);
 	if (ret) {
-		pm_runtime_put(comp->dev);
+		mtk_ddp_comp_power_off(comp);
 		return;
 	}
 
@@ -744,7 +744,7 @@ static void mtk_drm_crtc_atomic_disable(struct drm_crtc *crtc,
 {
 	struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
 	struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0];
-	int i, ret;
+	int i;
 
 	DRM_DEBUG_DRIVER("%s %d\n", __func__, crtc->base.id);
 	if (!mtk_crtc->enabled)
@@ -774,9 +774,7 @@ static void mtk_drm_crtc_atomic_disable(struct drm_crtc *crtc,
 
 	drm_crtc_vblank_off(crtc);
 	mtk_crtc_ddp_hw_fini(mtk_crtc);
-	ret = pm_runtime_put(comp->dev);
-	if (ret < 0)
-		DRM_DEV_ERROR(comp->dev, "Failed to disable power domain: %d\n", ret);
+	mtk_ddp_comp_power_off(comp);
 
 	mtk_crtc->enabled = false;
 }
@@ -788,6 +786,7 @@ static void mtk_drm_crtc_atomic_begin(struct drm_crtc *crtc,
 									  crtc);
 	struct mtk_crtc_state *mtk_crtc_state = to_mtk_crtc_state(crtc_state);
 	struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
+	unsigned long flags;
 
 	if (mtk_crtc->event && mtk_crtc_state->base.event)
 		DRM_ERROR("new event while there is still a pending event\n");
@@ -795,7 +794,11 @@ static void mtk_drm_crtc_atomic_begin(struct drm_crtc *crtc,
 	if (mtk_crtc_state->base.event) {
 		mtk_crtc_state->base.event->pipe = drm_crtc_index(crtc);
 		WARN_ON(drm_crtc_vblank_get(crtc) != 0);
+
+		spin_lock_irqsave(&crtc->dev->event_lock, flags);
 		mtk_crtc->event = mtk_crtc_state->base.event;
+		spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+
 		mtk_crtc_state->base.event = NULL;
 	}
 }
@@ -921,7 +924,14 @@ static int mtk_drm_crtc_init_comp_planes(struct drm_device *drm_dev,
 
 struct device *mtk_drm_crtc_dma_dev_get(struct drm_crtc *crtc)
 {
-	struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
+	struct mtk_drm_crtc *mtk_crtc = NULL;
+
+	if (!crtc)
+		return NULL;
+
+	mtk_crtc = to_mtk_crtc(crtc);
+	if (!mtk_crtc)
+		return NULL;
 
 	return mtk_crtc->dma_dev;
 }
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
index 3046c0409353..a9b5a21cde2d 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
@@ -398,6 +398,8 @@ static const struct mtk_ddp_comp_funcs ddp_ufoe = {
 };
 
 static const struct mtk_ddp_comp_funcs ddp_ovl_adaptor = {
+	.power_on = mtk_ovl_adaptor_power_on,
+	.power_off = mtk_ovl_adaptor_power_off,
 	.clk_enable = mtk_ovl_adaptor_clk_enable,
 	.clk_disable = mtk_ovl_adaptor_clk_disable,
 	.config = mtk_ovl_adaptor_config,
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h
index 4bae55bdb034..15b2eafff438 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h
+++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h
@@ -7,6 +7,7 @@
 #define MTK_DRM_DDP_COMP_H
 
 #include <linux/io.h>
+#include <linux/pm_runtime.h>
 #include <linux/soc/mediatek/mtk-cmdq.h>
 #include <linux/soc/mediatek/mtk-mmsys.h>
 #include <linux/soc/mediatek/mtk-mutex.h>
@@ -46,6 +47,8 @@ enum mtk_ddp_comp_type {
 struct mtk_ddp_comp;
 struct cmdq_pkt;
 struct mtk_ddp_comp_funcs {
+	int (*power_on)(struct device *dev);
+	void (*power_off)(struct device *dev);
 	int (*clk_enable)(struct device *dev);
 	void (*clk_disable)(struct device *dev);
 	void (*config)(struct device *dev, unsigned int w,
@@ -92,6 +95,23 @@ struct mtk_ddp_comp {
 	const struct mtk_ddp_comp_funcs *funcs;
 };
 
+/* Power on through the component's own hook if it has one, else via runtime PM. */
+static inline int mtk_ddp_comp_power_on(struct mtk_ddp_comp *comp)
+{
+	if (comp->funcs && comp->funcs->power_on)
+		return comp->funcs->power_on(comp->dev);
+
+	return pm_runtime_resume_and_get(comp->dev);
+}
+
+static inline void mtk_ddp_comp_power_off(struct mtk_ddp_comp *comp)
+{
+	if (!comp->funcs || !comp->funcs->power_off)
+		pm_runtime_put(comp->dev); /* no hook: fall back to plain runtime PM */
+	else
+		comp->funcs->power_off(comp->dev);
+}
+
 static inline int mtk_ddp_comp_clk_enable(struct mtk_ddp_comp *comp)
 {
 	if (comp->funcs && comp->funcs->clk_enable)
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
index 2dfaa613276a..14a1e0157cc4 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
@@ -5,7 +5,6 @@
  */
 
 #include <linux/component.h>
-#include <linux/iommu.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_platform.h>
@@ -443,6 +442,7 @@ static int mtk_drm_kms_init(struct drm_device *drm)
 	struct mtk_drm_private *private = drm->dev_private;
 	struct mtk_drm_private *priv_n;
 	struct device *dma_dev = NULL;
+	struct drm_crtc *crtc;
 	int ret, i, j;
 
 	if (drm_firmware_drivers_only())
@@ -519,7 +519,9 @@ static int mtk_drm_kms_init(struct drm_device *drm)
 	}
 
 	/* Use OVL device for all DMA memory allocations */
-	dma_dev = mtk_drm_crtc_dma_dev_get(drm_crtc_from_index(drm, 0));
+	crtc = drm_crtc_from_index(drm, 0);
+	if (crtc)
+		dma_dev = mtk_drm_crtc_dma_dev_get(crtc);
 	if (!dma_dev) {
 		ret = -ENODEV;
 		dev_err(drm->dev, "Need at least one OVL device\n");
@@ -608,9 +610,6 @@ static int mtk_drm_bind(struct device *dev)
 	struct drm_device *drm;
 	int ret, i;
 
-	if (!iommu_present(&platform_bus_type))
-		return -EPROBE_DEFER;
-
 	pdev = of_find_device_by_node(private->mutex_node);
 	if (!pdev) {
 		dev_err(dev, "Waiting for disp-mutex device %pOF\n",
@@ -1000,6 +999,7 @@ static struct platform_driver * const mtk_drm_drivers[] = {
 	&mtk_dsi_driver,
 	&mtk_ethdr_driver,
 	&mtk_mdp_rdma_driver,
+	&mtk_padding_driver,
 };
 
 static int __init mtk_drm_init(void)
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.h b/drivers/gpu/drm/mediatek/mtk_drm_drv.h
index 6f98fff4f1a4..33fadb08dc1c 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_drv.h
+++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.h
@@ -77,5 +77,5 @@ extern struct platform_driver mtk_dpi_driver;
 extern struct platform_driver mtk_dsi_driver;
 extern struct platform_driver mtk_ethdr_driver;
 extern struct platform_driver mtk_mdp_rdma_driver;
-
+extern struct platform_driver mtk_padding_driver;
 #endif /* MTK_DRM_DRV_H */
diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c b/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c
index d675c954befe..54e46e440e0f 100644
--- a/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c
+++ b/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c
@@ -297,7 +297,6 @@ static int mtk_hdmi_ddc_probe(struct platform_device *pdev)
 
 	strscpy(ddc->adap.name, "mediatek-hdmi-ddc", sizeof(ddc->adap.name));
 	ddc->adap.owner = THIS_MODULE;
-	ddc->adap.class = I2C_CLASS_DDC;
 	ddc->adap.algo = &mtk_hdmi_ddc_algorithm;
 	ddc->adap.retries = 3;
 	ddc->adap.dev.of_node = dev->of_node;
diff --git a/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c b/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c
index c3adaeefd551..ee9ce9b6d078 100644
--- a/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c
+++ b/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c
@@ -242,12 +242,27 @@ size_t mtk_mdp_rdma_get_num_formats(struct device *dev)
 	return ARRAY_SIZE(formats);
 }
 
+/* Resume the RDMA device via runtime PM; returns 0 or the negative error. */
+int mtk_mdp_rdma_power_on(struct device *dev)
+{
+	int err = pm_runtime_resume_and_get(dev);
+
+	if (err >= 0)
+		return 0;
+	dev_err(dev, "Failed to power on: %d\n", err);
+	return err;
+}
+
+void mtk_mdp_rdma_power_off(struct device *dev)
+{
+	pm_runtime_put(dev); /* balances the get done in mtk_mdp_rdma_power_on() */
+}
+
 int mtk_mdp_rdma_clk_enable(struct device *dev)
 {
 	struct mtk_mdp_rdma *rdma = dev_get_drvdata(dev);
 
-	clk_prepare_enable(rdma->clk);
-	return 0;
+	return clk_prepare_enable(rdma->clk);
 }
 
 void mtk_mdp_rdma_clk_disable(struct device *dev)
diff --git a/drivers/gpu/drm/mediatek/mtk_padding.c b/drivers/gpu/drm/mediatek/mtk_padding.c
new file mode 100644
index 000000000000..0d6451c149b6
--- /dev/null
+++ b/drivers/gpu/drm/mediatek/mtk_padding.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2023 MediaTek Inc.
+ */
+
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/soc/mediatek/mtk-cmdq.h>
+
+#include "mtk_disp_drv.h"
+#include "mtk_drm_crtc.h"
+#include "mtk_drm_ddp_comp.h"
+
+#define PADDING_CONTROL_REG	0x00
+#define PADDING_BYPASS			BIT(0)
+#define PADDING_ENABLE			BIT(1)
+#define PADDING_PIC_SIZE_REG	0x04
+#define PADDING_H_REG		0x08 /* horizontal */
+#define PADDING_V_REG		0x0c /* vertical */
+#define PADDING_COLOR_REG	0x10
+
+/**
+ * struct mtk_padding - Basic information of the Padding
+ * @clk: Clock of the module
+ * @reg: Virtual address of the Padding for CPU to access
+ * @cmdq_reg: CMDQ setting of the Padding
+ *
+ * Each Padding has its own clock source, register base and CMDQ
+ * settings; these per-instance differences are stored together here.
+ */
+struct mtk_padding {
+	struct clk		*clk;
+	void __iomem		*reg;
+	struct cmdq_client_reg	cmdq_reg;
+};
+
+/**
+ * mtk_padding_clk_enable() - prepare and enable the Padding clock
+ * @dev: the Padding device
+ *
+ * Return: 0 on success or the negative error from clk_prepare_enable().
+ */
+int mtk_padding_clk_enable(struct device *dev)
+{
+	struct mtk_padding *padding = dev_get_drvdata(dev);
+
+	return clk_prepare_enable(padding->clk);
+}
+
+/**
+ * mtk_padding_clk_disable() - disable and unprepare the Padding clock
+ * @dev: the Padding device
+ */
+void mtk_padding_clk_disable(struct device *dev)
+{
+	struct mtk_padding *padding = dev_get_drvdata(dev);
+
+	clk_disable_unprepare(padding->clk);
+}
+
+/**
+ * mtk_padding_start() - enable the Padding in bypass mode
+ * @dev: the Padding device
+ */
+void mtk_padding_start(struct device *dev)
+{
+	struct mtk_padding *padding = dev_get_drvdata(dev);
+
+	writel(PADDING_ENABLE | PADDING_BYPASS,
+	       padding->reg + PADDING_CONTROL_REG);
+
+	/*
+	 * Even when the padding is in bypass mode, all the
+	 * settings must be cleared to 0, otherwise undefined
+	 * behavior could happen.
+	 */
+	writel(0, padding->reg + PADDING_PIC_SIZE_REG);
+	writel(0, padding->reg + PADDING_H_REG);
+	writel(0, padding->reg + PADDING_V_REG);
+	writel(0, padding->reg + PADDING_COLOR_REG);
+}
+
+/**
+ * mtk_padding_stop() - clear the Padding control register
+ * @dev: the Padding device
+ */
+void mtk_padding_stop(struct device *dev)
+{
+	struct mtk_padding *padding = dev_get_drvdata(dev);
+
+	writel(0, padding->reg + PADDING_CONTROL_REG);
+}
+
+/* No per-master setup is done at bind time. */
+static int mtk_padding_bind(struct device *dev, struct device *master, void *data)
+{
+	return 0;
+}
+
+/* Bind sets nothing up, so there is nothing to undo here. */
+static void mtk_padding_unbind(struct device *dev, struct device *master, void *data)
+{
+}
+
+static const struct component_ops mtk_padding_component_ops = {
+	.bind	= mtk_padding_bind,
+	.unbind = mtk_padding_unbind,
+};
+
+/*
+ * Probe: get the clock, map the registers, look up the GCE client
+ * register (only when CMDQ is reachable), enable runtime PM and add the
+ * component.  Everything acquired here is device-managed.
+ */
+static int mtk_padding_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct mtk_padding *priv;
+	int ret;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(priv->clk))
+		return dev_err_probe(dev, PTR_ERR(priv->clk), "failed to get clk\n");
+
+	priv->reg = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(priv->reg))
+		return dev_err_probe(dev, PTR_ERR(priv->reg), "failed to do ioremap\n");
+
+#if IS_REACHABLE(CONFIG_MTK_CMDQ)
+	ret = cmdq_dev_get_client_reg(dev, &priv->cmdq_reg, 0);
+	if (ret)
+		return dev_err_probe(dev, ret, "failed to get gce client reg\n");
+#endif
+
+	platform_set_drvdata(pdev, priv);
+
+	ret = devm_pm_runtime_enable(dev);
+	if (ret)
+		return ret;
+
+	/*
+	 * No explicit pm_runtime_disable() on failure: the devres action
+	 * registered by devm_pm_runtime_enable() already disables runtime PM
+	 * when probe fails, and disabling twice would unbalance disable_depth.
+	 */
+	ret = component_add(dev, &mtk_padding_component_ops);
+	if (ret)
+		return dev_err_probe(dev, ret, "failed to add component\n");
+
+	return 0;
+}
+
+static int mtk_padding_remove(struct platform_device *pdev)
+{
+	component_del(&pdev->dev, &mtk_padding_component_ops);
+	return 0;
+}
+
+static const struct of_device_id mtk_padding_driver_dt_match[] = {
+	{ .compatible = "mediatek,mt8188-disp-padding" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, mtk_padding_driver_dt_match);
+
+struct platform_driver mtk_padding_driver = {
+	.probe		= mtk_padding_probe,
+	.remove		= mtk_padding_remove,
+	.driver		= {
+		.name	= "mediatek-disp-padding",
+		.owner	= THIS_MODULE,
+		.of_match_table = mtk_padding_driver_dt_match,
+	},
+};
diff --git a/drivers/gpu/drm/meson/meson_encoder_cvbs.c b/drivers/gpu/drm/meson/meson_encoder_cvbs.c
index 3f73b211fa8e..3407450435e2 100644
--- a/drivers/gpu/drm/meson/meson_encoder_cvbs.c
+++ b/drivers/gpu/drm/meson/meson_encoder_cvbs.c
@@ -294,6 +294,5 @@ void meson_encoder_cvbs_remove(struct meson_drm *priv)
 	if (priv->encoders[MESON_ENC_CVBS]) {
 		meson_encoder_cvbs = priv->encoders[MESON_ENC_CVBS];
 		drm_bridge_remove(&meson_encoder_cvbs->bridge);
-		drm_bridge_remove(meson_encoder_cvbs->next_bridge);
 	}
 }
diff --git a/drivers/gpu/drm/meson/meson_encoder_dsi.c b/drivers/gpu/drm/meson/meson_encoder_dsi.c
index 3f93c70488ca..311b91630fbe 100644
--- a/drivers/gpu/drm/meson/meson_encoder_dsi.c
+++ b/drivers/gpu/drm/meson/meson_encoder_dsi.c
@@ -168,6 +168,5 @@ void meson_encoder_dsi_remove(struct meson_drm *priv)
 	if (priv->encoders[MESON_ENC_DSI]) {
 		meson_encoder_dsi = priv->encoders[MESON_ENC_DSI];
 		drm_bridge_remove(&meson_encoder_dsi->bridge);
-		drm_bridge_remove(meson_encoder_dsi->next_bridge);
 	}
 }
diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c
index 25ea76558690..c4686568c9ca 100644
--- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c
+++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c
@@ -474,6 +474,5 @@ void meson_encoder_hdmi_remove(struct meson_drm *priv)
 	if (priv->encoders[MESON_ENC_HDMI]) {
 		meson_encoder_hdmi = priv->encoders[MESON_ENC_HDMI];
 		drm_bridge_remove(&meson_encoder_hdmi->bridge);
-		drm_bridge_remove(meson_encoder_hdmi->next_bridge);
 	}
 }
diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.h b/drivers/gpu/drm/mgag200/mgag200_drv.h
index 57c7edcab602..765e49fd8911 100644
--- a/drivers/gpu/drm/mgag200/mgag200_drv.h
+++ b/drivers/gpu/drm/mgag200/mgag200_drv.h
@@ -392,6 +392,11 @@ void mgag200_primary_plane_helper_atomic_disable(struct drm_plane *plane,
 	.destroy = drm_plane_cleanup, \
 	DRM_GEM_SHADOW_PLANE_FUNCS
 
+void mgag200_crtc_set_gamma_linear(struct mga_device *mdev, const struct drm_format_info *format);
+void mgag200_crtc_set_gamma(struct mga_device *mdev,
+			    const struct drm_format_info *format,
+			    struct drm_color_lut *lut);
+
 enum drm_mode_status mgag200_crtc_helper_mode_valid(struct drm_crtc *crtc,
 						    const struct drm_display_mode *mode);
 int mgag200_crtc_helper_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *new_state);
diff --git a/drivers/gpu/drm/mgag200/mgag200_g200er.c b/drivers/gpu/drm/mgag200/mgag200_g200er.c
index bce267e0f7de..8d4538b71047 100644
--- a/drivers/gpu/drm/mgag200/mgag200_g200er.c
+++ b/drivers/gpu/drm/mgag200/mgag200_g200er.c
@@ -202,6 +202,11 @@ static void mgag200_g200er_crtc_helper_atomic_enable(struct drm_crtc *crtc,
 
 	mgag200_g200er_reset_tagfifo(mdev);
 
+	if (crtc_state->gamma_lut)
+		mgag200_crtc_set_gamma(mdev, format, crtc_state->gamma_lut->data);
+	else
+		mgag200_crtc_set_gamma_linear(mdev, format);
+
 	mgag200_enable_display(mdev);
 
 	if (funcs->enable_vidrst)
diff --git a/drivers/gpu/drm/mgag200/mgag200_g200ev.c b/drivers/gpu/drm/mgag200/mgag200_g200ev.c
index ac957f42abe1..56e6f986bff3 100644
--- a/drivers/gpu/drm/mgag200/mgag200_g200ev.c
+++ b/drivers/gpu/drm/mgag200/mgag200_g200ev.c
@@ -203,6 +203,11 @@ static void mgag200_g200ev_crtc_helper_atomic_enable(struct drm_crtc *crtc,
 
 	mgag200_g200ev_set_hiprilvl(mdev);
 
+	if (crtc_state->gamma_lut)
+		mgag200_crtc_set_gamma(mdev, format, crtc_state->gamma_lut->data);
+	else
+		mgag200_crtc_set_gamma_linear(mdev, format);
+
 	mgag200_enable_display(mdev);
 
 	if (funcs->enable_vidrst)
diff --git a/drivers/gpu/drm/mgag200/mgag200_g200se.c b/drivers/gpu/drm/mgag200/mgag200_g200se.c
index bd6e573c9a1a..ff2b3c6622e7 100644
--- a/drivers/gpu/drm/mgag200/mgag200_g200se.c
+++ b/drivers/gpu/drm/mgag200/mgag200_g200se.c
@@ -334,6 +334,11 @@ static void mgag200_g200se_crtc_helper_atomic_enable(struct drm_crtc *crtc,
 
 	mgag200_g200se_set_hiprilvl(mdev, adjusted_mode, format);
 
+	if (crtc_state->gamma_lut)
+		mgag200_crtc_set_gamma(mdev, format, crtc_state->gamma_lut->data);
+	else
+		mgag200_crtc_set_gamma_linear(mdev, format);
+
 	mgag200_enable_display(mdev);
 
 	if (funcs->enable_vidrst)
diff --git a/drivers/gpu/drm/mgag200/mgag200_i2c.c b/drivers/gpu/drm/mgag200/mgag200_i2c.c
index 0c48bdf3e7f8..423eb302be7e 100644
--- a/drivers/gpu/drm/mgag200/mgag200_i2c.c
+++ b/drivers/gpu/drm/mgag200/mgag200_i2c.c
@@ -106,7 +106,6 @@ int mgag200_i2c_init(struct mga_device *mdev, struct mga_i2c_chan *i2c)
 	i2c->data = BIT(info->i2c.data_bit);
 	i2c->clock = BIT(info->i2c.clock_bit);
 	i2c->adapter.owner = THIS_MODULE;
-	i2c->adapter.class = I2C_CLASS_DDC;
 	i2c->adapter.dev.parent = dev->dev;
 	i2c->dev = dev;
 	i2c_set_adapdata(&i2c->adapter, i2c);
diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c
index af3ce5a6a636..0f0d59938c3a 100644
--- a/drivers/gpu/drm/mgag200/mgag200_mode.c
+++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
@@ -28,8 +28,8 @@
  * This file contains setup code for the CRTC.
  */
 
-static void mgag200_crtc_set_gamma_linear(struct mga_device *mdev,
-					  const struct drm_format_info *format)
+void mgag200_crtc_set_gamma_linear(struct mga_device *mdev,
+				   const struct drm_format_info *format)
 {
 	int i;
 
@@ -65,9 +65,9 @@ static void mgag200_crtc_set_gamma_linear(struct mga_device *mdev,
 	}
 }
 
-static void mgag200_crtc_set_gamma(struct mga_device *mdev,
-				   const struct drm_format_info *format,
-				   struct drm_color_lut *lut)
+void mgag200_crtc_set_gamma(struct mga_device *mdev,
+			    const struct drm_format_info *format,
+			    struct drm_color_lut *lut)
 {
 	int i;
 
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_i2c.c b/drivers/gpu/drm/msm/hdmi/hdmi_i2c.c
index de182c004843..7aa500d24240 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_i2c.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_i2c.c
@@ -249,7 +249,6 @@ struct i2c_adapter *msm_hdmi_i2c_init(struct hdmi *hdmi)
 
 
 	i2c->owner = THIS_MODULE;
-	i2c->class = I2C_CLASS_DDC;
 	snprintf(i2c->name, sizeof(i2c->name), "msm hdmi i2c");
 	i2c->dev.parent = &hdmi->pdev->dev;
 	i2c->algo = &msm_hdmi_i2c_algorithm;
diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig
index 1e6aaf95ff7c..ceef470c9fbf 100644
--- a/drivers/gpu/drm/nouveau/Kconfig
+++ b/drivers/gpu/drm/nouveau/Kconfig
@@ -100,3 +100,11 @@ config DRM_NOUVEAU_SVM
 	help
 	  Say Y here if you want to enable experimental support for
 	  Shared Virtual Memory (SVM).
+
+config DRM_NOUVEAU_GSP_DEFAULT
+	bool "Use GSP firmware for Turing/Ampere (needs firmware installed)"
+	depends on DRM_NOUVEAU
+	default n
+	help
+	  Say Y here if you want to use the GSP codepaths by default on
+	  Turing and Ampere GPUs.
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index 11fe75b68e95..8d37a694b772 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -2476,7 +2476,7 @@ nv50_disp_atomic_commit(struct drm_device *dev,
 
 err_cleanup:
 	if (ret)
-		drm_atomic_helper_cleanup_planes(dev, state);
+		drm_atomic_helper_unprepare_planes(dev, state);
 done:
 	pm_runtime_put_autosuspend(dev->dev);
 	return ret;
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
index 2fa0445d8928..6f5d376d8fcc 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
@@ -9,7 +9,7 @@
 #define GSP_PAGE_SIZE  BIT(GSP_PAGE_SHIFT)
 
 struct nvkm_gsp_mem {
-	u32 size;
+	size_t size;
 	void *data;
 	dma_addr_t addr;
 };
@@ -187,7 +187,7 @@ struct nvkm_gsp {
 		void (*rpc_done)(struct nvkm_gsp *gsp, void *repv);
 
 		void *(*rm_ctrl_get)(struct nvkm_gsp_object *, u32 cmd, u32 argc);
-		void *(*rm_ctrl_push)(struct nvkm_gsp_object *, void *argv, u32 repc);
+		int (*rm_ctrl_push)(struct nvkm_gsp_object *, void **argv, u32 repc);
 		void (*rm_ctrl_done)(struct nvkm_gsp_object *, void *repv);
 
 		void *(*rm_alloc_get)(struct nvkm_gsp_object *, u32 oclass, u32 argc);
@@ -265,7 +265,7 @@ nvkm_gsp_rm_ctrl_get(struct nvkm_gsp_object *object, u32 cmd, u32 argc)
 	return object->client->gsp->rm->rm_ctrl_get(object, cmd, argc);
 }
 
-static inline void *
+static inline int
 nvkm_gsp_rm_ctrl_push(struct nvkm_gsp_object *object, void *argv, u32 repc)
 {
 	return object->client->gsp->rm->rm_ctrl_push(object, argv, repc);
@@ -275,21 +275,24 @@ static inline void *
 nvkm_gsp_rm_ctrl_rd(struct nvkm_gsp_object *object, u32 cmd, u32 repc)
 {
 	void *argv = nvkm_gsp_rm_ctrl_get(object, cmd, repc);
+	int ret;
 
 	if (IS_ERR(argv))
 		return argv;
 
-	return nvkm_gsp_rm_ctrl_push(object, argv, repc);
+	ret = nvkm_gsp_rm_ctrl_push(object, &argv, repc);
+	if (ret)
+		return ERR_PTR(ret);
+	return argv;
 }
 
 static inline int
 nvkm_gsp_rm_ctrl_wr(struct nvkm_gsp_object *object, void *argv)
 {
-	void *repv = nvkm_gsp_rm_ctrl_push(object, argv, 0);
-
-	if (IS_ERR(repv))
-		return PTR_ERR(repv);
+	int ret = nvkm_gsp_rm_ctrl_push(object, &argv, 0);
 
+	if (ret)
+		return ret;
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h
index 5a2f273d95c8..0e32e71e123f 100644
--- a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h
+++ b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h
@@ -26,6 +26,49 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
+/**
+ * msgqTxHeader -- TX queue data structure
+ * @version: the version of this structure, must be 0
+ * @size: the size of the entire queue, including this header
+ * @msgSize: the padded size of queue element, 16 is minimum
+ * @msgCount: the number of elements in this queue
+ * @writePtr: head index of this queue
+ * @flags: 1 = swap the RX pointers
+ * @rxHdrOff: offset of readPtr in this structure
+ * @entryOff: offset of beginning of queue (msgqRxHeader), relative to
+ *          beginning of this structure
+ *
+ * The command queue is a queue of RPCs that are sent from the driver to the
+ * GSP.  The status queue is a queue of messages/responses from GSP-RM to the
+ * driver.  Although the driver allocates memory for both queues, the command
+ * queue is owned by the driver and the status queue is owned by GSP-RM.  In
+ * addition, the headers of the two queues must not share the same 4K page.
+ *
+ * Each queue is prefixed with this data structure.  The idea is that a queue
+ * and its header are written to only by their owner.  That is, only the
+ * driver writes to the command queue and command queue header, and only the
+ * GSP writes to the status (receive) queue and its header.
+ *
+ * This is enforced by the concept of "swapping" the RX pointers.  This is
+ * why the 'flags' field must be set to 1.  'rxHdrOff' is how the GSP knows
+ * where the tail pointer of its status queue is.
+ *
+ * When the driver writes a new RPC to the command queue, it updates writePtr.
+ * When it reads a new message from the status queue, it updates readPtr.  In
+ * this way, the GSP knows when a new command is in the queue (it polls
+ * writePtr) and it knows how much free space is in the status queue (it
+ * checks readPtr).  The driver never cares about how much free space is in
+ * the status queue.
+ *
+ * As usual, producers write to the head pointer, and consumers read from the
+ * tail pointer.  When head == tail, the queue is empty.
+ *
+ * So to summarize:
+ * command.writePtr = head of command queue
+ * command.readPtr = tail of status queue
+ * status.writePtr = head of status queue
+ * status.readPtr = tail of command queue
+ */
 typedef struct
 {
     NvU32 version;   // queue version
@@ -38,6 +81,14 @@ typedef struct
     NvU32 entryOff;  // Offset of entries from start of backing store.
 } msgqTxHeader;
 
+/**
+ * msgqRxHeader - RX queue data structure
+ * @readPtr: tail index of the other queue
+ *
+ * Although this is a separate struct, it could easily be merged into
+ * msgqTxHeader.  msgqTxHeader.rxHdrOff is simply the offset of readPtr
+ * from the beginning of msgqTxHeader.
+ */
 typedef struct
 {
     NvU32 readPtr; // message id of last message read
diff --git a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h
index 754c6af42f30..10121218f4d3 100644
--- a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h
+++ b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h
@@ -38,7 +38,7 @@ typedef struct PACKED_REGISTRY_TABLE
 {
     NvU32                   size;
     NvU32                   numEntries;
-    PACKED_REGISTRY_ENTRY   entries[0];
+    PACKED_REGISTRY_ENTRY   entries[] __counted_by(numEntries);
 } PACKED_REGISTRY_TABLE;
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index a04156ca8390..cd14f993bdd1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -128,12 +128,14 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16,
 	struct nouveau_abi16_ntfy *ntfy, *temp;
 
 	/* Cancel all jobs from the entity's queue. */
-	drm_sched_entity_fini(&chan->sched.entity);
+	if (chan->sched)
+		drm_sched_entity_fini(&chan->sched->entity);
 
 	if (chan->chan)
 		nouveau_channel_idle(chan->chan);
 
-	nouveau_sched_fini(&chan->sched);
+	if (chan->sched)
+		nouveau_sched_destroy(&chan->sched);
 
 	/* cleanup notifier state */
 	list_for_each_entry_safe(ntfy, temp, &chan->notifiers, head) {
@@ -197,6 +199,7 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS)
 	struct nouveau_cli *cli = nouveau_cli(file_priv);
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nvif_device *device = &drm->client.device;
+	struct nvkm_device *nvkm_device = nvxx_device(&drm->client.device);
 	struct nvkm_gr *gr = nvxx_gr(device);
 	struct drm_nouveau_getparam *getparam = data;
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
@@ -261,6 +264,14 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS)
 		getparam->value = nouveau_exec_push_max_from_ib_max(ib_max);
 		break;
 	}
+	case NOUVEAU_GETPARAM_VRAM_BAR_SIZE:
+		getparam->value = nvkm_device->func->resource_size(nvkm_device, 1);
+		break;
+	case NOUVEAU_GETPARAM_VRAM_USED: {
+		struct ttm_resource_manager *vram_mgr = ttm_manager_type(&drm->ttm.bdev, TTM_PL_VRAM);
+		getparam->value = (u64)ttm_resource_manager_usage(vram_mgr) << PAGE_SHIFT;
+		break;
+	}
 	default:
 		NV_PRINTK(dbg, cli, "unknown parameter %lld\n", getparam->param);
 		return -EINVAL;
@@ -337,10 +348,16 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
 	if (ret)
 		goto done;
 
-	ret = nouveau_sched_init(&chan->sched, drm, drm->sched_wq,
-				 chan->chan->dma.ib_max);
-	if (ret)
-		goto done;
+	/* If we're not using the VM_BIND uAPI, we don't need a scheduler.
+	 *
+	 * The client lock is already acquired by nouveau_abi16_get().
+	 */
+	if (nouveau_cli_uvmm(cli)) {
+		ret = nouveau_sched_create(&chan->sched, drm, drm->sched_wq,
+					   chan->chan->dma.ib_max);
+		if (ret)
+			goto done;
+	}
 
 	init->channel = chan->chan->chid;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.h b/drivers/gpu/drm/nouveau/nouveau_abi16.h
index 1f5e243c0c75..11c8c4a80079 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.h
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.h
@@ -26,7 +26,7 @@ struct nouveau_abi16_chan {
 	struct nouveau_bo *ntfy;
 	struct nouveau_vma *ntfy_vma;
 	struct nvkm_mm  heap;
-	struct nouveau_sched sched;
+	struct nouveau_sched *sched;
 };
 
 struct nouveau_abi16 {
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index b7dda486a7ea..00cc7d1abaa3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -325,8 +325,9 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain,
 			    (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT))
 				continue;
 
-			if (pi < 0)
-				pi = i;
+			/* pick the last one as it will be smallest. */
+			pi = i;
+
 			/* Stop once the buffer is larger than the current page size. */
 			if (*size >= 1ULL << vmm->page[i].shift)
 				break;
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 6f6c31a9937b..a947e1d5f309 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -201,7 +201,8 @@ nouveau_cli_fini(struct nouveau_cli *cli)
 	WARN_ON(!list_empty(&cli->worker));
 
 	usif_client_fini(cli);
-	nouveau_sched_fini(&cli->sched);
+	if (cli->sched)
+		nouveau_sched_destroy(&cli->sched);
 	if (uvmm)
 		nouveau_uvmm_fini(uvmm);
 	nouveau_vmm_fini(&cli->svm);
@@ -311,7 +312,7 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname,
 	cli->mem = &mems[ret];
 
 	/* Don't pass in the (shared) sched_wq in order to let
-	 * nouveau_sched_init() create a dedicated one for VM_BIND jobs.
+	 * nouveau_sched_create() create a dedicated one for VM_BIND jobs.
 	 *
 	 * This is required to ensure that for VM_BIND jobs free_job() work and
 	 * run_job() work can always run concurrently and hence, free_job() work
@@ -320,7 +321,7 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname,
 	 * locks which indirectly or directly are held for allocations
 	 * elsewhere.
 	 */
-	ret = nouveau_sched_init(&cli->sched, drm, NULL, 1);
+	ret = nouveau_sched_create(&cli->sched, drm, NULL, 1);
 	if (ret)
 		goto done;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 8a6d94c8b163..e239c6bf4afa 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -98,7 +98,7 @@ struct nouveau_cli {
 		bool disabled;
 	} uvmm;
 
-	struct nouveau_sched sched;
+	struct nouveau_sched *sched;
 
 	const struct nvif_mclass *mem;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c
index bc5d71b79ab2..e65c0ef23bc7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_exec.c
+++ b/drivers/gpu/drm/nouveau/nouveau_exec.c
@@ -389,7 +389,7 @@ nouveau_exec_ioctl_exec(struct drm_device *dev,
 	if (ret)
 		goto out;
 
-	args.sched = &chan16->sched;
+	args.sched = chan16->sched;
 	args.file_priv = file_priv;
 	args.chan = chan;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index ca762ea55413..93f08f9479d8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -103,6 +103,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error)
 void
 nouveau_fence_context_del(struct nouveau_fence_chan *fctx)
 {
+	cancel_work_sync(&fctx->uevent_work);
 	nouveau_fence_context_kill(fctx, 0);
 	nvif_event_dtor(&fctx->event);
 	fctx->dead = 1;
@@ -145,12 +146,13 @@ nouveau_fence_update(struct nouveau_channel *chan, struct nouveau_fence_chan *fc
 	return drop;
 }
 
-static int
-nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc)
+static void
+nouveau_fence_uevent_work(struct work_struct *work)
 {
-	struct nouveau_fence_chan *fctx = container_of(event, typeof(*fctx), event);
+	struct nouveau_fence_chan *fctx = container_of(work, struct nouveau_fence_chan,
+						       uevent_work);
 	unsigned long flags;
-	int ret = NVIF_EVENT_KEEP;
+	int drop = 0;
 
 	spin_lock_irqsave(&fctx->lock, flags);
 	if (!list_empty(&fctx->pending)) {
@@ -160,11 +162,20 @@ nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc
 		fence = list_entry(fctx->pending.next, typeof(*fence), head);
 		chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock));
 		if (nouveau_fence_update(chan, fctx))
-			ret = NVIF_EVENT_DROP;
+			drop = 1;
 	}
+	if (drop)
+		nvif_event_block(&fctx->event);
+
 	spin_unlock_irqrestore(&fctx->lock, flags);
+}
 
-	return ret;
+static int
+nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc)
+{
+	struct nouveau_fence_chan *fctx = container_of(event, typeof(*fctx), event);
+	schedule_work(&fctx->uevent_work);
+	return NVIF_EVENT_KEEP;
 }
 
 void
@@ -178,6 +189,7 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha
 	} args;
 	int ret;
 
+	INIT_WORK(&fctx->uevent_work, nouveau_fence_uevent_work);
 	INIT_LIST_HEAD(&fctx->flip);
 	INIT_LIST_HEAD(&fctx->pending);
 	spin_lock_init(&fctx->lock);
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
index 64d33ae7f356..8bc065acfe35 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -44,6 +44,7 @@ struct nouveau_fence_chan {
 	u32 context;
 	char name[32];
 
+	struct work_struct uevent_work;
 	struct nvif_event event;
 	int notify_ref, dead, killed;
 };
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
index dd98f6910f9c..32fa2e273965 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sched.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
@@ -398,7 +398,7 @@ static const struct drm_sched_backend_ops nouveau_sched_ops = {
 	.free_job = nouveau_sched_free_job,
 };
 
-int
+static int
 nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
 		   struct workqueue_struct *wq, u32 credit_limit)
 {
@@ -453,7 +453,30 @@ fail_wq:
 	return ret;
 }
 
-void
+int
+nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm,
+		     struct workqueue_struct *wq, u32 credit_limit)
+{
+	struct nouveau_sched *sched;
+	int ret;
+
+	sched = kzalloc(sizeof(*sched), GFP_KERNEL);
+	if (!sched)
+		return -ENOMEM;
+
+	ret = nouveau_sched_init(sched, drm, wq, credit_limit);
+	if (ret) {
+		kfree(sched);
+		return ret;
+	}
+
+	*psched = sched;
+
+	return 0;
+}
+
+
+static void
 nouveau_sched_fini(struct nouveau_sched *sched)
 {
 	struct drm_gpu_scheduler *drm_sched = &sched->base;
@@ -471,3 +494,14 @@ nouveau_sched_fini(struct nouveau_sched *sched)
 	if (sched->wq)
 		destroy_workqueue(sched->wq);
 }
+
+void
+nouveau_sched_destroy(struct nouveau_sched **psched)
+{
+	struct nouveau_sched *sched = *psched;
+
+	nouveau_sched_fini(sched);
+	kfree(sched);
+
+	*psched = NULL;
+}
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.h b/drivers/gpu/drm/nouveau/nouveau_sched.h
index a6528f5981e6..e1f01a23e6f6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sched.h
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.h
@@ -111,8 +111,8 @@ struct nouveau_sched {
 	} job;
 };
 
-int nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
-		       struct workqueue_struct *wq, u32 credit_limit);
-void nouveau_sched_fini(struct nouveau_sched *sched);
+int nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm,
+			 struct workqueue_struct *wq, u32 credit_limit);
+void nouveau_sched_destroy(struct nouveau_sched **psched);
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
index cc03e0c22ff3..5e4565c5011a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -1011,7 +1011,7 @@ nouveau_svm_fault_buffer_ctor(struct nouveau_svm *svm, s32 oclass, int id)
 	if (ret)
 		return ret;
 
-	buffer->fault = kvcalloc(sizeof(*buffer->fault), buffer->entries, GFP_KERNEL);
+	buffer->fault = kvcalloc(buffer->entries, sizeof(*buffer->fault), GFP_KERNEL);
 	if (!buffer->fault)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
index 4f223c972c6a..0a0a11dc9ec0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -1740,7 +1740,7 @@ nouveau_uvmm_ioctl_vm_bind(struct drm_device *dev,
 	if (ret)
 		return ret;
 
-	args.sched = &cli->sched;
+	args.sched = cli->sched;
 	args.file_priv = file_priv;
 
 	ret = nouveau_uvmm_vm_bind(&args);
diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.c b/drivers/gpu/drm/nouveau/nouveau_vmm.c
index a6602c012671..3dda885df5b2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_vmm.c
@@ -108,6 +108,9 @@ nouveau_vma_new(struct nouveau_bo *nvbo, struct nouveau_vmm *vmm,
 	} else {
 		ret = nvif_vmm_get(&vmm->vmm, PTES, false, mem->mem.page, 0,
 				   mem->mem.size, &tmp);
+		if (ret)
+			goto done;
+
 		vma->addr = tmp.addr;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c
index 457ec5db794d..b24eb1e560bc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c
@@ -209,7 +209,7 @@ nvkm_disp_dtor(struct nvkm_engine *engine)
 		nvkm_head_del(&head);
 	}
 
-	if (disp->func->dtor)
+	if (disp->func && disp->func->dtor)
 		disp->func->dtor(disp);
 
 	return data;
@@ -243,8 +243,10 @@ nvkm_disp_new_(const struct nvkm_disp_func *func, struct nvkm_device *device,
 	spin_lock_init(&disp->client.lock);
 
 	ret = nvkm_engine_ctor(&nvkm_disp, device, type, inst, true, &disp->engine);
-	if (ret)
+	if (ret) {
+		disp->func = NULL;
 		return ret;
+	}
 
 	if (func->super) {
 		disp->super.wq = create_singlethread_workqueue("nvkm-disp");
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c
index 298035070b3a..6a0a4d3b8902 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c
@@ -282,7 +282,7 @@ r535_sor_bl_get(struct nvkm_ior *sor)
 {
 	struct nvkm_disp *disp = sor->disp;
 	NV0073_CTRL_SPECIFIC_BACKLIGHT_BRIGHTNESS_PARAMS *ctrl;
-	int lvl;
+	int ret, lvl;
 
 	ctrl = nvkm_gsp_rm_ctrl_get(&disp->rm.objcom,
 				    NV0073_CTRL_CMD_SPECIFIC_GET_BACKLIGHT_BRIGHTNESS,
@@ -292,9 +292,11 @@ r535_sor_bl_get(struct nvkm_ior *sor)
 
 	ctrl->displayId = BIT(sor->asy.outp->index);
 
-	ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl));
-	if (IS_ERR(ctrl))
-		return PTR_ERR(ctrl);
+	ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl));
+	if (ret) {
+		nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
+		return ret;
+	}
 
 	lvl = ctrl->brightness;
 	nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
@@ -649,9 +651,11 @@ r535_conn_new(struct nvkm_disp *disp, u32 id)
 	ctrl->subDeviceInstance = 0;
 	ctrl->displayId = BIT(id);
 
-	ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl));
-	if (IS_ERR(ctrl))
-		return (void *)ctrl;
+	ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl));
+	if (ret) {
+		nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
+		return ERR_PTR(ret);
+	}
 
 	list_for_each_entry(conn, &disp->conns, head) {
 		if (conn->index == ctrl->data[0].index) {
@@ -686,7 +690,7 @@ r535_outp_acquire(struct nvkm_outp *outp, bool hda)
 	struct nvkm_disp *disp = outp->disp;
 	struct nvkm_ior *ior;
 	NV0073_CTRL_DFP_ASSIGN_SOR_PARAMS *ctrl;
-	int or;
+	int ret, or;
 
 	ctrl = nvkm_gsp_rm_ctrl_get(&disp->rm.objcom,
 				    NV0073_CTRL_CMD_DFP_ASSIGN_SOR, sizeof(*ctrl));
@@ -699,9 +703,11 @@ r535_outp_acquire(struct nvkm_outp *outp, bool hda)
 	if (hda)
 		ctrl->flags |= NVDEF(NV0073_CTRL, DFP_ASSIGN_SOR_FLAGS, AUDIO, OPTIMAL);
 
-	ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl));
-	if (IS_ERR(ctrl))
-		return PTR_ERR(ctrl);
+	ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl));
+	if (ret) {
+		nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
+		return ret;
+	}
 
 	for (or = 0; or < ARRAY_SIZE(ctrl->sorAssignListWithTag); or++) {
 		if (ctrl->sorAssignListWithTag[or].displayMask & BIT(outp->index)) {
@@ -727,6 +733,7 @@ static int
 r535_disp_head_displayid(struct nvkm_disp *disp, int head, u32 *displayid)
 {
 	NV0073_CTRL_SYSTEM_GET_ACTIVE_PARAMS *ctrl;
+	int ret;
 
 	ctrl = nvkm_gsp_rm_ctrl_get(&disp->rm.objcom,
 				    NV0073_CTRL_CMD_SYSTEM_GET_ACTIVE, sizeof(*ctrl));
@@ -736,9 +743,11 @@ r535_disp_head_displayid(struct nvkm_disp *disp, int head, u32 *displayid)
 	ctrl->subDeviceInstance = 0;
 	ctrl->head = head;
 
-	ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl));
-	if (IS_ERR(ctrl))
-		return PTR_ERR(ctrl);
+	ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl));
+	if (ret) {
+		nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
+		return ret;
+	}
 
 	*displayid = ctrl->displayId;
 	nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
@@ -772,9 +781,11 @@ r535_outp_inherit(struct nvkm_outp *outp)
 			ctrl->subDeviceInstance = 0;
 			ctrl->displayId = displayid;
 
-			ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl));
-			if (IS_ERR(ctrl))
+			ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl));
+			if (ret) {
+				nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
 				return NULL;
+			}
 
 			id = ctrl->index;
 			proto = ctrl->protocol;
@@ -825,6 +836,7 @@ r535_outp_dfp_get_info(struct nvkm_outp *outp)
 {
 	NV0073_CTRL_DFP_GET_INFO_PARAMS *ctrl;
 	struct nvkm_disp *disp = outp->disp;
+	int ret;
 
 	ctrl = nvkm_gsp_rm_ctrl_get(&disp->rm.objcom, NV0073_CTRL_CMD_DFP_GET_INFO, sizeof(*ctrl));
 	if (IS_ERR(ctrl))
@@ -832,9 +844,11 @@ r535_outp_dfp_get_info(struct nvkm_outp *outp)
 
 	ctrl->displayId = BIT(outp->index);
 
-	ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl));
-	if (IS_ERR(ctrl))
-		return PTR_ERR(ctrl);
+	ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl));
+	if (ret) {
+		nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
+		return ret;
+	}
 
 	nvkm_debug(&disp->engine.subdev, "DFP %08x: flags:%08x flags2:%08x\n",
 		   ctrl->displayId, ctrl->flags, ctrl->flags2);
@@ -858,9 +872,11 @@ r535_outp_detect(struct nvkm_outp *outp)
 	ctrl->subDeviceInstance = 0;
 	ctrl->displayMask = BIT(outp->index);
 
-	ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl));
-	if (IS_ERR(ctrl))
-		return PTR_ERR(ctrl);
+	ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl));
+	if (ret) {
+		nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
+		return ret;
+	}
 
 	if (ctrl->displayMask & BIT(outp->index)) {
 		ret = r535_outp_dfp_get_info(outp);
@@ -895,6 +911,7 @@ r535_dp_mst_id_get(struct nvkm_outp *outp, u32 *pid)
 {
 	NV0073_CTRL_CMD_DP_TOPOLOGY_ALLOCATE_DISPLAYID_PARAMS *ctrl;
 	struct nvkm_disp *disp = outp->disp;
+	int ret;
 
 	ctrl = nvkm_gsp_rm_ctrl_get(&disp->rm.objcom,
 				    NV0073_CTRL_CMD_DP_TOPOLOGY_ALLOCATE_DISPLAYID,
@@ -904,9 +921,11 @@ r535_dp_mst_id_get(struct nvkm_outp *outp, u32 *pid)
 
 	ctrl->subDeviceInstance = 0;
 	ctrl->displayId = BIT(outp->index);
-	ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl));
-	if (IS_ERR(ctrl))
-		return PTR_ERR(ctrl);
+	ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl));
+	if (ret) {
+		nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
+		return ret;
+	}
 
 	*pid = ctrl->displayIdAssigned;
 	nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
@@ -938,38 +957,60 @@ r535_dp_train_target(struct nvkm_outp *outp, u8 target, bool mst, u8 link_nr, u8
 {
 	struct nvkm_disp *disp = outp->disp;
 	NV0073_CTRL_DP_CTRL_PARAMS *ctrl;
-	int ret;
-
-	ctrl = nvkm_gsp_rm_ctrl_get(&disp->rm.objcom, NV0073_CTRL_CMD_DP_CTRL, sizeof(*ctrl));
-	if (IS_ERR(ctrl))
-		return PTR_ERR(ctrl);
+	int ret, retries;
+	u32 cmd, data;
 
-	ctrl->subDeviceInstance = 0;
-	ctrl->displayId = BIT(outp->index);
-	ctrl->cmd = NVDEF(NV0073_CTRL, DP_CMD, SET_LANE_COUNT, TRUE) |
-		    NVDEF(NV0073_CTRL, DP_CMD, SET_LINK_BW, TRUE) |
-		    NVDEF(NV0073_CTRL, DP_CMD, TRAIN_PHY_REPEATER, YES);
-	ctrl->data = NVVAL(NV0073_CTRL, DP_DATA, SET_LANE_COUNT, link_nr) |
-		     NVVAL(NV0073_CTRL, DP_DATA, SET_LINK_BW, link_bw) |
-		     NVVAL(NV0073_CTRL, DP_DATA, TARGET, target);
+	cmd = NVDEF(NV0073_CTRL, DP_CMD, SET_LANE_COUNT, TRUE) |
+	      NVDEF(NV0073_CTRL, DP_CMD, SET_LINK_BW, TRUE) |
+	      NVDEF(NV0073_CTRL, DP_CMD, TRAIN_PHY_REPEATER, YES);
+	data = NVVAL(NV0073_CTRL, DP_DATA, SET_LANE_COUNT, link_nr) |
+	       NVVAL(NV0073_CTRL, DP_DATA, SET_LINK_BW, link_bw) |
+	       NVVAL(NV0073_CTRL, DP_DATA, TARGET, target);
 
 	if (mst)
-		ctrl->cmd |= NVDEF(NV0073_CTRL, DP_CMD, SET_FORMAT_MODE, MULTI_STREAM);
+		cmd |= NVDEF(NV0073_CTRL, DP_CMD, SET_FORMAT_MODE, MULTI_STREAM);
 
 	if (outp->dp.dpcd[DPCD_RC02] & DPCD_RC02_ENHANCED_FRAME_CAP)
-		ctrl->cmd |= NVDEF(NV0073_CTRL, DP_CMD, SET_ENHANCED_FRAMING, TRUE);
+		cmd |= NVDEF(NV0073_CTRL, DP_CMD, SET_ENHANCED_FRAMING, TRUE);
 
 	if (target == 0 &&
 	     (outp->dp.dpcd[DPCD_RC02] & 0x20) &&
 	    !(outp->dp.dpcd[DPCD_RC03] & DPCD_RC03_TPS4_SUPPORTED))
-	    ctrl->cmd |= NVDEF(NV0073_CTRL, DP_CMD, POST_LT_ADJ_REQ_GRANTED, YES);
+		cmd |= NVDEF(NV0073_CTRL, DP_CMD, POST_LT_ADJ_REQ_GRANTED, YES);
 
-	ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl));
-	if (IS_ERR(ctrl))
-		return PTR_ERR(ctrl);
+	/* We should retry up to 3 times, but only if GSP asks politely */
+	for (retries = 0; retries < 3; ++retries) {
+		ctrl = nvkm_gsp_rm_ctrl_get(&disp->rm.objcom, NV0073_CTRL_CMD_DP_CTRL,
+					    sizeof(*ctrl));
+		if (IS_ERR(ctrl))
+			return PTR_ERR(ctrl);
+
+		ctrl->subDeviceInstance = 0;
+		ctrl->displayId = BIT(outp->index);
+		ctrl->retryTimeMs = 0;
+		ctrl->cmd = cmd;
+		ctrl->data = data;
+
+		ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl));
+		if (ret == -EAGAIN && ctrl && ctrl->retryTimeMs) {
+			/*
+			 * Device (likely an eDP panel) isn't ready yet, wait for the time specified
+			 * by GSP before retrying again
+			 */
+			nvkm_debug(&disp->engine.subdev,
+				   "Waiting %dms for GSP LT panel delay before retrying\n",
+				   ctrl->retryTimeMs);
+			msleep(ctrl->retryTimeMs);
+			nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
+		} else {
+			/* No retry requested; ctrl is NULL if the push itself failed */
+			if (ctrl && ctrl->err)
+				ret = -EIO;
+			nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
+			break;
+		}
+	}
 
-	ret = ctrl->err ? -EIO : 0;
-	nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
 	return ret;
 }
 
@@ -1036,9 +1077,11 @@ r535_dp_aux_xfer(struct nvkm_outp *outp, u8 type, u32 addr, u8 *data, u8 *psize)
 	ctrl->size = !ctrl->bAddrOnly ? (size - 1) : 0;
 	memcpy(ctrl->data, data, size);
 
-	ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl));
-	if (IS_ERR(ctrl))
+	ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl));
+	if (ret) {
+		nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
-		return PTR_ERR(ctrl);
+		return ret;
+	}
 
 	memcpy(data, ctrl->data, size);
 	*psize = ctrl->size;
@@ -1111,10 +1154,13 @@ r535_tmds_edid_get(struct nvkm_outp *outp, u8 *data, u16 *psize)
 	ctrl->subDeviceInstance = 0;
 	ctrl->displayId = BIT(outp->index);
 
-	ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl));
-	if (IS_ERR(ctrl))
-		return PTR_ERR(ctrl);
+	ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl));
+	if (ret) {
+		nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
+		return ret;
+	}
 
+	ret = -E2BIG;
 	if (ctrl->bufferSize <= *psize) {
 		memcpy(data, ctrl->edidBuffer, ctrl->bufferSize);
 		*psize = ctrl->bufferSize;
@@ -1153,9 +1199,11 @@ r535_outp_new(struct nvkm_disp *disp, u32 id)
 	ctrl->subDeviceInstance = 0;
 	ctrl->displayId = BIT(id);
 
-	ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl));
-	if (IS_ERR(ctrl))
-		return PTR_ERR(ctrl);
+	ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl));
+	if (ret) {
+		nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
+		return ret;
+	}
 
 	switch (ctrl->type) {
 	case NV0073_CTRL_SPECIFIC_OR_TYPE_NONE:
@@ -1229,9 +1277,11 @@ r535_outp_new(struct nvkm_disp *disp, u32 id)
 
 		ctrl->sorIndex = ~0;
 
-		ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl));
-		if (IS_ERR(ctrl))
-			return PTR_ERR(ctrl);
+		ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl));
+		if (ret) {
+			nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
+			return ret;
+		}
 
 		switch (NVVAL_GET(ctrl->maxLinkRate, NV0073_CTRL_CMD, DP_GET_CAPS, MAX_LINK_RATE)) {
 		case NV0073_CTRL_CMD_DP_GET_CAPS_MAX_LINK_RATE_1_62:
@@ -1465,8 +1515,6 @@ r535_disp_oneinit(struct nvkm_disp *disp)
 				bool nvhg = acpi_check_dsm(handle, &NVHG_DSM_GUID, NVHG_DSM_REV,
 						           1ULL << 0x00000014);
 
-				printk(KERN_ERR "bl: nbci:%d nvhg:%d\n", nbci, nvhg);
-
 				if (nbci || nvhg) {
 					union acpi_object argv4 = {
 						.buffer.type    = ACPI_TYPE_BUFFER,
@@ -1479,9 +1527,6 @@ r535_disp_oneinit(struct nvkm_disp *disp)
 					if (!obj) {
 						acpi_handle_info(handle, "failed to evaluate _DSM\n");
 					} else {
-						printk(KERN_ERR "bl: obj type %d\n", obj->type);
-						printk(KERN_ERR "bl: obj len %d\n", obj->package.count);
-
 						for (int i = 0; i < obj->package.count; i++) {
 							union acpi_object *elt = &obj->package.elements[i];
 							u32 size;
@@ -1491,12 +1536,10 @@ r535_disp_oneinit(struct nvkm_disp *disp)
 							else
 								size = 4;
 
-							printk(KERN_ERR "elt %03d: type %d size %d\n", i, elt->type, size);
 							memcpy(&ctrl->backLightData[ctrl->backLightDataSize], &elt->integer.value, size);
 							ctrl->backLightDataSize += size;
 						}
 
-						printk(KERN_ERR "bl: data size %d\n", ctrl->backLightDataSize);
 						ctrl->status = 0;
 						ACPI_FREE(obj);
 					}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c
index e4279f1772a1..377d0e0cef84 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c
@@ -385,7 +385,7 @@ nvkm_uoutp_mthd_inherit(struct nvkm_outp *outp, void *argv, u32 argc)
 
 	/* Ensure an ior is hooked up to this outp already */
 	ior = outp->func->inherit(outp);
-	if (!ior)
+	if (!ior || !ior->arm.head)
 		return -ENODEV;
 
 	/* With iors, there will be a separate output path for each type of connector - and all of
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c
index c8ce7ff18713..e74493a4569e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c
@@ -550,6 +550,10 @@ ga100_fifo_nonstall_ctor(struct nvkm_fifo *fifo)
 		struct nvkm_engn *engn = list_first_entry(&runl->engns, typeof(*engn), head);
 
 		runl->nonstall.vector = engn->func->nonstall(engn);
+
+		/* if no nonstall vector just keep going */
+		if (runl->nonstall.vector == -1)
+			continue;
 		if (runl->nonstall.vector < 0) {
 			RUNL_ERROR(runl, "nonstall %d", runl->nonstall.vector);
 			return runl->nonstall.vector;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c
index d088e636edc3..3454c7d29502 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c
@@ -242,6 +242,7 @@ r535_chan_id_put(struct nvkm_chan *chan)
 				nvkm_memory_unref(&userd->mem);
 				nvkm_chid_put(runl->chid, userd->chid, &chan->cgrp->lock);
 				list_del(&userd->head);
+				kfree(userd);
 			}
 
 			break;
@@ -350,7 +351,7 @@ r535_engn_nonstall(struct nvkm_engn *engn)
 	int ret;
 
 	ret = nvkm_gsp_intr_nonstall(subdev->device->gsp, subdev->type, subdev->inst);
-	WARN_ON(ret < 0);
+	WARN_ON(ret == -ENOENT);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c
index 4135690326f4..3a30bea30e36 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c
@@ -168,12 +168,11 @@ r535_bar_new_(const struct nvkm_bar_func *hw, struct nvkm_device *device,
 	rm->flush = r535_bar_flush;
 
 	ret = gf100_bar_new_(rm, device, type, inst, &bar);
-	*pbar = bar;
 	if (ret) {
-		if (!bar)
-			kfree(rm);
+		kfree(rm);
 		return ret;
 	}
+	*pbar = bar;
 
 	bar->flushBAR2PhysMode = ioremap(device->func->resource_addr(device, 3), PAGE_SIZE);
 	if (!bar->flushBAR2PhysMode)
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c
index 19188683c8fc..8c2bf1c16f2a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c
@@ -154,11 +154,17 @@ shadow_fw_init(struct nvkm_bios *bios, const char *name)
 	return (void *)fw;
 }
 
+static void
+shadow_fw_release(void *fw)
+{
+	release_firmware(fw);
+}
+
 static const struct nvbios_source
 shadow_fw = {
 	.name = "firmware",
 	.init = shadow_fw_init,
-	.fini = (void(*)(void *))release_firmware,
+	.fini = shadow_fw_release,
 	.read = shadow_fw_read,
 	.rw = false,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c
index 04bceaa28a19..da1bebb896f7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c
@@ -25,12 +25,8 @@ int
 nvkm_gsp_intr_nonstall(struct nvkm_gsp *gsp, enum nvkm_subdev_type type, int inst)
 {
 	for (int i = 0; i < gsp->intr_nr; i++) {
-		if (gsp->intr[i].type == type && gsp->intr[i].inst == inst) {
-			if (gsp->intr[i].nonstall != ~0)
-				return gsp->intr[i].nonstall;
-
-			return -EINVAL;
-		}
+		if (gsp->intr[i].type == type && gsp->intr[i].inst == inst)
+			return gsp->intr[i].nonstall;
 	}
 
 	return -ENOENT;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
index dc44f5c7833f..a64c81385682 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
@@ -70,6 +70,20 @@ struct r535_gsp_msg {
 
 #define GSP_MSG_HDR_SIZE offsetof(struct r535_gsp_msg, data)
 
+static int
+r535_rpc_status_to_errno(uint32_t rpc_status)
+{
+	switch (rpc_status) {
+	case 0x55: /* NV_ERR_NOT_READY */
+	case 0x66: /* NV_ERR_TIMEOUT_RETRY */
+		return -EAGAIN;
+	case 0x51: /* NV_ERR_NO_MEMORY */
+		return -ENOMEM;
+	default:
+		return -EINVAL;
+	}
+}
+
 static void *
 r535_gsp_msgq_wait(struct nvkm_gsp *gsp, u32 repc, u32 *prepc, int *ptime)
 {
@@ -298,7 +312,8 @@ retry:
 		struct nvkm_gsp_msgq_ntfy *ntfy = &gsp->msgq.ntfy[i];
 
 		if (ntfy->fn == msg->function) {
-			ntfy->func(ntfy->priv, ntfy->fn, msg->data, msg->length - sizeof(*msg));
+			if (ntfy->func)
+				ntfy->func(ntfy->priv, ntfy->fn, msg->data, msg->length - sizeof(*msg));
 			break;
 		}
 	}
@@ -365,10 +380,8 @@ r535_gsp_rpc_send(struct nvkm_gsp *gsp, void *argv, bool wait, u32 repc)
 	}
 
 	ret = r535_gsp_cmdq_push(gsp, rpc);
-	if (ret) {
-		mutex_unlock(&gsp->cmdq.mutex);
+	if (ret)
 		return ERR_PTR(ret);
-	}
 
 	if (wait) {
 		msg = r535_gsp_msg_recv(gsp, fn, repc);
@@ -585,14 +598,14 @@ r535_gsp_rpc_rm_alloc_push(struct nvkm_gsp_object *object, void *argv, u32 repc)
 		return rpc;
 
 	if (rpc->status) {
-		nvkm_error(&gsp->subdev, "RM_ALLOC: 0x%x\n", rpc->status);
-		ret = ERR_PTR(-EINVAL);
+		ret = ERR_PTR(r535_rpc_status_to_errno(rpc->status));
+		if (PTR_ERR(ret) != -EAGAIN)
+			nvkm_error(&gsp->subdev, "RM_ALLOC: 0x%x\n", rpc->status);
 	} else {
 		ret = repc ? rpc->params : NULL;
 	}
 
-	if (IS_ERR_OR_NULL(ret))
-		nvkm_gsp_rpc_done(gsp, rpc);
+	nvkm_gsp_rpc_done(gsp, rpc);
 
 	return ret;
 }
@@ -625,29 +638,34 @@ r535_gsp_rpc_rm_ctrl_done(struct nvkm_gsp_object *object, void *repv)
 {
 	rpc_gsp_rm_control_v03_00 *rpc = container_of(repv, typeof(*rpc), params);
 
+	if (!repv)
+		return;
 	nvkm_gsp_rpc_done(object->client->gsp, rpc);
 }
 
-static void *
-r535_gsp_rpc_rm_ctrl_push(struct nvkm_gsp_object *object, void *argv, u32 repc)
+static int
+r535_gsp_rpc_rm_ctrl_push(struct nvkm_gsp_object *object, void **argv, u32 repc)
 {
-	rpc_gsp_rm_control_v03_00 *rpc = container_of(argv, typeof(*rpc), params);
+	rpc_gsp_rm_control_v03_00 *rpc = container_of((*argv), typeof(*rpc), params);
 	struct nvkm_gsp *gsp = object->client->gsp;
-	void *ret;
+	int ret = 0;
 
 	rpc = nvkm_gsp_rpc_push(gsp, rpc, true, repc);
-	if (IS_ERR_OR_NULL(rpc))
-		return rpc;
+	if (IS_ERR_OR_NULL(rpc)) {
+		*argv = NULL;
+		return PTR_ERR(rpc);
+	}
 
 	if (rpc->status) {
-		nvkm_error(&gsp->subdev, "cli:0x%08x obj:0x%08x ctrl cmd:0x%08x failed: 0x%08x\n",
-			   object->client->object.handle, object->handle, rpc->cmd, rpc->status);
-		ret = ERR_PTR(-EINVAL);
-	} else {
-		ret = repc ? rpc->params : NULL;
+		ret = r535_rpc_status_to_errno(rpc->status);
+		if (ret != -EAGAIN)
+			nvkm_error(&gsp->subdev, "cli:0x%08x obj:0x%08x ctrl cmd:0x%08x failed: 0x%08x\n",
+				   object->client->object.handle, object->handle, rpc->cmd, rpc->status);
 	}
 
-	if (IS_ERR_OR_NULL(ret))
+	if (repc)
+		*argv = rpc->params;
+	else
 		nvkm_gsp_rpc_done(gsp, rpc);
 
 	return ret;
@@ -845,9 +863,11 @@ r535_gsp_intr_get_table(struct nvkm_gsp *gsp)
 	if (IS_ERR(ctrl))
 		return PTR_ERR(ctrl);
 
-	ctrl = nvkm_gsp_rm_ctrl_push(&gsp->internal.device.subdevice, ctrl, sizeof(*ctrl));
-	if (WARN_ON(IS_ERR(ctrl)))
-		return PTR_ERR(ctrl);
+	ret = nvkm_gsp_rm_ctrl_push(&gsp->internal.device.subdevice, &ctrl, sizeof(*ctrl));
+	if (WARN_ON(ret)) {
+		nvkm_gsp_rm_ctrl_done(&gsp->internal.device.subdevice, ctrl);
+		return ret;
+	}
 
 	for (unsigned i = 0; i < ctrl->tableLen; i++) {
 		enum nvkm_subdev_type type;
@@ -977,6 +997,32 @@ r535_gsp_rpc_get_gsp_static_info(struct nvkm_gsp *gsp)
 	return 0;
 }
 
+static void
+nvkm_gsp_mem_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_mem *mem)
+{
+	if (!mem->data)
+		return;
+
+	/*
+	 * Fill the buffer with 0xFF before handing it back, so that an
+	 * unexpected GSP-RM access to a prematurely freed buffer is caught.
+	 */
+	memset(mem->data, 0xFF, mem->size);
+	dma_free_coherent(gsp->subdev.device->dev, mem->size, mem->data, mem->addr);
+	memset(mem, 0, sizeof(*mem));
+}
+
+static int
+nvkm_gsp_mem_ctor(struct nvkm_gsp *gsp, size_t size, struct nvkm_gsp_mem *mem)
+{
+	/* The size is kept with the buffer; nvkm_gsp_mem_dtor() frees by mem->size. */
+	mem->size = size;
+	mem->data = dma_alloc_coherent(gsp->subdev.device->dev, size, &mem->addr, GFP_KERNEL);
+
+	/* WARN_ON() yields its condition, so a failed allocation is still reported. */
+	return WARN_ON(!mem->data) ? -ENOMEM : 0;
+}
+
 static int
 r535_gsp_postinit(struct nvkm_gsp *gsp)
 {
@@ -1004,6 +1050,13 @@ r535_gsp_postinit(struct nvkm_gsp *gsp)
 
 	nvkm_inth_allow(&gsp->subdev.inth);
 	nvkm_wr32(device, 0x110004, 0x00000040);
+
+	/* Release the DMA buffers that were needed only for boot and init */
+	nvkm_gsp_mem_dtor(gsp, &gsp->boot.fw);
+	nvkm_gsp_mem_dtor(gsp, &gsp->libos);
+	nvkm_gsp_mem_dtor(gsp, &gsp->rmargs);
+	nvkm_gsp_mem_dtor(gsp, &gsp->wpr_meta);
+
 	return ret;
 }
 
@@ -1048,7 +1101,7 @@ r535_gsp_rpc_set_registry(struct nvkm_gsp *gsp)
 	char *strings;
 	int str_offset;
 	int i;
-	size_t rpc_size = sizeof(*rpc) + sizeof(rpc->entries[0]) * NV_GSP_REG_NUM_ENTRIES;
+	size_t rpc_size = struct_size(rpc, entries, NV_GSP_REG_NUM_ENTRIES);
 
 	/* add strings + null terminator */
 	for (i = 0; i < NV_GSP_REG_NUM_ENTRIES; i++)
@@ -1058,7 +1111,6 @@ r535_gsp_rpc_set_registry(struct nvkm_gsp *gsp)
 	if (IS_ERR(rpc))
 		return PTR_ERR(rpc);
 
-	rpc->size = sizeof(*rpc);
 	rpc->numEntries = NV_GSP_REG_NUM_ENTRIES;
 
 	str_offset = offsetof(typeof(*rpc), entries[NV_GSP_REG_NUM_ENTRIES]);
@@ -1074,6 +1126,7 @@ r535_gsp_rpc_set_registry(struct nvkm_gsp *gsp)
 		strings += name_len;
 		str_offset += name_len;
 	}
+	rpc->size = str_offset;
 
 	return nvkm_gsp_rpc_wr(gsp, rpc, false);
 }
@@ -1101,16 +1154,12 @@ r535_gsp_acpi_caps(acpi_handle handle, CAPS_METHOD_DATA *caps)
 	if (!obj)
 		return;
 
-	printk(KERN_ERR "nvop: obj type %d\n", obj->type);
-	printk(KERN_ERR "nvop: obj len %d\n", obj->buffer.length);
-
 	if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) ||
 	    WARN_ON(obj->buffer.length != 4))
 		return;
 
 	caps->status = 0;
 	caps->optimusCaps = *(u32 *)obj->buffer.pointer;
-	printk(KERN_ERR "nvop: caps %08x\n", caps->optimusCaps);
 
 	ACPI_FREE(obj);
 
@@ -1137,9 +1186,6 @@ r535_gsp_acpi_jt(acpi_handle handle, JT_METHOD_DATA *jt)
 	if (!obj)
 		return;
 
-	printk(KERN_ERR "jt: obj type %d\n", obj->type);
-	printk(KERN_ERR "jt: obj len %d\n", obj->buffer.length);
-
 	if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) ||
 	    WARN_ON(obj->buffer.length != 4))
 		return;
@@ -1148,7 +1194,6 @@ r535_gsp_acpi_jt(acpi_handle handle, JT_METHOD_DATA *jt)
 	jt->jtCaps = *(u32 *)obj->buffer.pointer;
 	jt->jtRevId = (jt->jtCaps & 0xfff00000) >> 20;
 	jt->bSBIOSCaps = 0;
-	printk(KERN_ERR "jt: caps %08x rev:%04x\n", jt->jtCaps, jt->jtRevId);
 
 	ACPI_FREE(obj);
 
@@ -1159,6 +1204,8 @@ static void
 r535_gsp_acpi_mux_id(acpi_handle handle, u32 id, MUX_METHOD_DATA_ELEMENT *mode,
 						 MUX_METHOD_DATA_ELEMENT *part)
 {
+	union acpi_object mux_arg = { ACPI_TYPE_INTEGER };
+	struct acpi_object_list input = { 1, &mux_arg };
 	acpi_handle iter = NULL, handle_mux = NULL;
 	acpi_status status;
 	unsigned long long value;
@@ -1181,14 +1228,18 @@ r535_gsp_acpi_mux_id(acpi_handle handle, u32 id, MUX_METHOD_DATA_ELEMENT *mode,
 	if (!handle_mux)
 		return;
 
-	status = acpi_evaluate_integer(handle_mux, "MXDM", NULL, &value);
+	/* I -think- 0 means "acquire" according to nvidia's driver source */
+	input.pointer->integer.type = ACPI_TYPE_INTEGER;
+	input.pointer->integer.value = 0;
+
+	status = acpi_evaluate_integer(handle_mux, "MXDM", &input, &value);
 	if (ACPI_SUCCESS(status)) {
 		mode->acpiId = id;
 		mode->mode   = value;
 		mode->status = 0;
 	}
 
-	status = acpi_evaluate_integer(handle_mux, "MXDS", NULL, &value);
+	status = acpi_evaluate_integer(handle_mux, "MXDS", &input, &value);
 	if (ACPI_SUCCESS(status)) {
 		part->acpiId = id;
 		part->mode   = value;
@@ -1234,8 +1285,8 @@ r535_gsp_acpi_dod(acpi_handle handle, DOD_METHOD_DATA *dod)
 		dod->acpiIdListLen += sizeof(dod->acpiIdList[0]);
 	}
 
-	printk(KERN_ERR "_DOD: ok! len:%d\n", dod->acpiIdListLen);
 	dod->status = 0;
+	kfree(output.pointer);
 }
 #endif
 
@@ -1379,6 +1430,13 @@ r535_gsp_msg_post_event(void *priv, u32 fn, void *repv, u32 repc)
 	return 0;
 }
 
+/**
+ * r535_gsp_msg_run_cpu_sequencer() -- process I/O commands from the GSP
+ *
+ * The GSP sequencer is a list of I/O commands that the GSP can send to
+ * the driver to perform for various purposes.  The most common usage is to
+ * perform a special mid-initialization reset.
+ */
 static int
 r535_gsp_msg_run_cpu_sequencer(void *priv, u32 fn, void *repv, u32 repc)
 {
@@ -1507,27 +1565,6 @@ r535_gsp_msg_run_cpu_sequencer(void *priv, u32 fn, void *repv, u32 repc)
 	return 0;
 }
 
-static void
-nvkm_gsp_mem_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_mem *mem)
-{
-	if (mem->data) {
-		dma_free_coherent(gsp->subdev.device->dev, mem->size, mem->data, mem->addr);
-		mem->data = NULL;
-	}
-}
-
-static int
-nvkm_gsp_mem_ctor(struct nvkm_gsp *gsp, u32 size, struct nvkm_gsp_mem *mem)
-{
-	mem->size = size;
-	mem->data = dma_alloc_coherent(gsp->subdev.device->dev, size, &mem->addr, GFP_KERNEL);
-	if (WARN_ON(!mem->data))
-		return -ENOMEM;
-
-	return 0;
-}
-
-
 static int
 r535_gsp_booter_unload(struct nvkm_gsp *gsp, u32 mbox0, u32 mbox1)
 {
@@ -1718,6 +1755,23 @@ r535_gsp_libos_id8(const char *name)
 	return id;
 }
 
+/**
+ * create_pte_array() - creates a PTE array of a physically contiguous buffer
+ * @ptes: pointer to the array
+ * @addr: base address of physically contiguous buffer (GSP_PAGE_SIZE aligned)
+ * @size: size of the buffer
+ *
+ * GSP-RM sometimes expects physically-contiguous buffers to have an array of
+ * "PTEs" for each page in that buffer.  Although in theory that allows for
+ * the buffer to be physically discontiguous, GSP-RM does not currently
+ * support that.
+ *
+ * In this case, the PTEs are DMA addresses of each page of the buffer.  Since
+ * the buffer is physically contiguous, calculating all the PTEs is simple
+ * math.
+ *
+ * See memdescGetPhysAddrsForGpu()
+ */
 static void create_pte_array(u64 *ptes, dma_addr_t addr, size_t size)
 {
 	unsigned int num_pages = DIV_ROUND_UP_ULL(size, GSP_PAGE_SIZE);
@@ -1727,6 +1781,35 @@ static void create_pte_array(u64 *ptes, dma_addr_t addr, size_t size)
 		ptes[i] = (u64)addr + (i << GSP_PAGE_SHIFT);
 }
 
+/**
+ * r535_gsp_libos_init() -- create the libos arguments structure
+ *
+ * The logging buffers are byte queues that contain encoded printf-like
+ * messages from GSP-RM.  They need to be decoded by a special application
+ * that can parse the buffers.
+ *
+ * The 'loginit' buffer contains logs from early GSP-RM init and
+ * exception dumps.  The 'logrm' buffer contains the subsequent logs. Both are
+ * written to directly by GSP-RM and can be any multiple of GSP_PAGE_SIZE.
+ *
+ * The physical address map for the log buffer is stored in the buffer
+ * itself, starting with offset 1. Offset 0 contains the "put" pointer.
+ *
+ * The GSP only understands 4K pages (GSP_PAGE_SIZE), so even if the kernel is
+ * configured for a larger page size (e.g. 64K pages), we need to give
+ * the GSP an array of 4K pages. Fortunately, since the buffer is
+ * physically contiguous, it's simple math to calculate the addresses.
+ *
+ * The buffers must be a multiple of GSP_PAGE_SIZE.  GSP-RM also currently
+ * ignores the @kind field for LOGINIT, LOGINTR, and LOGRM, but expects the
+ * buffers to be physically contiguous anyway.
+ *
+ * The memory allocated for the arguments must remain until the GSP sends the
+ * init_done RPC.
+ *
+ * See _kgspInitLibosLoggingStructures (allocates memory for buffers)
+ * See kgspSetupLibosInitArgs_IMPL (creates pLibosInitArgs[] array)
+ */
 static int
 r535_gsp_libos_init(struct nvkm_gsp *gsp)
 {
@@ -1837,21 +1920,50 @@ nvkm_gsp_radix3_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_radix3 *rx3)
 		nvkm_gsp_mem_dtor(gsp, &rx3->mem[i]);
 }
 
+/**
+ * nvkm_gsp_radix3_sg - build a radix3 table from a S/G list
+ *
+ * The GSP uses a three-level page table, called radix3, to map the firmware.
+ * Each 64-bit "pointer" in the table is either the bus address of an entry in
+ * the next table (for levels 0 and 1) or the bus address of the next page in
+ * the GSP firmware image itself.
+ *
+ * Level 0 contains a single entry in one page that points to the first page
+ * of level 1.
+ *
+ * Level 1, since it's also only one page in size, contains up to 512 entries,
+ * one for each page in Level 2.
+ *
+ * Level 2 can be up to 512 pages in size, and each of those entries points to
+ * the next page of the firmware image.  Since there can be up to 512*512
+ * pages, that limits the size of the firmware to 512*512*GSP_PAGE_SIZE = 1GB.
+ *
+ * Internally, the GSP has its window into system memory, but the base
+ * physical address of the aperture is not 0.  In fact, it varies depending on
+ * the GPU architecture.  Since the GPU is a PCI device, this window is
+ * accessed via DMA and is therefore bound by IOMMU translation.  The end
+ * result is that GSP-RM must translate the bus addresses in the table to GSP
+ * physical addresses.  All this should happen transparently.
+ *
+ * Returns 0 on success, or negative error code
+ *
+ * See kgspCreateRadix3_IMPL
+ */
 static int
-nvkm_gsp_radix3_sg(struct nvkm_device *device, struct sg_table *sgt, u64 size,
+nvkm_gsp_radix3_sg(struct nvkm_gsp *gsp, struct sg_table *sgt, u64 size,
 		   struct nvkm_gsp_radix3 *rx3)
 {
 	u64 addr;
 
 	for (int i = ARRAY_SIZE(rx3->mem) - 1; i >= 0; i--) {
 		u64 *ptes;
-		int idx;
+		size_t bufsize;
+		int ret, idx;
 
-		rx3->mem[i].size = ALIGN((size / GSP_PAGE_SIZE) * sizeof(u64), GSP_PAGE_SIZE);
-		rx3->mem[i].data = dma_alloc_coherent(device->dev, rx3->mem[i].size,
-						      &rx3->mem[i].addr, GFP_KERNEL);
-		if (WARN_ON(!rx3->mem[i].data))
-			return -ENOMEM;
+		bufsize = ALIGN((size / GSP_PAGE_SIZE) * sizeof(u64), GSP_PAGE_SIZE);
+		ret = nvkm_gsp_mem_ctor(gsp, bufsize, &rx3->mem[i]);
+		if (ret)
+			return ret;
 
 		ptes = rx3->mem[i].data;
 		if (i == 2) {
@@ -1891,7 +2003,7 @@ r535_gsp_fini(struct nvkm_gsp *gsp, bool suspend)
 		if (ret)
 			return ret;
 
-		ret = nvkm_gsp_radix3_sg(gsp->subdev.device, &gsp->sr.sgt, len, &gsp->sr.radix3);
+		ret = nvkm_gsp_radix3_sg(gsp, &gsp->sr.sgt, len, &gsp->sr.radix3);
 		if (ret)
 			return ret;
 
@@ -2050,6 +2162,11 @@ r535_gsp_dtor(struct nvkm_gsp *gsp)
 	mutex_destroy(&gsp->cmdq.mutex);
 
 	r535_gsp_dtor_fws(gsp);
+
+	nvkm_gsp_mem_dtor(gsp, &gsp->shm.mem);
+	nvkm_gsp_mem_dtor(gsp, &gsp->loginit);
+	nvkm_gsp_mem_dtor(gsp, &gsp->logintr);
+	nvkm_gsp_mem_dtor(gsp, &gsp->logrm);
 }
 
 int
@@ -2094,7 +2211,7 @@ r535_gsp_oneinit(struct nvkm_gsp *gsp)
 	memcpy(gsp->sig.data, data, size);
 
 	/* Build radix3 page table for ELF image. */
-	ret = nvkm_gsp_radix3_sg(device, &gsp->fw.mem.sgt, gsp->fw.len, &gsp->radix3);
+	ret = nvkm_gsp_radix3_sg(gsp, &gsp->fw.mem.sgt, gsp->fw.len, &gsp->radix3);
 	if (ret)
 		return ret;
 
@@ -2106,7 +2223,9 @@ r535_gsp_oneinit(struct nvkm_gsp *gsp)
 	r535_gsp_msg_ntfy_add(gsp, NV_VGPU_MSG_EVENT_MMU_FAULT_QUEUED,
 			      r535_gsp_msg_mmu_fault_queued, gsp);
 	r535_gsp_msg_ntfy_add(gsp, NV_VGPU_MSG_EVENT_OS_ERROR_LOG, r535_gsp_msg_os_error_log, gsp);
-
+	r535_gsp_msg_ntfy_add(gsp, NV_VGPU_MSG_EVENT_PERF_BRIDGELESS_INFO_UPDATE, NULL, NULL);
+	r535_gsp_msg_ntfy_add(gsp, NV_VGPU_MSG_EVENT_UCODE_LIBOS_PRINT, NULL, NULL);
+	r535_gsp_msg_ntfy_add(gsp, NV_VGPU_MSG_EVENT_GSP_SEND_USER_SHARED_DATA, NULL, NULL);
 	ret = r535_gsp_rm_boot_ctor(gsp);
 	if (ret)
 		return ret;
@@ -2193,8 +2312,12 @@ r535_gsp_load(struct nvkm_gsp *gsp, int ver, const struct nvkm_gsp_fwif *fwif)
 {
 	struct nvkm_subdev *subdev = &gsp->subdev;
 	int ret;
+	bool enable_gsp = fwif->enable;
 
-	if (!nvkm_boolopt(subdev->device->cfgopt, "NvGspRm", fwif->enable))
+#if IS_ENABLED(CONFIG_DRM_NOUVEAU_GSP_DEFAULT)
+	enable_gsp = true;
+#endif
+	if (!nvkm_boolopt(subdev->device->cfgopt, "NvGspRm", enable_gsp))
 		return -EINVAL;
 
 	if ((ret = r535_gsp_load_fw(gsp, "gsp", fwif->ver, &gsp->fws.rm)) ||
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
index 1b811d6972a1..201022ae9214 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
@@ -49,14 +49,14 @@
 #include <subdev/mmu.h>
 
 struct gk20a_instobj {
-	struct nvkm_memory memory;
+	struct nvkm_instobj base;
 	struct nvkm_mm_node *mn;
 	struct gk20a_instmem *imem;
 
 	/* CPU mapping */
 	u32 *vaddr;
 };
-#define gk20a_instobj(p) container_of((p), struct gk20a_instobj, memory)
+#define gk20a_instobj(p) container_of((p), struct gk20a_instobj, base.memory)
 
 /*
  * Used for objects allocated using the DMA API
@@ -148,7 +148,7 @@ gk20a_instobj_iommu_recycle_vaddr(struct gk20a_instobj_iommu *obj)
 	list_del(&obj->vaddr_node);
 	vunmap(obj->base.vaddr);
 	obj->base.vaddr = NULL;
-	imem->vaddr_use -= nvkm_memory_size(&obj->base.memory);
+	imem->vaddr_use -= nvkm_memory_size(&obj->base.base.memory);
 	nvkm_debug(&imem->base.subdev, "vaddr used: %x/%x\n", imem->vaddr_use,
 		   imem->vaddr_max);
 }
@@ -283,7 +283,7 @@ gk20a_instobj_map(struct nvkm_memory *memory, u64 offset, struct nvkm_vmm *vmm,
 {
 	struct gk20a_instobj *node = gk20a_instobj(memory);
 	struct nvkm_vmm_map map = {
-		.memory = &node->memory,
+		.memory = &node->base.memory,
 		.offset = offset,
 		.mem = node->mn,
 	};
@@ -391,8 +391,8 @@ gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align,
 		return -ENOMEM;
 	*_node = &node->base;
 
-	nvkm_memory_ctor(&gk20a_instobj_func_dma, &node->base.memory);
-	node->base.memory.ptrs = &gk20a_instobj_ptrs;
+	nvkm_memory_ctor(&gk20a_instobj_func_dma, &node->base.base.memory);
+	node->base.base.memory.ptrs = &gk20a_instobj_ptrs;
 
 	node->base.vaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT,
 					   &node->handle, GFP_KERNEL,
@@ -438,8 +438,8 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align,
 	*_node = &node->base;
 	node->dma_addrs = (void *)(node->pages + npages);
 
-	nvkm_memory_ctor(&gk20a_instobj_func_iommu, &node->base.memory);
-	node->base.memory.ptrs = &gk20a_instobj_ptrs;
+	nvkm_memory_ctor(&gk20a_instobj_func_iommu, &node->base.base.memory);
+	node->base.base.memory.ptrs = &gk20a_instobj_ptrs;
 
 	/* Allocate backing memory */
 	for (i = 0; i < npages; i++) {
@@ -533,7 +533,7 @@ gk20a_instobj_new(struct nvkm_instmem *base, u32 size, u32 align, bool zero,
 	else
 		ret = gk20a_instobj_ctor_dma(imem, size >> PAGE_SHIFT,
 					     align, &node);
-	*pmemory = node ? &node->memory : NULL;
+	*pmemory = node ? &node->base.memory : NULL;
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c
index e7e8fdf3adab..29682722b0b3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c
@@ -28,19 +28,14 @@ static void
 gp10b_ltc_init(struct nvkm_ltc *ltc)
 {
 	struct nvkm_device *device = ltc->subdev.device;
-	struct iommu_fwspec *spec;
+	u32 sid;
 
 	nvkm_wr32(device, 0x17e27c, ltc->ltc_nr);
 	nvkm_wr32(device, 0x17e000, ltc->ltc_nr);
 	nvkm_wr32(device, 0x100800, ltc->ltc_nr);
 
-	spec = dev_iommu_fwspec_get(device->dev);
-	if (spec) {
-		u32 sid = spec->ids[0] & 0xffff;
-
-		/* stream ID */
+	if (tegra_dev_iommu_get_stream_id(device->dev, &sid))
 		nvkm_wr32(device, 0x160000, sid << 2);
-	}
 }
 
 static const struct nvkm_ltc_func
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c
index e34bc6076401..8379e72d77ab 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c
@@ -31,7 +31,7 @@ tu102_vmm_flush(struct nvkm_vmm *vmm, int depth)
 
 	type |= 0x00000001; /* PAGE_ALL */
 	if (atomic_read(&vmm->engref[NVKM_SUBDEV_BAR]))
-		type |= 0x00000004; /* HUB_ONLY */
+		type |= 0x00000006; /* HUB_ONLY | ALL PDB (hack) */
 
 	mutex_lock(&vmm->mmu->mutex);
 
diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig
index 99e14dc212ec..8f3783742208 100644
--- a/drivers/gpu/drm/panel/Kconfig
+++ b/drivers/gpu/drm/panel/Kconfig
@@ -194,6 +194,15 @@ config DRM_PANEL_ILITEK_ILI9341
 	  QVGA (240x320) RGB panels. support serial & parallel rgb
 	  interface.
 
+config DRM_PANEL_ILITEK_ILI9805
+	tristate "Ilitek ILI9805-based panels"
+	depends on OF
+	depends on DRM_MIPI_DSI
+	depends on BACKLIGHT_CLASS_DEVICE
+	help
+	  Say Y if you want to enable support for panels based on the
+	  Ilitek ILI9805 controller.
+
 config DRM_PANEL_ILITEK_ILI9881C
 	tristate "Ilitek ILI9881C-based panels"
 	depends on OF
@@ -530,6 +539,8 @@ config DRM_PANEL_RAYDIUM_RM692E5
 	depends on OF
 	depends on DRM_MIPI_DSI
 	depends on BACKLIGHT_CLASS_DEVICE
+	select DRM_DISPLAY_DP_HELPER
+	select DRM_DISPLAY_HELPER
 	help
 	  Say Y here if you want to enable support for Raydium RM692E5-based
 	  display panels, such as the one found in the Fairphone 5 smartphone.
@@ -735,6 +746,15 @@ config DRM_PANEL_SITRONIX_ST7789V
 	  Say Y here if you want to enable support for the Sitronix
 	  ST7789V controller for 240x320 LCD panels
 
+config DRM_PANEL_SYNAPTICS_R63353
+	tristate "Synaptics R63353-based panels"
+	depends on OF
+	depends on DRM_MIPI_DSI
+	depends on BACKLIGHT_CLASS_DEVICE
+	help
+	  Say Y if you want to enable support for panels based on the
+	  Synaptics R63353 controller.
+
 config DRM_PANEL_SONY_ACX565AKM
 	tristate "Sony ACX565AKM panel"
 	depends on GPIOLIB && OF && SPI
diff --git a/drivers/gpu/drm/panel/Makefile b/drivers/gpu/drm/panel/Makefile
index d10c3de51c6d..d94a644d0a6c 100644
--- a/drivers/gpu/drm/panel/Makefile
+++ b/drivers/gpu/drm/panel/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D) += panel-feiyang-fy07024di26a30d
 obj-$(CONFIG_DRM_PANEL_HIMAX_HX8394) += panel-himax-hx8394.o
 obj-$(CONFIG_DRM_PANEL_ILITEK_IL9322) += panel-ilitek-ili9322.o
 obj-$(CONFIG_DRM_PANEL_ILITEK_ILI9341) += panel-ilitek-ili9341.o
+obj-$(CONFIG_DRM_PANEL_ILITEK_ILI9805) += panel-ilitek-ili9805.o
 obj-$(CONFIG_DRM_PANEL_ILITEK_ILI9881C) += panel-ilitek-ili9881c.o
 obj-$(CONFIG_DRM_PANEL_ILITEK_ILI9882T) += panel-ilitek-ili9882t.o
 obj-$(CONFIG_DRM_PANEL_INNOLUX_EJ030NA) += panel-innolux-ej030na.o
@@ -74,6 +75,7 @@ obj-$(CONFIG_DRM_PANEL_SHARP_LS060T1SX01) += panel-sharp-ls060t1sx01.o
 obj-$(CONFIG_DRM_PANEL_SITRONIX_ST7701) += panel-sitronix-st7701.o
 obj-$(CONFIG_DRM_PANEL_SITRONIX_ST7703) += panel-sitronix-st7703.o
 obj-$(CONFIG_DRM_PANEL_SITRONIX_ST7789V) += panel-sitronix-st7789v.o
+obj-$(CONFIG_DRM_PANEL_SYNAPTICS_R63353) += panel-synaptics-r63353.o
 obj-$(CONFIG_DRM_PANEL_SONY_ACX565AKM) += panel-sony-acx565akm.o
 obj-$(CONFIG_DRM_PANEL_SONY_TD4353_JDI) += panel-sony-td4353-jdi.o
 obj-$(CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521) += panel-sony-tulip-truly-nt35521.o
diff --git a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c
index be8f48e3c1db..c4c0f08e9202 100644
--- a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c
+++ b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c
@@ -1764,6 +1764,7 @@ static const struct panel_desc starry_qfh032011_53g_desc = {
 	.mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE |
 		      MIPI_DSI_MODE_LPM,
 	.init_cmds = starry_qfh032011_53g_init_cmd,
+	.lp11_before_reset = true,
 };
 
 static const struct drm_display_mode starry_himax83102_j02_default_mode = {
diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9805.c b/drivers/gpu/drm/panel/panel-ilitek-ili9805.c
new file mode 100644
index 000000000000..1cbc25758bd2
--- /dev/null
+++ b/drivers/gpu/drm/panel/panel-ilitek-ili9805.c
@@ -0,0 +1,405 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 BSH Hausgerate GmbH
+ */
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+
+#include <linux/gpio/consumer.h>
+#include <linux/regulator/consumer.h>
+
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_modes.h>
+#include <drm/drm_panel.h>
+
+#include <video/mipi_display.h>
+
+#define ILI9805_EXTCMD_CMD_SET_ENABLE_REG	(0xff)
+#define ILI9805_SETEXTC_PARAMETER1		(0xff)
+#define ILI9805_SETEXTC_PARAMETER2		(0x98)
+#define ILI9805_SETEXTC_PARAMETER3		(0x05)
+/* One init step: @_delay is the post-write msleep() time, the rest are the bytes sent. */
+#define ILI9805_INSTR(_delay, ...) { \
+		.delay = (_delay), \
+		.len = sizeof((u8[]) {__VA_ARGS__}), \
+		.data = (u8[]){__VA_ARGS__} \
+	}
+
+struct ili9805_instr {	/* one step of a panel init sequence */
+	size_t len;	/* number of bytes in @data */
+	const u8 *data;	/* bytes handed to mipi_dsi_dcs_write_buffer() */
+	u32 delay;	/* msleep() time after the write, in ms; 0 means no delay */
+};
+
+struct ili9805_desc {	/* per-panel data, attached to the OF match table entries */
+	const char *name;	/* not initialised by any descriptor in this file */
+	const struct ili9805_instr *init;	/* init sequence sent by ili9805_activate() */
+	const size_t init_length;	/* number of entries in @init */
+	const struct drm_display_mode *mode;	/* the one mode duplicated in get_modes() */
+	u32 width_mm;	/* physical width of the panel, in millimetres */
+	u32 height_mm;	/* physical height of the panel, in millimetres */
+};
+
+struct ili9805 {	/* driver state, allocated once per DSI device in probe */
+	struct drm_panel	panel;	/* embedded; see panel_to_ili9805() */
+	struct mipi_dsi_device	*dsi;	/* the DSI device this panel was probed on */
+	const struct ili9805_desc	*desc;	/* OF match data for the bound panel */
+
+	struct regulator	*dvdd;	/* "dvdd" supply */
+	struct regulator	*avdd;	/* "avdd" supply */
+	struct gpio_desc	*reset_gpio;	/* "reset" GPIO, requested as GPIOD_OUT_LOW */
+};
+
+static const struct ili9805_instr gpm1780a0_init[] = {	/* sent in order by ili9805_activate() */
+	ILI9805_INSTR(100, ILI9805_EXTCMD_CMD_SET_ENABLE_REG, ILI9805_SETEXTC_PARAMETER1,
+		      ILI9805_SETEXTC_PARAMETER2, ILI9805_SETEXTC_PARAMETER3),
+	ILI9805_INSTR(100, 0xFD, 0x0F, 0x10, 0x44, 0x00),
+	ILI9805_INSTR(0, 0xf8, 0x18, 0x02, 0x02, 0x18, 0x02, 0x02, 0x30, 0x00,
+		      0x00, 0x30, 0x00, 0x00, 0x30, 0x00, 0x00),
+	ILI9805_INSTR(0, 0xB8, 0x62),
+	ILI9805_INSTR(0, 0xF1, 0x00),
+	ILI9805_INSTR(0, 0xF2, 0x00, 0x58, 0x40),
+	ILI9805_INSTR(0, 0xF3, 0x60, 0x83, 0x04),
+	ILI9805_INSTR(0, 0xFC, 0x04, 0x0F, 0x01),
+	ILI9805_INSTR(0, 0xEB, 0x08, 0x0F),
+	ILI9805_INSTR(0, 0xe0, 0x00, 0x08, 0x0d, 0x0e, 0x0e, 0x0d, 0x0a, 0x08, 0x04,
+		      0x08, 0x0d, 0x0f, 0x0b, 0x1c, 0x14, 0x0a),
+	ILI9805_INSTR(0, 0xe1, 0x00, 0x08, 0x0d, 0x0e, 0x0e, 0x0d, 0x0a, 0x08, 0x04,
+		      0x08, 0x0d, 0x0f, 0x0b, 0x1c, 0x14, 0x0a),
+	ILI9805_INSTR(10, 0xc1, 0x13, 0x39, 0x19, 0x06),
+	ILI9805_INSTR(10, 0xc7, 0xe5),
+	ILI9805_INSTR(10, 0xB1, 0x00, 0x12, 0x14),
+	ILI9805_INSTR(10, 0xB4, 0x02),
+	ILI9805_INSTR(0, 0xBB, 0x14, 0x55),
+	ILI9805_INSTR(0, MIPI_DCS_SET_ADDRESS_MODE, 0x08),
+	ILI9805_INSTR(0, MIPI_DCS_SET_PIXEL_FORMAT, 0x77),
+	ILI9805_INSTR(0, 0x20),	/* presumably MIPI_DCS_EXIT_INVERT_MODE - confirm */
+	ILI9805_INSTR(0, 0xB0, 0x01),
+	ILI9805_INSTR(0, 0xB6, 0x31, 0x00, 0xef),
+	ILI9805_INSTR(0, 0xDF, 0x23),
+	ILI9805_INSTR(0, 0xB9, 0x02, 0x00),
+};
+
+static const struct ili9805_instr tm041xdhg01_init[] = {	/* sent in order by ili9805_activate() */
+	ILI9805_INSTR(100, ILI9805_EXTCMD_CMD_SET_ENABLE_REG, ILI9805_SETEXTC_PARAMETER1,
+		      ILI9805_SETEXTC_PARAMETER2, ILI9805_SETEXTC_PARAMETER3),
+	ILI9805_INSTR(100, 0xFD, 0x0F, 0x13, 0x44, 0x00),
+	ILI9805_INSTR(0, 0xf8, 0x18, 0x02, 0x02, 0x18, 0x02, 0x02, 0x30, 0x01,
+		      0x01, 0x30, 0x01, 0x01, 0x30, 0x01, 0x01),
+	ILI9805_INSTR(0, 0xB8, 0x74),
+	ILI9805_INSTR(0, 0xF1, 0x00),
+	ILI9805_INSTR(0, 0xF2, 0x00, 0x58, 0x40),
+	ILI9805_INSTR(0, 0xFC, 0x04, 0x0F, 0x01),
+	ILI9805_INSTR(0, 0xEB, 0x08, 0x0F),
+	ILI9805_INSTR(0, 0xe0, 0x01, 0x0d, 0x15, 0x0e, 0x0f, 0x0f, 0x0b, 0x08, 0x04,
+		      0x07, 0x0a, 0x0d, 0x0c, 0x15, 0x0f, 0x08),
+	ILI9805_INSTR(0, 0xe1, 0x01, 0x0d, 0x15, 0x0e, 0x0f, 0x0f, 0x0b, 0x08, 0x04,
+		      0x07, 0x0a, 0x0d, 0x0c, 0x15, 0x0f, 0x08),
+	ILI9805_INSTR(10, 0xc1, 0x15, 0x03, 0x03, 0x31),
+	ILI9805_INSTR(10, 0xB1, 0x00, 0x12, 0x14),
+	ILI9805_INSTR(10, 0xB4, 0x02),
+	ILI9805_INSTR(0, 0xBB, 0x14, 0x55),
+	ILI9805_INSTR(0, MIPI_DCS_SET_ADDRESS_MODE, 0x0a),
+	ILI9805_INSTR(0, MIPI_DCS_SET_PIXEL_FORMAT, 0x77),
+	ILI9805_INSTR(0, 0x20),	/* presumably MIPI_DCS_EXIT_INVERT_MODE - confirm */
+	ILI9805_INSTR(0, 0xB0, 0x00),
+	ILI9805_INSTR(0, 0xB6, 0x01),
+	ILI9805_INSTR(0, 0xc2, 0x11),
+	ILI9805_INSTR(0, 0x51, 0xFF),	/* presumably MIPI_DCS_SET_DISPLAY_BRIGHTNESS - confirm */
+	ILI9805_INSTR(0, 0x53, 0x24),	/* presumably MIPI_DCS_WRITE_CONTROL_DISPLAY - confirm */
+	ILI9805_INSTR(0, 0x55, 0x00),	/* presumably MIPI_DCS_WRITE_POWER_SAVE - confirm */
+};
+
+static inline struct ili9805 *panel_to_ili9805(struct drm_panel *panel)
+{
+	return container_of(panel, struct ili9805, panel); /* panel is embedded in ili9805 */
+}
+
+static int ili9805_power_on(struct ili9805 *ctx)
+{
+	struct device *dev = &ctx->dsi->dev;
+	int err;
+
+	/* Bring up avdd before dvdd; if dvdd fails, avdd is switched off again. */
+	err = regulator_enable(ctx->avdd);
+	if (err) {
+		dev_err(dev, "Failed to enable avdd regulator (%d)\n", err);
+		return err;
+	}
+
+	err = regulator_enable(ctx->dvdd);
+	if (err) {
+		dev_err(dev, "Failed to enable dvdd regulator (%d)\n", err);
+		regulator_disable(ctx->avdd);
+		return err;
+	}
+
+	/* Reset line: 0 for 5-10 ms, then 1, followed by a 120 ms wait. */
+	gpiod_set_value(ctx->reset_gpio, 0);
+	usleep_range(5000, 10000);
+	gpiod_set_value(ctx->reset_gpio, 1);
+	msleep(120);
+	return 0;
+}
+
+static int ili9805_power_off(struct ili9805 *ctx)
+{
+	gpiod_set_value(ctx->reset_gpio, 0);
+	regulator_disable(ctx->dvdd);	/* supplies go down in the reverse of */
+	regulator_disable(ctx->avdd);	/* the order ili9805_power_on() uses */
+
+	return 0;	/* always 0: the regulator_disable() results are not checked */
+}
+
+static int ili9805_activate(struct ili9805 *ctx)
+{
+	const struct ili9805_desc *desc = ctx->desc;
+	struct mipi_dsi_device *dsi = ctx->dsi;
+	const struct ili9805_instr *cmd = desc->init;
+	const struct ili9805_instr *end = cmd + desc->init_length;
+	int err;
+
+	/* Replay the panel's init table in order, sleeping where an entry asks to. */
+	for (; cmd < end; cmd++) {
+		err = mipi_dsi_dcs_write_buffer(dsi, cmd->data, cmd->len);
+		if (err < 0)
+			return err;
+
+		if (cmd->delay)
+			msleep(cmd->delay);
+	}
+
+	/* Leave sleep mode, wait 5-6 ms, then switch the display on. */
+	err = mipi_dsi_dcs_exit_sleep_mode(dsi);
+	if (err) {
+		dev_err(&dsi->dev, "Failed to exit sleep mode (%d)\n", err);
+		return err;
+	}
+
+	usleep_range(5000, 6000);
+
+	err = mipi_dsi_dcs_set_display_on(dsi);
+	if (err)
+		dev_err(&dsi->dev, "Failed to set display ON (%d)\n", err);
+
+	return err;
+}
+
+static int ili9805_prepare(struct drm_panel *panel)
+{
+	struct ili9805 *ili = panel_to_ili9805(panel);
+	int err;
+
+	/* Power and reset the panel before any DSI command is sent to it. */
+	err = ili9805_power_on(ili);
+	if (err)
+		return err;
+
+	/* A failed init sequence must not leave the supplies switched on. */
+	err = ili9805_activate(ili);
+	if (err)
+		ili9805_power_off(ili);
+
+	return err;
+}
+
+static int ili9805_deactivate(struct ili9805 *ctx)
+{
+	struct mipi_dsi_device *dsi = ctx->dsi;
+	int err;
+
+	/* Blank the display first; sleep mode is only entered if that worked. */
+	err = mipi_dsi_dcs_set_display_off(dsi);
+	if (err < 0) {
+		dev_err(&dsi->dev, "Failed to set display OFF (%d)\n", err);
+		return err;
+	}
+
+	usleep_range(5000, 10000);
+
+	err = mipi_dsi_dcs_enter_sleep_mode(dsi);
+	if (err < 0) {
+		dev_err(&dsi->dev, "Failed to enter sleep mode (%d)\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+static int ili9805_unprepare(struct drm_panel *panel)
+{
+	struct ili9805 *ctx = panel_to_ili9805(panel);
+
+	ili9805_deactivate(ctx);	/* result ignored: power is removed either way */
+	ili9805_power_off(ctx);
+
+	return 0;	/* unprepare never reports a failure to the caller */
+}
+
+static const struct drm_display_mode gpm1780a0_timing = {
+	.clock = 26227,	/* pixel clock, in kHz */
+
+	.hdisplay = 480,
+	.hsync_start = 480 + 10,
+	.hsync_end = 480 + 10 + 2,
+	.htotal = 480 + 10 + 2 + 36,
+
+	.vdisplay = 480,
+	.vsync_start = 480 + 2,
+	.vsync_end = 480 + 10 + 4,	/* NOTE(review): "+ 2 + 4" would match vsync_start/vtotal - confirm */
+	.vtotal = 480 + 2 + 4 + 10,
+};
+
+static const struct drm_display_mode tm041xdhg01_timing = {
+	.clock = 26227,	/* pixel clock, in kHz */
+
+	.hdisplay = 480,
+	.hsync_start = 480 + 10,
+	.hsync_end = 480 + 10 + 2,
+	.htotal = 480 + 10 + 2 + 36,
+
+	.vdisplay = 768,
+	.vsync_start = 768 + 2,
+	.vsync_end = 768 + 10 + 4,	/* NOTE(review): "+ 2 + 4" would match vsync_start/vtotal - confirm */
+	.vtotal = 768 + 2 + 4 + 10,
+};
+
+static int ili9805_get_modes(struct drm_panel *panel,
+			      struct drm_connector *connector)
+{
+	struct ili9805 *ctx = panel_to_ili9805(panel);
+	struct drm_display_mode *mode;
+
+	/* Report the panel's single fixed mode: returns 1, or -ENOMEM on failure. */
+	mode = drm_mode_duplicate(connector->dev, ctx->desc->mode);
+	if (!mode) {
+		dev_err(&ctx->dsi->dev, "failed to add mode %ux%u@%u\n",
+			ctx->desc->mode->hdisplay, ctx->desc->mode->vdisplay,
+			drm_mode_vrefresh(ctx->desc->mode));
+		return -ENOMEM;
+	}
+
+	drm_mode_set_name(mode);
+	mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED;
+	drm_mode_probed_add(connector, mode);
+
+	/* The timing tables leave width_mm/height_mm at 0; the size is in the desc. */
+	connector->display_info.width_mm = ctx->desc->width_mm;
+	connector->display_info.height_mm = ctx->desc->height_mm;
+
+	return 1;
+}
+
+static const struct drm_panel_funcs ili9805_funcs = {	/* no enable/disable hooks are set */
+	.prepare	= ili9805_prepare,	/* power on, reset, send init sequence */
+	.unprepare	= ili9805_unprepare,	/* display off, sleep in, power off */
+	.get_modes	= ili9805_get_modes,	/* reports the descriptor's single mode */
+};
+
+static int ili9805_dsi_probe(struct mipi_dsi_device *dsi)
+{
+	struct ili9805 *ctx;
+	int ret;
+
+	ctx = devm_kzalloc(&dsi->dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+	mipi_dsi_set_drvdata(dsi, ctx);
+	ctx->dsi = dsi;
+
+	/* prepare() and get_modes() dereference desc: refuse to bind without it. */
+	ctx->desc = of_device_get_match_data(&dsi->dev);
+	if (!ctx->desc)
+		return -ENODEV;
+
+	dsi->format = MIPI_DSI_FMT_RGB888;
+	dsi->mode_flags = MIPI_DSI_MODE_VIDEO_HSE | MIPI_DSI_MODE_VIDEO |
+		MIPI_DSI_CLOCK_NON_CONTINUOUS | MIPI_DSI_MODE_LPM |
+		MIPI_DSI_MODE_VIDEO_SYNC_PULSE | MIPI_DSI_MODE_NO_EOT_PACKET;
+	dsi->lanes = 2;
+	drm_panel_init(&ctx->panel, &dsi->dev, &ili9805_funcs, DRM_MODE_CONNECTOR_DSI);
+
+	ctx->dvdd = devm_regulator_get(&dsi->dev, "dvdd");
+	if (IS_ERR(ctx->dvdd))
+		return PTR_ERR(ctx->dvdd);
+	ctx->avdd = devm_regulator_get(&dsi->dev, "avdd");
+	if (IS_ERR(ctx->avdd))
+		return PTR_ERR(ctx->avdd);
+
+	ctx->reset_gpio = devm_gpiod_get(&dsi->dev, "reset", GPIOD_OUT_LOW);
+	if (IS_ERR(ctx->reset_gpio))
+		return dev_err_probe(&dsi->dev, PTR_ERR(ctx->reset_gpio),
+				     "Couldn't get our reset GPIO\n");
+
+	ctx->panel.prepare_prev_first = true;
+	ret = drm_panel_of_backlight(&ctx->panel);
+	if (ret)
+		return ret;
+
+	drm_panel_add(&ctx->panel);
+	ret = mipi_dsi_attach(dsi);
+	if (ret < 0) {
+		dev_err(&dsi->dev, "mipi_dsi_attach failed: %d\n", ret);
+		drm_panel_remove(&ctx->panel);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void ili9805_dsi_remove(struct mipi_dsi_device *dsi)
+{
+	struct ili9805 *ili = mipi_dsi_get_drvdata(dsi);
+	int err = mipi_dsi_detach(dsi);
+
+	/* A failed detach is only logged; the panel is unregistered regardless. */
+	if (err < 0)
+		dev_err(&dsi->dev, "failed to detach from DSI host: %d\n", err);
+
+	/* Undo the drm_panel_add() done at probe time. */
+	drm_panel_remove(&ili->panel);
+}
+
+static const struct ili9805_desc gpm1780a0_desc = {	/* .name is left unset (NULL) */
+	.init = gpm1780a0_init,
+	.init_length = ARRAY_SIZE(gpm1780a0_init),
+	.mode = &gpm1780a0_timing,	/* 480x480 */
+	.width_mm = 65,
+	.height_mm = 65,
+};
+
+static const struct ili9805_desc tm041xdhg01_desc = {	/* .name is left unset (NULL) */
+	.init = tm041xdhg01_init,
+	.init_length = ARRAY_SIZE(tm041xdhg01_init),
+	.mode = &tm041xdhg01_timing,	/* 480x768 */
+	.width_mm = 42,
+	.height_mm = 96,
+};
+
+static const struct of_device_id ili9805_of_match[] = {	/* .data is the panel's ili9805_desc */
+	{ .compatible = "giantplus,gpm1790a0", .data = &gpm1780a0_desc },	/* NOTE(review): "1790" vs the gpm1780a0_* symbol names - confirm */
+	{ .compatible = "tianma,tm041xdhg01", .data = &tm041xdhg01_desc },
+	{ }	/* sentinel */
+};
+MODULE_DEVICE_TABLE(of, ili9805_of_match);
+
+static struct mipi_dsi_driver ili9805_dsi_driver = {	/* registered by the macro below */
+	.probe		= ili9805_dsi_probe,
+	.remove		= ili9805_dsi_remove,
+	.driver = {
+		.name		= "ili9805-dsi",
+		.of_match_table	= ili9805_of_match,	/* supplies the per-panel descriptor */
+	},
+};
+module_mipi_dsi_driver(ili9805_dsi_driver);
+
+MODULE_AUTHOR("Matthias Proske <Matthias.Proske@bshg.com>");
+MODULE_AUTHOR("Michael Trimarchi <michael@amarulasolutions.com>");
+MODULE_DESCRIPTION("Ilitek ILI9805 Controller Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c b/drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c
index 6e3670508e3a..30919c872ac8 100644
--- a/drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c
+++ b/drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c
@@ -326,7 +326,7 @@ static const struct drm_display_mode ltk050h3148w_mode = {
 static const struct ltk050h3146w_desc ltk050h3148w_data = {
 	.mode = &ltk050h3148w_mode,
 	.init = ltk050h3148w_init_sequence,
-	.mode_flags = MIPI_DSI_MODE_VIDEO_SYNC_PULSE,
+	.mode_flags = MIPI_DSI_MODE_VIDEO_SYNC_PULSE | MIPI_DSI_MODE_VIDEO_BURST,
 };
 
 static int ltk050h3146w_init_sequence(struct ltk050h3146w *ctx)
diff --git a/drivers/gpu/drm/panel/panel-newvision-nv3052c.c b/drivers/gpu/drm/panel/panel-newvision-nv3052c.c
index 71e57de6d8b2..1aab0c9ae52f 100644
--- a/drivers/gpu/drm/panel/panel-newvision-nv3052c.c
+++ b/drivers/gpu/drm/panel/panel-newvision-nv3052c.c
@@ -20,11 +20,18 @@
 #include <drm/drm_modes.h>
 #include <drm/drm_panel.h>
 
+struct nv3052c_reg {
+	u8 cmd;
+	u8 val;
+};
+
 struct nv3052c_panel_info {
 	const struct drm_display_mode *display_modes;
 	unsigned int num_modes;
 	u16 width_mm, height_mm;
 	u32 bus_format, bus_flags;
+	const struct nv3052c_reg *panel_regs;
+	unsigned int panel_regs_len;
 };
 
 struct nv3052c {
@@ -36,15 +43,10 @@ struct nv3052c {
 	struct gpio_desc *reset_gpio;
 };
 
-struct nv3052c_reg {
-	u8 cmd;
-	u8 val;
-};
-
-static const struct nv3052c_reg nv3052c_panel_regs[] = {
-	{ 0xff, 0x30 },
-	{ 0xff, 0x52 },
-	{ 0xff, 0x01 },
+static const struct nv3052c_reg ltk035c5444t_panel_regs[] = {
+	// EXTC Command set enable, select page 1
+	{ 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x01 },
+	// Mostly unknown registers
 	{ 0xe3, 0x00 },
 	{ 0x40, 0x00 },
 	{ 0x03, 0x40 },
@@ -62,15 +64,15 @@ static const struct nv3052c_reg nv3052c_panel_regs[] = {
 	{ 0x25, 0x06 },
 	{ 0x26, 0x14 },
 	{ 0x27, 0x14 },
-	{ 0x38, 0xcc },
-	{ 0x39, 0xd7 },
-	{ 0x3a, 0x4a },
+	{ 0x38, 0xcc }, // VCOM_ADJ1
+	{ 0x39, 0xd7 }, // VCOM_ADJ2
+	{ 0x3a, 0x4a }, // VCOM_ADJ3
 	{ 0x28, 0x40 },
 	{ 0x29, 0x01 },
 	{ 0x2a, 0xdf },
 	{ 0x49, 0x3c },
-	{ 0x91, 0x77 },
-	{ 0x92, 0x77 },
+	{ 0x91, 0x77 }, // EXTPW_CTRL2
+	{ 0x92, 0x77 }, // EXTPW_CTRL3
 	{ 0xa0, 0x55 },
 	{ 0xa1, 0x50 },
 	{ 0xa4, 0x9c },
@@ -94,123 +96,321 @@ static const struct nv3052c_reg nv3052c_panel_regs[] = {
 	{ 0xb8, 0x26 },
 	{ 0xf0, 0x00 },
 	{ 0xf6, 0xc0 },
-	{ 0xff, 0x30 },
-	{ 0xff, 0x52 },
-	{ 0xff, 0x02 },
-	{ 0xb0, 0x0b },
-	{ 0xb1, 0x16 },
-	{ 0xb2, 0x17 },
-	{ 0xb3, 0x2c },
-	{ 0xb4, 0x32 },
-	{ 0xb5, 0x3b },
-	{ 0xb6, 0x29 },
-	{ 0xb7, 0x40 },
-	{ 0xb8, 0x0d },
-	{ 0xb9, 0x05 },
-	{ 0xba, 0x12 },
-	{ 0xbb, 0x10 },
-	{ 0xbc, 0x12 },
-	{ 0xbd, 0x15 },
-	{ 0xbe, 0x19 },
-	{ 0xbf, 0x0e },
-	{ 0xc0, 0x16 },
-	{ 0xc1, 0x0a },
-	{ 0xd0, 0x0c },
-	{ 0xd1, 0x17 },
-	{ 0xd2, 0x14 },
-	{ 0xd3, 0x2e },
-	{ 0xd4, 0x32 },
-	{ 0xd5, 0x3c },
-	{ 0xd6, 0x22 },
-	{ 0xd7, 0x3d },
-	{ 0xd8, 0x0d },
-	{ 0xd9, 0x07 },
-	{ 0xda, 0x13 },
-	{ 0xdb, 0x13 },
-	{ 0xdc, 0x11 },
-	{ 0xdd, 0x15 },
-	{ 0xde, 0x19 },
-	{ 0xdf, 0x10 },
-	{ 0xe0, 0x17 },
-	{ 0xe1, 0x0a },
-	{ 0xff, 0x30 },
-	{ 0xff, 0x52 },
-	{ 0xff, 0x03 },
-	{ 0x00, 0x2a },
-	{ 0x01, 0x2a },
-	{ 0x02, 0x2a },
-	{ 0x03, 0x2a },
-	{ 0x04, 0x61 },
-	{ 0x05, 0x80 },
-	{ 0x06, 0xc7 },
-	{ 0x07, 0x01 },
-	{ 0x08, 0x03 },
-	{ 0x09, 0x04 },
-	{ 0x70, 0x22 },
-	{ 0x71, 0x80 },
-	{ 0x30, 0x2a },
-	{ 0x31, 0x2a },
-	{ 0x32, 0x2a },
-	{ 0x33, 0x2a },
-	{ 0x34, 0x61 },
-	{ 0x35, 0xc5 },
-	{ 0x36, 0x80 },
-	{ 0x37, 0x23 },
-	{ 0x40, 0x03 },
-	{ 0x41, 0x04 },
-	{ 0x42, 0x05 },
-	{ 0x43, 0x06 },
-	{ 0x44, 0x11 },
-	{ 0x45, 0xe8 },
-	{ 0x46, 0xe9 },
-	{ 0x47, 0x11 },
-	{ 0x48, 0xea },
-	{ 0x49, 0xeb },
-	{ 0x50, 0x07 },
-	{ 0x51, 0x08 },
-	{ 0x52, 0x09 },
-	{ 0x53, 0x0a },
-	{ 0x54, 0x11 },
-	{ 0x55, 0xec },
-	{ 0x56, 0xed },
-	{ 0x57, 0x11 },
-	{ 0x58, 0xef },
-	{ 0x59, 0xf0 },
-	{ 0xb1, 0x01 },
-	{ 0xb4, 0x15 },
-	{ 0xb5, 0x16 },
-	{ 0xb6, 0x09 },
-	{ 0xb7, 0x0f },
-	{ 0xb8, 0x0d },
-	{ 0xb9, 0x0b },
-	{ 0xba, 0x00 },
-	{ 0xc7, 0x02 },
-	{ 0xca, 0x17 },
-	{ 0xcb, 0x18 },
-	{ 0xcc, 0x0a },
-	{ 0xcd, 0x10 },
-	{ 0xce, 0x0e },
-	{ 0xcf, 0x0c },
-	{ 0xd0, 0x00 },
-	{ 0x81, 0x00 },
-	{ 0x84, 0x15 },
-	{ 0x85, 0x16 },
-	{ 0x86, 0x10 },
-	{ 0x87, 0x0a },
-	{ 0x88, 0x0c },
-	{ 0x89, 0x0e },
-	{ 0x8a, 0x02 },
-	{ 0x97, 0x00 },
-	{ 0x9a, 0x17 },
-	{ 0x9b, 0x18 },
-	{ 0x9c, 0x0f },
-	{ 0x9d, 0x09 },
-	{ 0x9e, 0x0b },
-	{ 0x9f, 0x0d },
-	{ 0xa0, 0x01 },
-	{ 0xff, 0x30 },
-	{ 0xff, 0x52 },
-	{ 0xff, 0x02 },
+	// EXTC Command set enable, select page 2
+	{ 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x02 },
+	// Set gray scale voltage to adjust gamma
+	{ 0xb0, 0x0b }, // PGAMVR0
+	{ 0xb1, 0x16 }, // PGAMVR1
+	{ 0xb2, 0x17 }, // PGAMVR2
+	{ 0xb3, 0x2c }, // PGAMVR3
+	{ 0xb4, 0x32 }, // PGAMVR4
+	{ 0xb5, 0x3b }, // PGAMVR5
+	{ 0xb6, 0x29 }, // PGAMPR0
+	{ 0xb7, 0x40 }, // PGAMPR1
+	{ 0xb8, 0x0d }, // PGAMPK0
+	{ 0xb9, 0x05 }, // PGAMPK1
+	{ 0xba, 0x12 }, // PGAMPK2
+	{ 0xbb, 0x10 }, // PGAMPK3
+	{ 0xbc, 0x12 }, // PGAMPK4
+	{ 0xbd, 0x15 }, // PGAMPK5
+	{ 0xbe, 0x19 }, // PGAMPK6
+	{ 0xbf, 0x0e }, // PGAMPK7
+	{ 0xc0, 0x16 }, // PGAMPK8
+	{ 0xc1, 0x0a }, // PGAMPK9
+	// Set gray scale voltage to adjust gamma
+	{ 0xd0, 0x0c }, // NGAMVR0
+	{ 0xd1, 0x17 }, // NGAMVR1
+	{ 0xd2, 0x14 }, // NGAMVR2
+	{ 0xd3, 0x2e }, // NGAMVR3
+	{ 0xd4, 0x32 }, // NGAMVR4
+	{ 0xd5, 0x3c }, // NGAMVR5
+	{ 0xd6, 0x22 }, // NGAMPR0
+	{ 0xd7, 0x3d }, // NGAMPR1
+	{ 0xd8, 0x0d }, // NGAMPK0
+	{ 0xd9, 0x07 }, // NGAMPK1
+	{ 0xda, 0x13 }, // NGAMPK2
+	{ 0xdb, 0x13 }, // NGAMPK3
+	{ 0xdc, 0x11 }, // NGAMPK4
+	{ 0xdd, 0x15 }, // NGAMPK5
+	{ 0xde, 0x19 }, // NGAMPK6
+	{ 0xdf, 0x10 }, // NGAMPK7
+	{ 0xe0, 0x17 }, // NGAMPK8
+	{ 0xe1, 0x0a }, // NGAMPK9
+	// EXTC Command set enable, select page 3
+	{ 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x03 },
+	// Set various timing settings
+	{ 0x00, 0x2a }, // GIP_VST_1
+	{ 0x01, 0x2a }, // GIP_VST_2
+	{ 0x02, 0x2a }, // GIP_VST_3
+	{ 0x03, 0x2a }, // GIP_VST_4
+	{ 0x04, 0x61 }, // GIP_VST_5
+	{ 0x05, 0x80 }, // GIP_VST_6
+	{ 0x06, 0xc7 }, // GIP_VST_7
+	{ 0x07, 0x01 }, // GIP_VST_8
+	{ 0x08, 0x03 }, // GIP_VST_9
+	{ 0x09, 0x04 }, // GIP_VST_10
+	{ 0x70, 0x22 }, // GIP_ECLK1
+	{ 0x71, 0x80 }, // GIP_ECLK2
+	{ 0x30, 0x2a }, // GIP_CLK_1
+	{ 0x31, 0x2a }, // GIP_CLK_2
+	{ 0x32, 0x2a }, // GIP_CLK_3
+	{ 0x33, 0x2a }, // GIP_CLK_4
+	{ 0x34, 0x61 }, // GIP_CLK_5
+	{ 0x35, 0xc5 }, // GIP_CLK_6
+	{ 0x36, 0x80 }, // GIP_CLK_7
+	{ 0x37, 0x23 }, // GIP_CLK_8
+	{ 0x40, 0x03 }, // GIP_CLKA_1
+	{ 0x41, 0x04 }, // GIP_CLKA_2
+	{ 0x42, 0x05 }, // GIP_CLKA_3
+	{ 0x43, 0x06 }, // GIP_CLKA_4
+	{ 0x44, 0x11 }, // GIP_CLKA_5
+	{ 0x45, 0xe8 }, // GIP_CLKA_6
+	{ 0x46, 0xe9 }, // GIP_CLKA_7
+	{ 0x47, 0x11 }, // GIP_CLKA_8
+	{ 0x48, 0xea }, // GIP_CLKA_9
+	{ 0x49, 0xeb }, // GIP_CLKA_10
+	{ 0x50, 0x07 }, // GIP_CLKB_1
+	{ 0x51, 0x08 }, // GIP_CLKB_2
+	{ 0x52, 0x09 }, // GIP_CLKB_3
+	{ 0x53, 0x0a }, // GIP_CLKB_4
+	{ 0x54, 0x11 }, // GIP_CLKB_5
+	{ 0x55, 0xec }, // GIP_CLKB_6
+	{ 0x56, 0xed }, // GIP_CLKB_7
+	{ 0x57, 0x11 }, // GIP_CLKB_8
+	{ 0x58, 0xef }, // GIP_CLKB_9
+	{ 0x59, 0xf0 }, // GIP_CLKB_10
+	// Map internal GOA signals to GOA output pad
+	{ 0xb1, 0x01 }, // PANELD2U2
+	{ 0xb4, 0x15 }, // PANELD2U5
+	{ 0xb5, 0x16 }, // PANELD2U6
+	{ 0xb6, 0x09 }, // PANELD2U7
+	{ 0xb7, 0x0f }, // PANELD2U8
+	{ 0xb8, 0x0d }, // PANELD2U9
+	{ 0xb9, 0x0b }, // PANELD2U10
+	{ 0xba, 0x00 }, // PANELD2U11
+	{ 0xc7, 0x02 }, // PANELD2U24
+	{ 0xca, 0x17 }, // PANELD2U27
+	{ 0xcb, 0x18 }, // PANELD2U28
+	{ 0xcc, 0x0a }, // PANELD2U29
+	{ 0xcd, 0x10 }, // PANELD2U30
+	{ 0xce, 0x0e }, // PANELD2U31
+	{ 0xcf, 0x0c }, // PANELD2U32
+	{ 0xd0, 0x00 }, // PANELD2U33
+	// Map internal GOA signals to GOA output pad
+	{ 0x81, 0x00 }, // PANELU2D2
+	{ 0x84, 0x15 }, // PANELU2D5
+	{ 0x85, 0x16 }, // PANELU2D6
+	{ 0x86, 0x10 }, // PANELU2D7
+	{ 0x87, 0x0a }, // PANELU2D8
+	{ 0x88, 0x0c }, // PANELU2D9
+	{ 0x89, 0x0e }, // PANELU2D10
+	{ 0x8a, 0x02 }, // PANELU2D11
+	{ 0x97, 0x00 }, // PANELU2D24
+	{ 0x9a, 0x17 }, // PANELU2D27
+	{ 0x9b, 0x18 }, // PANELU2D28
+	{ 0x9c, 0x0f }, // PANELU2D29
+	{ 0x9d, 0x09 }, // PANELU2D30
+	{ 0x9e, 0x0b }, // PANELU2D31
+	{ 0x9f, 0x0d }, // PANELU2D32
+	{ 0xa0, 0x01 }, // PANELU2D33
+	// EXTC Command set enable, select page 2
+	{ 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x02 },
+	// Unknown registers
+	{ 0x01, 0x01 },
+	{ 0x02, 0xda },
+	{ 0x03, 0xba },
+	{ 0x04, 0xa8 },
+	{ 0x05, 0x9a },
+	{ 0x06, 0x70 },
+	{ 0x07, 0xff },
+	{ 0x08, 0x91 },
+	{ 0x09, 0x90 },
+	{ 0x0a, 0xff },
+	{ 0x0b, 0x8f },
+	{ 0x0c, 0x60 },
+	{ 0x0d, 0x58 },
+	{ 0x0e, 0x48 },
+	{ 0x0f, 0x38 },
+	{ 0x10, 0x2b },
+	// EXTC Command set enable, select page 0
+	{ 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x00 },
+	// Display Access Control
+	{ 0x36, 0x0a }, // bgr = 1, ss = 1, gs = 0
+};
+
+static const struct nv3052c_reg fs035vg158_panel_regs[] = {
+	// EXTC Command set enable, select page 1
+	{ 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x01 },
+	// Mostly unknown registers
+	{ 0xe3, 0x00 },
+	{ 0x40, 0x00 },
+	{ 0x03, 0x40 },
+	{ 0x04, 0x00 },
+	{ 0x05, 0x03 },
+	{ 0x08, 0x00 },
+	{ 0x09, 0x07 },
+	{ 0x0a, 0x01 },
+	{ 0x0b, 0x32 },
+	{ 0x0c, 0x32 },
+	{ 0x0d, 0x0b },
+	{ 0x0e, 0x00 },
+	{ 0x23, 0x20 }, // RGB interface control: DE MODE PCLK-N
+	{ 0x24, 0x0c },
+	{ 0x25, 0x06 },
+	{ 0x26, 0x14 },
+	{ 0x27, 0x14 },
+	{ 0x38, 0x9c }, //VCOM_ADJ1, different to ltk035c5444t
+	{ 0x39, 0xa7 }, //VCOM_ADJ2, different to ltk035c5444t
+	{ 0x3a, 0x50 }, //VCOM_ADJ3, different to ltk035c5444t
+	{ 0x28, 0x40 },
+	{ 0x29, 0x01 },
+	{ 0x2a, 0xdf },
+	{ 0x49, 0x3c },
+	{ 0x91, 0x57 }, //EXTPW_CTRL2, different to ltk035c5444t
+	{ 0x92, 0x57 }, //EXTPW_CTRL3, different to ltk035c5444t
+	{ 0xa0, 0x55 },
+	{ 0xa1, 0x50 },
+	{ 0xa4, 0x9c },
+	{ 0xa7, 0x02 },
+	{ 0xa8, 0x01 },
+	{ 0xa9, 0x01 },
+	{ 0xaa, 0xfc },
+	{ 0xab, 0x28 },
+	{ 0xac, 0x06 },
+	{ 0xad, 0x06 },
+	{ 0xae, 0x06 },
+	{ 0xaf, 0x03 },
+	{ 0xb0, 0x08 },
+	{ 0xb1, 0x26 },
+	{ 0xb2, 0x28 },
+	{ 0xb3, 0x28 },
+	{ 0xb4, 0x03 }, // Unknown, different to ltk035c5444t
+	{ 0xb5, 0x08 },
+	{ 0xb6, 0x26 },
+	{ 0xb7, 0x08 },
+	{ 0xb8, 0x26 },
+	{ 0xf0, 0x00 },
+	{ 0xf6, 0xc0 },
+	// EXTC Command set enable, select page 2
+	{ 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x02 },
+	// Set gray scale voltage to adjust gamma
+	{ 0xb0, 0x0b }, // PGAMVR0
+	{ 0xb1, 0x16 }, // PGAMVR1
+	{ 0xb2, 0x17 }, // PGAMVR2
+	{ 0xb3, 0x2c }, // PGAMVR3
+	{ 0xb4, 0x32 }, // PGAMVR4
+	{ 0xb5, 0x3b }, // PGAMVR5
+	{ 0xb6, 0x29 }, // PGAMPR0
+	{ 0xb7, 0x40 }, // PGAMPR1
+	{ 0xb8, 0x0d }, // PGAMPK0
+	{ 0xb9, 0x05 }, // PGAMPK1
+	{ 0xba, 0x12 }, // PGAMPK2
+	{ 0xbb, 0x10 }, // PGAMPK3
+	{ 0xbc, 0x12 }, // PGAMPK4
+	{ 0xbd, 0x15 }, // PGAMPK5
+	{ 0xbe, 0x19 }, // PGAMPK6
+	{ 0xbf, 0x0e }, // PGAMPK7
+	{ 0xc0, 0x16 }, // PGAMPK8
+	{ 0xc1, 0x0a }, // PGAMPK9
+	// Set gray scale voltage to adjust gamma
+	{ 0xd0, 0x0c }, // NGAMVR0
+	{ 0xd1, 0x17 }, // NGAMVR1
+	{ 0xd2, 0x14 }, // NGAMVR2
+	{ 0xd3, 0x2e }, // NGAMVR3
+	{ 0xd4, 0x32 }, // NGAMVR4
+	{ 0xd5, 0x3c }, // NGAMVR5
+	{ 0xd6, 0x22 }, // NGAMPR0
+	{ 0xd7, 0x3d }, // NGAMPR1
+	{ 0xd8, 0x0d }, // NGAMPK0
+	{ 0xd9, 0x07 }, // NGAMPK1
+	{ 0xda, 0x13 }, // NGAMPK2
+	{ 0xdb, 0x13 }, // NGAMPK3
+	{ 0xdc, 0x11 }, // NGAMPK4
+	{ 0xdd, 0x15 }, // NGAMPK5
+	{ 0xde, 0x19 }, // NGAMPK6
+	{ 0xdf, 0x10 }, // NGAMPK7
+	{ 0xe0, 0x17 }, // NGAMPK8
+	{ 0xe1, 0x0a }, // NGAMPK9
+	// EXTC Command set enable, select page 3
+	{ 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x03 },
+	// Set various timing settings
+	{ 0x00, 0x2a }, // GIP_VST_1
+	{ 0x01, 0x2a }, // GIP_VST_2
+	{ 0x02, 0x2a }, // GIP_VST_3
+	{ 0x03, 0x2a }, // GIP_VST_4
+	{ 0x04, 0x61 }, // GIP_VST_5
+	{ 0x05, 0x80 }, // GIP_VST_6
+	{ 0x06, 0xc7 }, // GIP_VST_7
+	{ 0x07, 0x01 }, // GIP_VST_8
+	{ 0x08, 0x03 }, // GIP_VST_9
+	{ 0x09, 0x04 }, // GIP_VST_10
+	{ 0x70, 0x22 }, // GIP_ECLK1
+	{ 0x71, 0x80 }, // GIP_ECLK2
+	{ 0x30, 0x2a }, // GIP_CLK_1
+	{ 0x31, 0x2a }, // GIP_CLK_2
+	{ 0x32, 0x2a }, // GIP_CLK_3
+	{ 0x33, 0x2a }, // GIP_CLK_4
+	{ 0x34, 0x61 }, // GIP_CLK_5
+	{ 0x35, 0xc5 }, // GIP_CLK_6
+	{ 0x36, 0x80 }, // GIP_CLK_7
+	{ 0x37, 0x23 }, // GIP_CLK_8
+	{ 0x40, 0x03 }, // GIP_CLKA_1
+	{ 0x41, 0x04 }, // GIP_CLKA_2
+	{ 0x42, 0x05 }, // GIP_CLKA_3
+	{ 0x43, 0x06 }, // GIP_CLKA_4
+	{ 0x44, 0x11 }, // GIP_CLKA_5
+	{ 0x45, 0xe8 }, // GIP_CLKA_6
+	{ 0x46, 0xe9 }, // GIP_CLKA_7
+	{ 0x47, 0x11 }, // GIP_CLKA_8
+	{ 0x48, 0xea }, // GIP_CLKA_9
+	{ 0x49, 0xeb }, // GIP_CLKA_10
+	{ 0x50, 0x07 }, // GIP_CLKB_1
+	{ 0x51, 0x08 }, // GIP_CLKB_2
+	{ 0x52, 0x09 }, // GIP_CLKB_3
+	{ 0x53, 0x0a }, // GIP_CLKB_4
+	{ 0x54, 0x11 }, // GIP_CLKB_5
+	{ 0x55, 0xec }, // GIP_CLKB_6
+	{ 0x56, 0xed }, // GIP_CLKB_7
+	{ 0x57, 0x11 }, // GIP_CLKB_8
+	{ 0x58, 0xef }, // GIP_CLKB_9
+	{ 0x59, 0xf0 }, // GIP_CLKB_10
+	// Map internal GOA signals to GOA output pad
+	{ 0xb1, 0x01 }, // PANELD2U2
+	{ 0xb4, 0x15 }, // PANELD2U5
+	{ 0xb5, 0x16 }, // PANELD2U6
+	{ 0xb6, 0x09 }, // PANELD2U7
+	{ 0xb7, 0x0f }, // PANELD2U8
+	{ 0xb8, 0x0d }, // PANELD2U9
+	{ 0xb9, 0x0b }, // PANELD2U10
+	{ 0xba, 0x00 }, // PANELD2U11
+	{ 0xc7, 0x02 }, // PANELD2U24
+	{ 0xca, 0x17 }, // PANELD2U27
+	{ 0xcb, 0x18 }, // PANELD2U28
+	{ 0xcc, 0x0a }, // PANELD2U29
+	{ 0xcd, 0x10 }, // PANELD2U30
+	{ 0xce, 0x0e }, // PANELD2U31
+	{ 0xcf, 0x0c }, // PANELD2U32
+	{ 0xd0, 0x00 }, // PANELD2U33
+	// Map internal GOA signals to GOA output pad
+	{ 0x81, 0x00 }, // PANELU2D2
+	{ 0x84, 0x15 }, // PANELU2D5
+	{ 0x85, 0x16 }, // PANELU2D6
+	{ 0x86, 0x10 }, // PANELU2D7
+	{ 0x87, 0x0a }, // PANELU2D8
+	{ 0x88, 0x0c }, // PANELU2D9
+	{ 0x89, 0x0e }, // PANELU2D10
+	{ 0x8a, 0x02 }, // PANELU2D11
+	{ 0x97, 0x00 }, // PANELU2D24
+	{ 0x9a, 0x17 }, // PANELU2D27
+	{ 0x9b, 0x18 }, // PANELU2D28
+	{ 0x9c, 0x0f }, // PANELU2D29
+	{ 0x9d, 0x09 }, // PANELU2D30
+	{ 0x9e, 0x0b }, // PANELU2D31
+	{ 0x9f, 0x0d }, // PANELU2D32
+	{ 0xa0, 0x01 }, // PANELU2D33
+	// EXTC Command set enable, select page 2
+	{ 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x02 },
+	// Unknown registers
 	{ 0x01, 0x01 },
 	{ 0x02, 0xda },
 	{ 0x03, 0xba },
@@ -227,10 +427,10 @@ static const struct nv3052c_reg nv3052c_panel_regs[] = {
 	{ 0x0e, 0x48 },
 	{ 0x0f, 0x38 },
 	{ 0x10, 0x2b },
-	{ 0xff, 0x30 },
-	{ 0xff, 0x52 },
-	{ 0xff, 0x00 },
-	{ 0x36, 0x0a },
+	// EXTC Command set enable, select page 0
+	{ 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x00 },
+	// Display Access Control
+	{ 0x36, 0x0a }, // bgr = 1, ss = 1, gs = 0
 };
 
 static inline struct nv3052c *to_nv3052c(struct drm_panel *panel)
@@ -241,6 +441,8 @@ static inline struct nv3052c *to_nv3052c(struct drm_panel *panel)
 static int nv3052c_prepare(struct drm_panel *panel)
 {
 	struct nv3052c *priv = to_nv3052c(panel);
+	const struct nv3052c_reg *panel_regs = priv->panel_info->panel_regs;
+	unsigned int panel_regs_len = priv->panel_info->panel_regs_len;
 	struct mipi_dbi *dbi = &priv->dbi;
 	unsigned int i;
 	int err;
@@ -257,9 +459,9 @@ static int nv3052c_prepare(struct drm_panel *panel)
 	gpiod_set_value_cansleep(priv->reset_gpio, 0);
 	usleep_range(5000, 20000);
 
-	for (i = 0; i < ARRAY_SIZE(nv3052c_panel_regs); i++) {
-		err = mipi_dbi_command(dbi, nv3052c_panel_regs[i].cmd,
-				       nv3052c_panel_regs[i].val);
+	for (i = 0; i < panel_regs_len; i++) {
+		err = mipi_dbi_command(dbi, panel_regs[i].cmd,
+				       panel_regs[i].val);
 
 		if (err) {
 			dev_err(priv->dev, "Unable to set register: %d\n", err);
@@ -453,6 +655,21 @@ static const struct drm_display_mode ltk035c5444t_modes[] = {
 	},
 };
 
+static const struct drm_display_mode fs035vg158_modes[] = {
+	{ /* 60 Hz */
+		.clock = 21000,
+		.hdisplay = 640,
+		.hsync_start = 640 + 34,
+		.hsync_end = 640 + 34 + 4,
+		.htotal = 640 + 34 + 4 + 20,
+		.vdisplay = 480,
+		.vsync_start = 480 + 12,
+		.vsync_end = 480 + 12 + 4,
+		.vtotal = 480 + 12 + 4 + 6,
+		.flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC,
+	},
+};
+
 static const struct nv3052c_panel_info ltk035c5444t_panel_info = {
 	.display_modes = ltk035c5444t_modes,
 	.num_modes = ARRAY_SIZE(ltk035c5444t_modes),
@@ -460,10 +677,31 @@ static const struct nv3052c_panel_info ltk035c5444t_panel_info = {
 	.height_mm = 64,
 	.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
 	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE,
+	.panel_regs = ltk035c5444t_panel_regs,
+	.panel_regs_len = ARRAY_SIZE(ltk035c5444t_panel_regs),
+};
+
+static const struct nv3052c_panel_info fs035vg158_panel_info = {
+	.display_modes = fs035vg158_modes,
+	.num_modes = ARRAY_SIZE(fs035vg158_modes),
+	.width_mm = 70,
+	.height_mm = 53,
+	.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
+	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE,
+	.panel_regs = fs035vg158_panel_regs,
+	.panel_regs_len = ARRAY_SIZE(fs035vg158_panel_regs),
+};
+
+static const struct spi_device_id nv3052c_ids[] = {
+	{ "ltk035c5444t", },
+	{ "fs035vg158", },
+	{ /* sentinel */ }
 };
+MODULE_DEVICE_TABLE(spi, nv3052c_ids);
 
 static const struct of_device_id nv3052c_of_match[] = {
 	{ .compatible = "leadtek,ltk035c5444t", .data = &ltk035c5444t_panel_info },
+	{ .compatible = "fascontek,fs035vg158", .data = &fs035vg158_panel_info },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, nv3052c_of_match);
@@ -473,6 +711,7 @@ static struct spi_driver nv3052c_driver = {
 		.name = "nv3052c",
 		.of_match_table = nv3052c_of_match,
 	},
+	.id_table = nv3052c_ids,
 	.probe = nv3052c_probe,
 	.remove = nv3052c_remove,
 };
diff --git a/drivers/gpu/drm/panel/panel-novatek-nt36523.c b/drivers/gpu/drm/panel/panel-novatek-nt36523.c
index 9b9a7eb1bc60..a189ce236328 100644
--- a/drivers/gpu/drm/panel/panel-novatek-nt36523.c
+++ b/drivers/gpu/drm/panel/panel-novatek-nt36523.c
@@ -1254,9 +1254,9 @@ static int nt36523_probe(struct mipi_dsi_device *dsi)
 			return dev_err_probe(dev, -EPROBE_DEFER, "cannot get secondary DSI host\n");
 
 		pinfo->dsi[1] = mipi_dsi_device_register_full(dsi1_host, info);
-		if (!pinfo->dsi[1]) {
+		if (IS_ERR(pinfo->dsi[1])) {
 			dev_err(dev, "cannot get secondary DSI device\n");
-			return -ENODEV;
+			return PTR_ERR(pinfo->dsi[1]);
 		}
 	}
 
diff --git a/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c b/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c
index ea5a85779382..f23d8832a1ad 100644
--- a/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c
+++ b/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c
@@ -309,7 +309,7 @@ static const struct s6d7aa0_panel_desc s6d7aa0_lsl080al02_desc = {
 	.off_func = s6d7aa0_lsl080al02_off,
 	.drm_mode = &s6d7aa0_lsl080al02_mode,
 	.mode_flags = MIPI_DSI_MODE_VSYNC_FLUSH | MIPI_DSI_MODE_VIDEO_NO_HFP,
-	.bus_flags = DRM_BUS_FLAG_DE_HIGH,
+	.bus_flags = 0,
 
 	.has_backlight = false,
 	.use_passwd3 = false,
diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 8017ad33cf18..d493ee735c73 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -1134,6 +1134,37 @@ static const struct panel_desc auo_g133han01 = {
 	.connector_type = DRM_MODE_CONNECTOR_LVDS,
 };
 
+static const struct display_timing auo_g156han04_timings = {
+	.pixelclock = { 137000000, 141000000, 146000000 },
+	.hactive = { 1920, 1920, 1920 },
+	.hfront_porch = { 60, 60, 60 },
+	.hback_porch = { 90, 92, 111 },
+	.hsync_len =  { 32, 32, 32 },
+	.vactive = { 1080, 1080, 1080 },
+	.vfront_porch = { 12, 12, 12 },
+	.vback_porch = { 24, 36, 56 },
+	.vsync_len = { 8, 8, 8 },
+};
+
+static const struct panel_desc auo_g156han04 = {
+	.timings = &auo_g156han04_timings,
+	.num_timings = 1,
+	.bpc = 8,
+	.size = {
+		.width = 344,
+		.height = 194,
+	},
+	.delay = {
+		.prepare = 50,		/* T2 */
+		.enable = 200,		/* T3 */
+		.disable = 110,		/* T10 */
+		.unprepare = 1000,	/* T13 */
+	},
+	.bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG,
+	.bus_flags = DRM_BUS_FLAG_DE_HIGH,
+	.connector_type = DRM_MODE_CONNECTOR_LVDS,
+};
+
 static const struct drm_display_mode auo_g156xtn01_mode = {
 	.clock = 76000,
 	.hdisplay = 1366,
@@ -3917,6 +3948,7 @@ static const struct panel_desc tianma_tm070jdhg30 = {
 	},
 	.bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG,
 	.connector_type = DRM_MODE_CONNECTOR_LVDS,
+	.bus_flags = DRM_BUS_FLAG_DE_HIGH,
 };
 
 static const struct panel_desc tianma_tm070jvhg33 = {
@@ -3929,6 +3961,7 @@ static const struct panel_desc tianma_tm070jvhg33 = {
 	},
 	.bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG,
 	.connector_type = DRM_MODE_CONNECTOR_LVDS,
+	.bus_flags = DRM_BUS_FLAG_DE_HIGH,
 };
 
 static const struct display_timing tianma_tm070rvhg71_timing = {
@@ -4289,6 +4322,9 @@ static const struct of_device_id platform_of_match[] = {
 		.compatible = "auo,g133han01",
 		.data = &auo_g133han01,
 	}, {
+		.compatible = "auo,g156han04",
+		.data = &auo_g156han04,
+	}, {
 		.compatible = "auo,g156xtn01",
 		.data = &auo_g156xtn01,
 	}, {
diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7701.c b/drivers/gpu/drm/panel/panel-sitronix-st7701.c
index 0459965e1b4f..421eb4592b61 100644
--- a/drivers/gpu/drm/panel/panel-sitronix-st7701.c
+++ b/drivers/gpu/drm/panel/panel-sitronix-st7701.c
@@ -288,7 +288,7 @@ static void st7701_init_sequence(struct st7701 *st7701)
 		   FIELD_PREP(DSI_CMD2_BK1_PWRCTRL2_AVDD_MASK,
 			      DIV_ROUND_CLOSEST(desc->avdd_mv - 6200, 200)) |
 		   FIELD_PREP(DSI_CMD2_BK1_PWRCTRL2_AVCL_MASK,
-			      DIV_ROUND_CLOSEST(-4400 + desc->avcl_mv, 200)));
+			      DIV_ROUND_CLOSEST(-4400 - desc->avcl_mv, 200)));
 
 	/* T2D = 0.2us * T2D[3:0] */
 	ST7701_DSI(st7701, DSI_CMD2_BK1_SPD1,
@@ -423,6 +423,42 @@ static void kd50t048a_gip_sequence(struct st7701 *st7701)
 		   0xFF, 0xFF, 0xFF, 0xFF, 0x10, 0x45, 0x67, 0x98, 0xBA);
 }
 
+static void rg_arc_gip_sequence(struct st7701 *st7701)
+{
+	st7701_switch_cmd_bkx(st7701, true, 3);
+	ST7701_DSI(st7701, 0xEF, 0x08);
+	st7701_switch_cmd_bkx(st7701, true, 0);
+	ST7701_DSI(st7701, 0xC7, 0x04);
+	ST7701_DSI(st7701, 0xCC, 0x38);
+	st7701_switch_cmd_bkx(st7701, true, 1);
+	ST7701_DSI(st7701, 0xB9, 0x10);
+	ST7701_DSI(st7701, 0xBC, 0x03);
+	ST7701_DSI(st7701, 0xC0, 0x89);
+	ST7701_DSI(st7701, 0xE0, 0x00, 0x00, 0x02);
+	ST7701_DSI(st7701, 0xE1, 0x04, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00,
+		   0x00, 0x00, 0x20, 0x20);
+	ST7701_DSI(st7701, 0xE2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		   0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+	ST7701_DSI(st7701, 0xE3, 0x00, 0x00, 0x33, 0x00);
+	ST7701_DSI(st7701, 0xE4, 0x22, 0x00);
+	ST7701_DSI(st7701, 0xE5, 0x04, 0x5C, 0xA0, 0xA0, 0x06, 0x5C, 0xA0,
+		   0xA0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+	ST7701_DSI(st7701, 0xE6, 0x00, 0x00, 0x33, 0x00);
+	ST7701_DSI(st7701, 0xE7, 0x22, 0x00);
+	ST7701_DSI(st7701, 0xE8, 0x05, 0x5C, 0xA0, 0xA0, 0x07, 0x5C, 0xA0,
+		   0xA0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+	ST7701_DSI(st7701, 0xEB, 0x02, 0x00, 0x40, 0x40, 0x00, 0x00, 0x00);
+	ST7701_DSI(st7701, 0xEC, 0x00, 0x00);
+	ST7701_DSI(st7701, 0xED, 0xFA, 0x45, 0x0B, 0xFF, 0xFF, 0xFF, 0xFF,
+		   0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xB0, 0x54, 0xAF);
+	ST7701_DSI(st7701, 0xEF, 0x08, 0x08, 0x08, 0x45, 0x3F, 0x54);
+	st7701_switch_cmd_bkx(st7701, false, 0);
+	ST7701_DSI(st7701, MIPI_DCS_SET_ADDRESS_MODE, 0x17);
+	ST7701_DSI(st7701, MIPI_DCS_SET_PIXEL_FORMAT, 0x77);
+	ST7701_DSI(st7701, MIPI_DCS_EXIT_SLEEP_MODE, 0x00);
+	msleep(120);
+}
+
 static int st7701_prepare(struct drm_panel *panel)
 {
 	struct st7701 *st7701 = panel_to_st7701(panel);
@@ -839,6 +875,105 @@ static const struct st7701_panel_desc kd50t048a_desc = {
 	.gip_sequence = kd50t048a_gip_sequence,
 };
 
+static const struct drm_display_mode rg_arc_mode = {
+	.clock          = 25600,
+
+	.hdisplay	= 480,
+	.hsync_start	= 480 + 60,
+	.hsync_end	= 480 + 60 + 42,
+	.htotal         = 480 + 60 + 42 + 60,
+
+	.vdisplay	= 640,
+	.vsync_start	= 640 + 10,
+	.vsync_end	= 640 + 10 + 4,
+	.vtotal         = 640 + 10 + 4 + 16,
+
+	.width_mm	= 63,
+	.height_mm	= 84,
+
+	.type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED,
+};
+
+static const struct st7701_panel_desc rg_arc_desc = {
+	.mode = &rg_arc_mode,
+	.lanes = 2,
+	.format = MIPI_DSI_FMT_RGB888,
+	.panel_sleep_delay = 80,
+
+	.pv_gamma = {
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0x01) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC0_MASK, 0),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC4_MASK, 0x16),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC8_MASK, 0x1d),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC16_MASK, 0x0e),
+
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC24_MASK, 0x12),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC52_MASK, 0x06),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC80_MASK, 0x0c),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC108_MASK, 0x0a),
+
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC147_MASK, 0x09),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC175_MASK, 0x25),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC203_MASK, 0x00),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC231_MASK, 0x03),
+
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC239_MASK, 0x00),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC247_MASK, 0x3f),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC251_MASK, 0x3f),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC255_MASK, 0x1c)
+	},
+	.nv_gamma = {
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0x01) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC0_MASK, 0),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC4_MASK, 0x16),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC8_MASK, 0x1e),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC16_MASK, 0x0e),
+
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC24_MASK, 0x11),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC52_MASK, 0x06),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC80_MASK, 0x0c),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC108_MASK, 0x08),
+
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC147_MASK, 0x09),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC175_MASK, 0x26),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC203_MASK, 0x00),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC231_MASK, 0x15),
+
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC239_MASK, 0x00),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC247_MASK, 0x3f),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC251_MASK, 0x3f),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC255_MASK, 0x1c)
+	},
+	.nlinv = 0,
+	.vop_uv = 4500000,
+	.vcom_uv = 762500,
+	.vgh_mv = 15000,
+	.vgl_mv = -9510,
+	.avdd_mv = 6600,
+	.avcl_mv = -4400,
+	.gamma_op_bias = OP_BIAS_MIDDLE,
+	.input_op_bias = OP_BIAS_MIN,
+	.output_op_bias = OP_BIAS_MIN,
+	.t2d_ns = 1600,
+	.t3d_ns = 10400,
+	.eot_en = true,
+	.gip_sequence = rg_arc_gip_sequence,
+};
+
 static int st7701_dsi_probe(struct mipi_dsi_device *dsi)
 {
 	const struct st7701_panel_desc *desc;
@@ -917,6 +1052,7 @@ static void st7701_dsi_remove(struct mipi_dsi_device *dsi)
 }
 
 static const struct of_device_id st7701_of_match[] = {
+	{ .compatible = "anbernic,rg-arc-panel", .data = &rg_arc_desc },
 	{ .compatible = "densitron,dmt028vghmcmi-1a", .data = &dmt028vghmcmi_1a_desc },
 	{ .compatible = "elida,kd50t048a", .data = &kd50t048a_desc },
 	{ .compatible = "techstar,ts8550b", .data = &ts8550b_desc },
diff --git a/drivers/gpu/drm/panel/panel-synaptics-r63353.c b/drivers/gpu/drm/panel/panel-synaptics-r63353.c
new file mode 100644
index 000000000000..169c629746c7
--- /dev/null
+++ b/drivers/gpu/drm/panel/panel-synaptics-r63353.c
@@ -0,0 +1,362 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Synaptics R63353 Controller driver
+ *
+ * Copyright (C) 2020 BSH Hausgerate GmbH
+ */
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/media-bus-format.h>
+
+#include <linux/gpio/consumer.h>
+#include <linux/regulator/consumer.h>
+
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_modes.h>
+#include <drm/drm_panel.h>
+
+#include <video/mipi_display.h>
+
+#define R63353_INSTR(...) { \
+		.len = sizeof((const u8[]) {__VA_ARGS__}), \
+		.data = (const u8[]){__VA_ARGS__} /* const: matches 'const u8 *data' */ \
+	}
+
+struct r63353_instr {
+	size_t len;
+	const u8 *data;
+};
+
+static const struct r63353_instr sharp_ls068b3sx02_init[] = {
+	R63353_INSTR(0x51, 0xff),
+	R63353_INSTR(0x53, 0x0c),
+	R63353_INSTR(0x55, 0x00),
+	R63353_INSTR(0x84, 0x00),
+	R63353_INSTR(0x29),
+};
+
+struct r63353_desc {
+	const char *name;
+	const struct r63353_instr *init;
+	const size_t init_length;
+	const struct drm_display_mode *mode;
+	u32 width_mm;
+	u32 height_mm;
+};
+
+struct r63353_panel {
+	struct drm_panel base;
+	struct mipi_dsi_device *dsi;
+
+	struct gpio_desc *reset_gpio;
+	struct regulator *dvdd;
+	struct regulator *avdd;
+
+	struct r63353_desc *pdata;
+};
+
+static inline struct r63353_panel *to_r63353_panel(struct drm_panel *panel)
+{
+	return container_of(panel, struct r63353_panel, base);
+}
+
+static int r63353_panel_power_on(struct r63353_panel *rpanel)
+{
+	struct mipi_dsi_device *dsi = rpanel->dsi;
+	struct device *dev = &dsi->dev;
+	int ret;
+
+	ret = regulator_enable(rpanel->avdd);
+	if (ret) {
+		dev_err(dev, "Failed to enable avdd regulator (%d)\n", ret);
+		return ret;
+	}
+	/* Wait 15-25 ms between enabling avdd and enabling dvdd. */
+	usleep_range(15000, 25000);
+
+	ret = regulator_enable(rpanel->dvdd);
+	if (ret) {
+		dev_err(dev, "Failed to enable dvdd regulator (%d)\n", ret);
+		regulator_disable(rpanel->avdd);	/* undo the avdd enable above */
+		return ret;
+	}
+	/* Process context here, so use the sleep-capable GPIO accessor. */
+	usleep_range(300000, 350000);
+	gpiod_set_value_cansleep(rpanel->reset_gpio, 1);
+	usleep_range(15000, 25000);
+
+	return 0;
+}
+
+static int r63353_panel_power_off(struct r63353_panel *rpanel)
+{
+	gpiod_set_value_cansleep(rpanel->reset_gpio, 0);
+	regulator_disable(rpanel->dvdd);
+	regulator_disable(rpanel->avdd);
+	/* regulator_disable() results are not checked; always reports success. */
+	return 0;
+}
+
+static int r63353_panel_activate(struct r63353_panel *rpanel)
+{
+	struct mipi_dsi_device *dsi = rpanel->dsi;
+	struct device *dev = &dsi->dev;
+	int i, ret;
+
+	ret = mipi_dsi_dcs_soft_reset(dsi);
+	if (ret < 0) {
+		dev_err(dev, "Failed to do Software Reset (%d)\n", ret);
+		goto fail;
+	}
+
+	usleep_range(15000, 17000);
+
+	ret = mipi_dsi_dcs_enter_sleep_mode(dsi);
+	if (ret < 0) {
+		dev_err(dev, "Failed to enter sleep mode (%d)\n", ret);
+		goto fail;
+	}
+	/* Send the per-panel init table between sleep-in and sleep-out. */
+	for (i = 0; i < rpanel->pdata->init_length; i++) {
+		const struct r63353_instr *instr = &rpanel->pdata->init[i];
+
+		ret = mipi_dsi_dcs_write_buffer(dsi, instr->data, instr->len);
+		if (ret < 0)
+			goto fail;
+	}
+
+	msleep(120);
+
+	ret = mipi_dsi_dcs_exit_sleep_mode(dsi);
+	if (ret < 0) {
+		dev_err(dev, "Failed to exit sleep mode (%d)\n", ret);
+		goto fail;
+	}
+
+	usleep_range(5000, 10000);
+
+	ret = mipi_dsi_dcs_set_display_on(dsi);
+	if (ret < 0) {
+		dev_err(dev, "Failed to set display ON (%d)\n", ret);
+		goto fail;
+	}
+
+	return 0;
+	/* Any failure sets the reset GPIO to 0 (sleep-capable accessor). */
+fail:
+	gpiod_set_value_cansleep(rpanel->reset_gpio, 0);
+
+	return ret;
+}
+
+static int r63353_panel_prepare(struct drm_panel *panel)
+{
+	struct r63353_panel *rpanel = to_r63353_panel(panel);
+	struct mipi_dsi_device *dsi = rpanel->dsi;
+	struct device *dev = &dsi->dev;
+	int ret;
+
+	dev_dbg(dev, "Preparing\n");
+
+	ret = r63353_panel_power_on(rpanel);
+	if (ret)
+		return ret;
+	/* If the DCS bring-up fails, power the panel back off before bailing. */
+	ret = r63353_panel_activate(rpanel);
+	if (ret) {
+		r63353_panel_power_off(rpanel);
+		return ret;
+	}
+
+	dev_dbg(dev, "Prepared\n");
+	return 0;
+}
+
+static int r63353_panel_deactivate(struct r63353_panel *rpanel)
+{
+	struct mipi_dsi_device *dsi = rpanel->dsi;
+	struct device *dev = &dsi->dev;
+	int ret;
+
+	ret = mipi_dsi_dcs_set_display_off(dsi);
+	if (ret < 0) {
+		dev_err(dev, "Failed to set display OFF (%d)\n", ret);
+		return ret;
+	}
+	/* Wait 5-10 ms between display-off and sleep-in. */
+	usleep_range(5000, 10000);
+
+	ret = mipi_dsi_dcs_enter_sleep_mode(dsi);
+	if (ret < 0) {
+		dev_err(dev, "Failed to enter sleep mode (%d)\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int r63353_panel_unprepare(struct drm_panel *panel)
+{
+	struct r63353_panel *rpanel = to_r63353_panel(panel);
+
+	r63353_panel_deactivate(rpanel);	/* result ignored: power off anyway */
+	r63353_panel_power_off(rpanel);
+
+	return 0;
+}
+
+static const struct drm_display_mode sharp_ls068b3sx02_timing = {
+	.clock = 70000,			/* pixel clock; drm_display_mode.clock is in kHz */
+	.hdisplay = 640,
+	.hsync_start = 640 + 35,	/* 35-pixel horizontal front porch */
+	.hsync_end = 640 + 35 + 2,	/* 2-pixel hsync pulse */
+	.htotal = 640 + 35 + 2 + 150,	/* 150-pixel horizontal back porch */
+	.vdisplay = 1280,
+	.vsync_start = 1280 + 2,	/* 2-line vertical front porch */
+	.vsync_end = 1280 + 2 + 4,	/* 4-line vsync pulse */
+	.vtotal = 1280 + 2 + 4 + 0,	/* no vertical back porch */
+};
+
+/* .get_modes hook: add the panel's one fixed mode; returns 1, or -ENOMEM if the copy fails. */
+static int r63353_panel_get_modes(struct drm_panel *panel,
+				  struct drm_connector *connector)
+{
+	struct r63353_panel *rpanel = to_r63353_panel(panel);
+	struct drm_display_mode *mode;
+	static const u32 bus_format = MEDIA_BUS_FMT_RGB888_1X24;
+
+	mode = drm_mode_duplicate(connector->dev, rpanel->pdata->mode);
+	if (!mode)
+		return -ENOMEM;
+
+	drm_mode_set_name(mode);
+	drm_mode_probed_add(connector, mode);
+
+	mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED;
+	connector->display_info.width_mm = rpanel->pdata->width_mm;
+	connector->display_info.height_mm = rpanel->pdata->height_mm;
+
+	drm_display_info_set_bus_formats(&connector->display_info,
+					 &bus_format, 1);
+	return 1;	/* number of modes added */
+}
+
+static const struct drm_panel_funcs r63353_panel_funcs = {
+	.prepare = r63353_panel_prepare,	/* power on + activate */
+	.unprepare = r63353_panel_unprepare,	/* deactivate + power off */
+	.get_modes = r63353_panel_get_modes,	/* reports the single fixed mode */
+};
+
+/* Probe: validate match data, get supplies and reset GPIO, register panel, attach to host. */
+static int r63353_panel_probe(struct mipi_dsi_device *dsi)
+{
+	struct device *dev = &dsi->dev;
+	struct r63353_panel *panel;
+	int ret;
+
+	panel = devm_kzalloc(dev, sizeof(*panel), GFP_KERNEL);
+	if (!panel)
+		return -ENOMEM;
+
+	mipi_dsi_set_drvdata(dsi, panel);
+	panel->dsi = dsi;
+	panel->pdata = (struct r63353_desc *)of_device_get_match_data(dev);
+	if (!panel->pdata)	/* e.g. bound by name without an OF match */
+		return -ENODEV;
+
+	dev_info(dev, "Panel %s\n", panel->pdata->name);
+
+	dsi->lanes = 2;
+	dsi->format = MIPI_DSI_FMT_RGB888;
+	dsi->mode_flags = MIPI_DSI_MODE_VIDEO_HSE | MIPI_DSI_MODE_VIDEO |
+			  MIPI_DSI_CLOCK_NON_CONTINUOUS | MIPI_DSI_MODE_LPM |
+			  MIPI_DSI_MODE_VIDEO_SYNC_PULSE | MIPI_DSI_MODE_NO_EOT_PACKET;
+
+	panel->dvdd = devm_regulator_get(dev, "dvdd");
+	if (IS_ERR(panel->dvdd))
+		return PTR_ERR(panel->dvdd);
+	panel->avdd = devm_regulator_get(dev, "avdd");
+	if (IS_ERR(panel->avdd))
+		return PTR_ERR(panel->avdd);
+
+	panel->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW);
+	if (IS_ERR(panel->reset_gpio)) {
+		dev_err(dev, "failed to get RESET GPIO\n");
+		return PTR_ERR(panel->reset_gpio);
+	}
+
+	drm_panel_init(&panel->base, dev, &r63353_panel_funcs,
+		       DRM_MODE_CONNECTOR_DSI);
+	panel->base.prepare_prev_first = true;
+	ret = drm_panel_of_backlight(&panel->base);
+	if (ret)
+		return ret;
+
+	drm_panel_add(&panel->base);
+	ret = mipi_dsi_attach(dsi);
+	if (ret < 0) {
+		dev_err(dev, "mipi_dsi_attach failed: %d\n", ret);
+		drm_panel_remove(&panel->base);	/* undo drm_panel_add() */
+		return ret;
+	}
+	return ret;
+}
+
+/* Unbind: detach from the DSI host (a failure is only logged), then unregister the panel. */
+static void r63353_panel_remove(struct mipi_dsi_device *dsi)
+{
+	struct r63353_panel *ctx = mipi_dsi_get_drvdata(dsi);
+	int err = mipi_dsi_detach(dsi);
+
+	if (err < 0)
+		dev_err(&dsi->dev, "Failed to detach from host (%d)\n", err);
+
+	/* Unregister the panel even when the detach failed. */
+	drm_panel_remove(&ctx->base);
+}
+
+static void r63353_panel_shutdown(struct mipi_dsi_device *dsi)
+{
+	struct r63353_panel *rpanel = mipi_dsi_get_drvdata(dsi);
+	/* NOTE(review): unprepares unconditionally; confirm safe if never prepared. */
+	r63353_panel_unprepare(&rpanel->base);
+}
+
+static const struct r63353_desc sharp_ls068b3sx02_data = {
+	.name = "Sharp LS068B3SX02",	/* printed by probe via dev_info() */
+	.mode = &sharp_ls068b3sx02_timing,
+	.init = sharp_ls068b3sx02_init,	/* sent one entry at a time via mipi_dsi_dcs_write_buffer() */
+	.init_length = ARRAY_SIZE(sharp_ls068b3sx02_init),
+	.width_mm = 68,			/* copied into display_info.width_mm by get_modes */
+	.height_mm = 159,		/* copied into display_info.height_mm by get_modes */
+};
+
+static const struct of_device_id r63353_of_match[] = {
+	{ .compatible = "sharp,ls068b3sx02", .data = &sharp_ls068b3sx02_data },
+	{ }	/* sentinel */
+};
+
+MODULE_DEVICE_TABLE(of, r63353_of_match);
+
+static struct mipi_dsi_driver r63353_panel_driver = {
+	.driver = {
+		   .name = "r63353-dsi",
+		   .of_match_table = r63353_of_match,	/* .data carries the r63353_desc read in probe */
+	},
+	.probe = r63353_panel_probe,
+	.remove = r63353_panel_remove,
+	.shutdown = r63353_panel_shutdown,	/* calls r63353_panel_unprepare() */
+};
+
+module_mipi_dsi_driver(r63353_panel_driver);
+
+MODULE_AUTHOR("Matthias Proske <Matthias.Proske@bshg.com>");
+MODULE_AUTHOR("Michael Trimarchi <michael@amarulasolutions.com>");
+MODULE_DESCRIPTION("Synaptics R63353 Controller Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
index f59c82ea8870..2d30da38c2c3 100644
--- a/drivers/gpu/drm/panfrost/panfrost_devfreq.c
+++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
@@ -29,14 +29,20 @@ static void panfrost_devfreq_update_utilization(struct panfrost_devfreq *pfdevfr
 static int panfrost_devfreq_target(struct device *dev, unsigned long *freq,
 				   u32 flags)
 {
+	struct panfrost_device *ptdev = dev_get_drvdata(dev);
 	struct dev_pm_opp *opp;
+	int err;
 
 	opp = devfreq_recommended_opp(dev, freq, flags);
 	if (IS_ERR(opp))
 		return PTR_ERR(opp);
 	dev_pm_opp_put(opp);
 
-	return dev_pm_opp_set_rate(dev, *freq);
+	err =  dev_pm_opp_set_rate(dev, *freq);
+	if (!err)
+		ptdev->pfdevfreq.current_frequency = *freq;
+
+	return err;
 }
 
 static void panfrost_devfreq_reset(struct panfrost_devfreq *pfdevfreq)
@@ -58,7 +64,6 @@ static int panfrost_devfreq_get_dev_status(struct device *dev,
 	spin_lock_irqsave(&pfdevfreq->lock, irqflags);
 
 	panfrost_devfreq_update_utilization(pfdevfreq);
-	pfdevfreq->current_frequency = status->current_frequency;
 
 	status->total_time = ktime_to_ns(ktime_add(pfdevfreq->busy_time,
 						   pfdevfreq->idle_time));
@@ -165,6 +170,14 @@ int panfrost_devfreq_init(struct panfrost_device *pfdev)
 	panfrost_devfreq_profile.initial_freq = cur_freq;
 
 	/*
+	 * We could wait until panfrost_devfreq_target() to set this value, but
+	 * since the simple_ondemand governor works asynchronously, there's a
+	 * chance by the time someone opens the device's fdinfo file, current
+	 * frequency hasn't been updated yet, so let's just do an early set.
+	 */
+	pfdevfreq->current_frequency = cur_freq;
+
+	/*
 	 * Set the recommend OPP this will enable and configure the regulator
 	 * if any and will avoid a switch off by regulator_late_cleanup()
 	 */
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
index 0cf64456e29a..d47b40b82b0b 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
@@ -200,7 +200,7 @@ static enum drm_gem_object_status panfrost_gem_status(struct drm_gem_object *obj
 	struct panfrost_gem_object *bo = to_panfrost_bo(obj);
 	enum drm_gem_object_status res = 0;
 
-	if (bo->base.pages)
+	if (bo->base.base.import_attach || bo->base.pages)
 		res |= DRM_GEM_OBJECT_RESIDENT;
 
 	if (bo->base.madv == PANFROST_MADV_DONTNEED)
diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
index 4aca09cab4b8..6e537c5bd295 100644
--- a/drivers/gpu/drm/radeon/atombios_encoders.c
+++ b/drivers/gpu/drm/radeon/atombios_encoders.c
@@ -29,6 +29,7 @@
 #include <linux/pci.h>
 
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_edid.h>
 #include <drm/drm_file.h>
 #include <drm/drm_modeset_helper_vtables.h>
 #include <drm/radeon_drm.h>
diff --git a/drivers/gpu/drm/radeon/clearstate_evergreen.h b/drivers/gpu/drm/radeon/clearstate_evergreen.h
index 63a1ffbb3ced..3b645558f133 100644
--- a/drivers/gpu/drm/radeon/clearstate_evergreen.h
+++ b/drivers/gpu/drm/radeon/clearstate_evergreen.h
@@ -1049,7 +1049,7 @@ static const struct cs_extent_def SECT_CONTEXT_defs[] =
     {SECT_CONTEXT_def_5, 0x0000a29e, 5 },
     {SECT_CONTEXT_def_6, 0x0000a2a5, 56 },
     {SECT_CONTEXT_def_7, 0x0000a2de, 290 },
-    { 0, 0, 0 }
+    { NULL, 0, 0 }
 };
 static const u32 SECT_CLEAR_def_1[] =
 {
@@ -1060,7 +1060,7 @@ static const u32 SECT_CLEAR_def_1[] =
 static const struct cs_extent_def SECT_CLEAR_defs[] =
 {
     {SECT_CLEAR_def_1, 0x0000ffc0, 3 },
-    { 0, 0, 0 }
+    { NULL, 0, 0 }
 };
 static const u32 SECT_CTRLCONST_def_1[] =
 {
@@ -1070,11 +1070,11 @@ static const u32 SECT_CTRLCONST_def_1[] =
 static const struct cs_extent_def SECT_CTRLCONST_defs[] =
 {
     {SECT_CTRLCONST_def_1, 0x0000f3fc, 2 },
-    { 0, 0, 0 }
+    { NULL, 0, 0 }
 };
 static const struct cs_section_def evergreen_cs_data[] = {
     { SECT_CONTEXT_defs, SECT_CONTEXT },
     { SECT_CLEAR_defs, SECT_CLEAR },
     { SECT_CTRLCONST_defs, SECT_CTRLCONST },
-    { 0, SECT_NONE }
+    { NULL, SECT_NONE }
 };
diff --git a/drivers/gpu/drm/radeon/dce3_1_afmt.c b/drivers/gpu/drm/radeon/dce3_1_afmt.c
index e8fe239b9d79..324e9b765098 100644
--- a/drivers/gpu/drm/radeon/dce3_1_afmt.c
+++ b/drivers/gpu/drm/radeon/dce3_1_afmt.c
@@ -21,6 +21,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 #include <linux/hdmi.h>
+#include <drm/drm_edid.h>
 
 #include "radeon.h"
 #include "radeon_asic.h"
diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c
index 4a1d5447eac1..4c06f47453fd 100644
--- a/drivers/gpu/drm/radeon/dce6_afmt.c
+++ b/drivers/gpu/drm/radeon/dce6_afmt.c
@@ -21,6 +21,7 @@
  *
  */
 #include <linux/hdmi.h>
+#include <drm/drm_edid.h>
 
 #include "dce6_afmt.h"
 #include "radeon.h"
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index f0ae087be914..a424b86008b8 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -26,6 +26,7 @@
 #include <linux/pci.h>
 #include <linux/slab.h>
 
+#include <drm/drm_edid.h>
 #include <drm/drm_vblank.h>
 #include <drm/radeon_drm.h>
 #include <drm/drm_fourcc.h>
diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c
index 5f3078f8ab95..681119c91d94 100644
--- a/drivers/gpu/drm/radeon/evergreen_hdmi.c
+++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c
@@ -26,6 +26,7 @@
  */
 #include <linux/hdmi.h>
 
+#include <drm/drm_edid.h>
 #include <drm/radeon_drm.h>
 #include "evergreen_hdmi.h"
 #include "radeon.h"
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index affa9e0309b2..cfeca2694d5f 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -2321,7 +2321,7 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
 	switch (prim_walk) {
 	case 1:
 		for (i = 0; i < track->num_arrays; i++) {
-			size = track->arrays[i].esize * track->max_indx * 4;
+			size = track->arrays[i].esize * track->max_indx * 4UL;
 			if (track->arrays[i].robj == NULL) {
 				DRM_ERROR("(PW %u) Vertex array %u no buffer "
 					  "bound\n", prim_walk, i);
@@ -2340,7 +2340,7 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
 		break;
 	case 2:
 		for (i = 0; i < track->num_arrays; i++) {
-			size = track->arrays[i].esize * (nverts - 1) * 4;
+			size = track->arrays[i].esize * (nverts - 1) * 4UL;
 			if (track->arrays[i].robj == NULL) {
 				DRM_ERROR("(PW %u) Vertex array %u no buffer "
 					  "bound\n", prim_walk, i);
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 638f861af80f..6cf54a747749 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -1275,7 +1275,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 			return -EINVAL;
 		}
 		tmp = (reg - CB_COLOR0_BASE) / 4;
-		track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
+		track->cb_color_bo_offset[tmp] = (u64)radeon_get_ib_value(p, idx) << 8;
 		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		track->cb_color_base_last[tmp] = ib[idx];
 		track->cb_color_bo[tmp] = reloc->robj;
@@ -1302,7 +1302,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 					"0x%04X\n", reg);
 			return -EINVAL;
 		}
-		track->htile_offset = radeon_get_ib_value(p, idx) << 8;
+		track->htile_offset = (u64)radeon_get_ib_value(p, idx) << 8;
 		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		track->htile_bo = reloc->robj;
 		track->db_dirty = true;
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index 85c4bb186203..3596ea4a8b60 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -27,6 +27,7 @@
 #include <linux/pci.h>
 
 #include <drm/drm_device.h>
+#include <drm/drm_edid.h>
 #include <drm/radeon_drm.h>
 
 #include "radeon.h"
diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c
index 279bf130a18c..91b58fbc2be7 100644
--- a/drivers/gpu/drm/radeon/radeon_audio.c
+++ b/drivers/gpu/drm/radeon/radeon_audio.c
@@ -27,6 +27,7 @@
 
 #include <drm/drm_crtc.h>
 #include <drm/drm_eld.h>
+#include <drm/drm_edid.h>
 #include "dce6_afmt.h"
 #include "evergreen_hdmi.h"
 #include "radeon.h"
diff --git a/drivers/gpu/drm/radeon/radeon_audio.h b/drivers/gpu/drm/radeon/radeon_audio.h
index 05e67867469b..dacaaa007051 100644
--- a/drivers/gpu/drm/radeon/radeon_audio.h
+++ b/drivers/gpu/drm/radeon/radeon_audio.h
@@ -27,7 +27,9 @@
 
 #include <linux/types.h>
 
-#define RREG32_ENDPOINT(block, reg)		\
+struct cea_sad;
+
+#define RREG32_ENDPOINT(block, reg)				\
 	radeon_audio_endpoint_rreg(rdev, (block), (reg))
 #define WREG32_ENDPOINT(block, reg, v)	\
 	radeon_audio_endpoint_wreg(rdev, (block), (reg), (v))
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index 2620efc7c675..6952b1273b0f 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -28,6 +28,7 @@
 #include <linux/pci.h>
 
 #include <drm/drm_device.h>
+#include <drm/drm_edid.h>
 #include <drm/radeon_drm.h>
 
 #include "radeon.h"
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 901e75ec70ff..efd18c8d84c8 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -687,11 +687,16 @@ static void radeon_crtc_init(struct drm_device *dev, int index)
 	if (radeon_crtc == NULL)
 		return;
 
+	radeon_crtc->flip_queue = alloc_workqueue("radeon-crtc", WQ_HIGHPRI, 0);
+	if (!radeon_crtc->flip_queue) {
+		kfree(radeon_crtc);
+		return;
+	}
+
 	drm_crtc_init(dev, &radeon_crtc->base, &radeon_crtc_funcs);
 
 	drm_mode_crtc_set_gamma_size(&radeon_crtc->base, 256);
 	radeon_crtc->crtc_id = index;
-	radeon_crtc->flip_queue = alloc_workqueue("radeon-crtc", WQ_HIGHPRI, 0);
 	rdev->mode_info.crtcs[index] = radeon_crtc;
 
 	if (rdev->family >= CHIP_BONAIRE) {
diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c
index 9cb6401fe97e..3de3dce9e89d 100644
--- a/drivers/gpu/drm/radeon/radeon_encoders.c
+++ b/drivers/gpu/drm/radeon/radeon_encoders.c
@@ -26,6 +26,7 @@
 
 #include <linux/pci.h>
 
+#include <drm/drm_edid.h>
 #include <drm/drm_device.h>
 #include <drm/radeon_drm.h>
 
diff --git a/drivers/gpu/drm/radeon/radeon_i2c.c b/drivers/gpu/drm/radeon/radeon_i2c.c
index 314d066e68e9..3d174390a8af 100644
--- a/drivers/gpu/drm/radeon/radeon_i2c.c
+++ b/drivers/gpu/drm/radeon/radeon_i2c.c
@@ -918,7 +918,6 @@ struct radeon_i2c_chan *radeon_i2c_create(struct drm_device *dev,
 
 	i2c->rec = *rec;
 	i2c->adapter.owner = THIS_MODULE;
-	i2c->adapter.class = I2C_CLASS_DDC;
 	i2c->adapter.dev.parent = dev->dev;
 	i2c->dev = dev;
 	i2c_set_adapdata(&i2c->adapter, i2c);
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index 1decdcec0264..59c4db13d90a 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -32,13 +32,13 @@
 
 #include <drm/display/drm_dp_helper.h>
 #include <drm/drm_crtc.h>
-#include <drm/drm_edid.h>
 #include <drm/drm_encoder.h>
 #include <drm/drm_fixed.h>
 #include <drm/drm_modeset_helper_vtables.h>
 #include <linux/i2c.h>
 #include <linux/i2c-algo-bit.h>
 
+struct edid;
 struct radeon_bo;
 struct radeon_device;
 
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index e6534fa9f1fb..38048593bb4a 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -413,6 +413,7 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig
 			dev_err(rdev->dev, "(%d) ring map failed\n", r);
 			return r;
 		}
+		radeon_debugfs_ring_init(rdev, ring);
 	}
 	ring->ptr_mask = (ring->ring_size / 4) - 1;
 	ring->ring_free_dw = ring->ring_size / 4;
@@ -421,7 +422,6 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig
 		ring->next_rptr_gpu_addr = rdev->wb.gpu_addr + index;
 		ring->next_rptr_cpu_addr = &rdev->wb.wb[index/4];
 	}
-	radeon_debugfs_ring_init(rdev, ring);
 	radeon_ring_lockup_update(rdev, ring);
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index 987cabbf1318..c38b4d5d6a14 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -1204,13 +1204,17 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
 	r = radeon_bo_create(rdev, pd_size, align, true,
 			     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
 			     NULL, &vm->page_directory);
-	if (r)
+	if (r) {
+		kfree(vm->page_tables);
+		vm->page_tables = NULL;
 		return r;
-
+	}
 	r = radeon_vm_clear_bo(rdev, vm->page_directory);
 	if (r) {
 		radeon_bo_unref(&vm->page_directory);
 		vm->page_directory = NULL;
+		kfree(vm->page_tables);
+		vm->page_tables = NULL;
 		return r;
 	}
 
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index a91012447b56..85e9cba49cec 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -3611,6 +3611,10 @@ static int si_cp_start(struct radeon_device *rdev)
 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
 		ring = &rdev->ring[i];
 		r = radeon_ring_lock(rdev, ring, 2);
+		if (r) {
+			DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
+			return r;
+		}
 
 		/* clear the compute context state */
 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
diff --git a/drivers/gpu/drm/radeon/sumo_dpm.c b/drivers/gpu/drm/radeon/sumo_dpm.c
index f74f381af05f..d49c145db437 100644
--- a/drivers/gpu/drm/radeon/sumo_dpm.c
+++ b/drivers/gpu/drm/radeon/sumo_dpm.c
@@ -1493,8 +1493,10 @@ static int sumo_parse_power_table(struct radeon_device *rdev)
 		non_clock_array_index = power_state->v2.nonClockInfoIndex;
 		non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *)
 			&non_clock_info_array->nonClockInfo[non_clock_array_index];
-		if (!rdev->pm.power_state[i].clock_info)
+		if (!rdev->pm.power_state[i].clock_info) {
+			kfree(rdev->pm.dpm.ps);
 			return -EINVAL;
+		}
 		ps = kzalloc(sizeof(struct sumo_ps), GFP_KERNEL);
 		if (ps == NULL) {
 			kfree(rdev->pm.dpm.ps);
diff --git a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c
index 08ea1c864cb2..ef1cc7bad20a 100644
--- a/drivers/gpu/drm/radeon/trinity_dpm.c
+++ b/drivers/gpu/drm/radeon/trinity_dpm.c
@@ -1726,8 +1726,10 @@ static int trinity_parse_power_table(struct radeon_device *rdev)
 		non_clock_array_index = power_state->v2.nonClockInfoIndex;
 		non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *)
 			&non_clock_info_array->nonClockInfo[non_clock_array_index];
-		if (!rdev->pm.power_state[i].clock_info)
+		if (!rdev->pm.power_state[i].clock_info) {
+			kfree(rdev->pm.dpm.ps);
 			return -EINVAL;
+		}
 		ps = kzalloc(sizeof(struct sumo_ps), GFP_KERNEL);
 		if (ps == NULL) {
 			kfree(rdev->pm.dpm.ps);
diff --git a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
index 84aa811ca1e9..bd08d57486fe 100644
--- a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
+++ b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
@@ -30,7 +30,6 @@
 #include <drm/drm_simple_kms_helper.h>
 
 #include "rockchip_drm_drv.h"
-#include "rockchip_drm_vop.h"
 
 #define RK3288_GRF_SOC_CON6		0x25c
 #define RK3288_EDP_LCDC_SEL		BIT(5)
diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.c b/drivers/gpu/drm/rockchip/cdn-dp-core.c
index 21254e4e107a..a855c45ae7f3 100644
--- a/drivers/gpu/drm/rockchip/cdn-dp-core.c
+++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c
@@ -24,7 +24,6 @@
 
 #include "cdn-dp-core.h"
 #include "cdn-dp-reg.h"
-#include "rockchip_drm_vop.h"
 
 static inline struct cdn_dp_device *connector_to_dp(struct drm_connector *connector)
 {
diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
index 6396f9324dab..4cc8ed8f4fbd 100644
--- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
@@ -26,7 +26,6 @@
 #include <drm/drm_simple_kms_helper.h>
 
 #include "rockchip_drm_drv.h"
-#include "rockchip_drm_vop.h"
 
 #define DSI_PHY_RSTZ			0xa0
 #define PHY_DISFORCEPLL			0
diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
index 341550199111..fe33092abbe7 100644
--- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
@@ -18,7 +18,6 @@
 #include <drm/drm_simple_kms_helper.h>
 
 #include "rockchip_drm_drv.h"
-#include "rockchip_drm_vop.h"
 
 #define RK3228_GRF_SOC_CON2		0x0408
 #define RK3228_HDMI_SDAIN_MSK		BIT(14)
diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.c b/drivers/gpu/drm/rockchip/inno_hdmi.c
index 6e5b922a121e..e6fbe040ccf6 100644
--- a/drivers/gpu/drm/rockchip/inno_hdmi.c
+++ b/drivers/gpu/drm/rockchip/inno_hdmi.c
@@ -23,7 +23,6 @@
 #include <drm/drm_simple_kms_helper.h>
 
 #include "rockchip_drm_drv.h"
-#include "rockchip_drm_vop.h"
 
 #include "inno_hdmi.h"
 
@@ -793,7 +792,6 @@ static struct i2c_adapter *inno_hdmi_i2c_adapter(struct inno_hdmi *hdmi)
 	init_completion(&i2c->cmp);
 
 	adap = &i2c->adap;
-	adap->class = I2C_CLASS_DDC;
 	adap->owner = THIS_MODULE;
 	adap->dev.parent = hdmi->dev;
 	adap->dev.of_node = hdmi->dev->of_node;
diff --git a/drivers/gpu/drm/rockchip/rk3066_hdmi.c b/drivers/gpu/drm/rockchip/rk3066_hdmi.c
index 7d561c5a650f..95cd1b49eda8 100644
--- a/drivers/gpu/drm/rockchip/rk3066_hdmi.c
+++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.c
@@ -18,7 +18,6 @@
 #include "rk3066_hdmi.h"
 
 #include "rockchip_drm_drv.h"
-#include "rockchip_drm_vop.h"
 
 #define DEFAULT_PLLA_RATE 30000000
 
@@ -716,7 +715,6 @@ static struct i2c_adapter *rk3066_hdmi_i2c_adapter(struct rk3066_hdmi *hdmi)
 	init_completion(&i2c->cmpltn);
 
 	adap = &i2c->adap;
-	adap->class = I2C_CLASS_DDC;
 	adap->owner = THIS_MODULE;
 	adap->dev.parent = hdmi->dev;
 	adap->dev.of_node = hdmi->dev->of_node;
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
index aeb03a57240f..bbb9e0bf6804 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
@@ -20,6 +20,23 @@
 #define ROCKCHIP_MAX_CONNECTOR	2
 #define ROCKCHIP_MAX_CRTC	4
 
+/*
+ * display output interface supported by rockchip lcdc
+ */
+#define ROCKCHIP_OUT_MODE_P888		0
+#define ROCKCHIP_OUT_MODE_BT1120	0
+#define ROCKCHIP_OUT_MODE_P666		1
+#define ROCKCHIP_OUT_MODE_P565		2
+#define ROCKCHIP_OUT_MODE_BT656		5
+#define ROCKCHIP_OUT_MODE_S888		8
+#define ROCKCHIP_OUT_MODE_S888_DUMMY	12
+#define ROCKCHIP_OUT_MODE_YUV420	14
+/* for use special outface */
+#define ROCKCHIP_OUT_MODE_AAAA		15
+
+/* output flags */
+#define ROCKCHIP_OUTPUT_DSI_DUAL	BIT(0)
+
 struct drm_device;
 struct drm_connector;
 struct iommu_domain;
@@ -31,6 +48,7 @@ struct rockchip_crtc_state {
 	int output_bpc;
 	int output_flags;
 	bool enable_afbc;
+	bool yuv_overlay;
 	u32 bus_format;
 	u32 bus_flags;
 	int color_space;
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
index 4b2daefeb8c1..b33e5bdc26be 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
@@ -277,18 +277,6 @@ struct vop_data {
 /* dst alpha ctrl define */
 #define DST_FACTOR_M0(x)		(((x) & 0x7) << 6)
 
-/*
- * display output interface supported by rockchip lcdc
- */
-#define ROCKCHIP_OUT_MODE_P888	0
-#define ROCKCHIP_OUT_MODE_P666	1
-#define ROCKCHIP_OUT_MODE_P565	2
-/* for use special outface */
-#define ROCKCHIP_OUT_MODE_AAAA	15
-
-/* output flags */
-#define ROCKCHIP_OUTPUT_DSI_DUAL	BIT(0)
-
 enum alpha_mode {
 	ALPHA_STRAIGHT,
 	ALPHA_INVERSE,
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
index 312da5783362..fdd768bbd487 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -35,7 +35,6 @@
 
 #include "rockchip_drm_drv.h"
 #include "rockchip_drm_gem.h"
-#include "rockchip_drm_fb.h"
 #include "rockchip_drm_vop2.h"
 #include "rockchip_rgb.h"
 
@@ -190,7 +189,10 @@ struct vop2 {
 	void __iomem *regs;
 	struct regmap *map;
 
-	struct regmap *grf;
+	struct regmap *sys_grf;
+	struct regmap *vop_grf;
+	struct regmap *vo1_grf;
+	struct regmap *sys_pmu;
 
 	/* physical map length of vop2 register */
 	u32 len;
@@ -209,6 +211,7 @@ struct vop2 {
 	unsigned int enable_count;
 	struct clk *hclk;
 	struct clk *aclk;
+	struct clk *pclk;
 
 	/* optional internal rgb encoder */
 	struct rockchip_rgb *rgb;
@@ -217,6 +220,25 @@ struct vop2 {
 	struct vop2_win win[];
 };
 
+/* Endpoint-class predicates on ROCKCHIP_VOP2_EP_* ids; the two-id forms evaluate x twice. */
+#define vop2_output_if_is_hdmi(x)	((x) == ROCKCHIP_VOP2_EP_HDMI0 || \
+					 (x) == ROCKCHIP_VOP2_EP_HDMI1)
+
+#define vop2_output_if_is_dp(x)		((x) == ROCKCHIP_VOP2_EP_DP0 || \
+					 (x) == ROCKCHIP_VOP2_EP_DP1)
+
+#define vop2_output_if_is_edp(x)	((x) == ROCKCHIP_VOP2_EP_EDP0 || \
+					 (x) == ROCKCHIP_VOP2_EP_EDP1)
+
+#define vop2_output_if_is_mipi(x)	((x) == ROCKCHIP_VOP2_EP_MIPI0 || \
+					 (x) == ROCKCHIP_VOP2_EP_MIPI1)
+
+#define vop2_output_if_is_lvds(x)	((x) == ROCKCHIP_VOP2_EP_LVDS0 || \
+					 (x) == ROCKCHIP_VOP2_EP_LVDS1)
+#define vop2_output_if_is_dpi(x)	((x) == ROCKCHIP_VOP2_EP_RGB0)
+
+static const struct regmap_config vop2_regmap_config;
+
 static struct vop2_video_port *to_vop2_video_port(struct drm_crtc *crtc)
 {
 	return container_of(crtc, struct vop2_video_port, crtc);
@@ -266,12 +288,23 @@ static bool vop2_cluster_window(const struct vop2_win *win)
 	return win->data->feature & WIN_FEATURE_CLUSTER;
 }
 
+/*
+ * Note:
+ * The write mask function is documented but missing on rk3566/8, writes
+ * to these bits have no effect. For newer soc(rk3588 and following) the
+ * write mask is needed for register writes.
+ *
+ * GLB_CFG_DONE_EN has no write mask bit.
+ *
+ */
 static void vop2_cfg_done(struct vop2_video_port *vp)
 {
 	struct vop2 *vop2 = vp->vop2;
+	u32 val = RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN;
+
+	val |= BIT(vp->id) | (BIT(vp->id) << 16);
 
-	regmap_set_bits(vop2->map, RK3568_REG_CFG_DONE,
-			BIT(vp->id) | RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN);
+	regmap_set_bits(vop2->map, RK3568_REG_CFG_DONE, val);
 }
 
 static void vop2_win_disable(struct vop2_win *win)
@@ -462,6 +495,17 @@ static bool vop2_output_uv_swap(u32 bus_format, u32 output_mode)
 		return false;
 }
 
+/* True only for soc_id 3588 combined with a YUV8_1X24 or YUV10_1X30 bus format. */
+static bool vop2_output_rg_swap(struct vop2 *vop2, u32 bus_format)
+{
+	const bool yuv_1x = bus_format == MEDIA_BUS_FMT_YUV8_1X24 ||
+			    bus_format == MEDIA_BUS_FMT_YUV10_1X30;
+
+	if (vop2->data->soc_id != 3588)
+		return false;
+	return yuv_1x;
+}
+
 static bool is_yuv_output(u32 bus_format)
 {
 	switch (bus_format) {
@@ -519,6 +563,18 @@ static bool rockchip_vop2_mod_supported(struct drm_plane *plane, u32 format,
 	return vop2_convert_afbc_format(format) >= 0;
 }
 
+/*
+ * Pick the value written to VOP2_WIN_AFBC_HALF_BLOCK_EN:
+ * false = full mode (16 lines), true = half block mode (8 lines).
+ * Half block mode is used unless the plane is rotated by 90 or 270 degrees.
+ */
+static bool vop2_half_block_enable(struct drm_plane_state *pstate)
+{
+	const unsigned int quarter_turn = DRM_MODE_ROTATE_270 | DRM_MODE_ROTATE_90;
+
+	return !(pstate->rotation & quarter_turn);
+}
+
 static u32 vop2_afbc_transform_offset(struct drm_plane_state *pstate,
 				      bool afbc_half_block_en)
 {
@@ -854,13 +910,32 @@ static int vop2_core_clks_prepare_enable(struct vop2 *vop2)
 		goto err;
 	}
 
+	ret = clk_prepare_enable(vop2->pclk);
+	if (ret < 0) {
+		drm_err(vop2->drm, "failed to enable pclk - %d\n", ret);
+		goto err1;
+	}
+
 	return 0;
+err1:
+	clk_disable_unprepare(vop2->aclk);
 err:
 	clk_disable_unprepare(vop2->hclk);
 
 	return ret;
 }
 
+/* Clear the CLUSTER0-3 and ESMART bits in RK3588_SYS_PD_CTRL (read-modify-write). */
+static void rk3588_vop2_power_domain_enable_all(struct vop2 *vop2)
+{
+	const u32 all_pd = VOP2_PD_CLUSTER0 | VOP2_PD_CLUSTER1 | VOP2_PD_CLUSTER2 |
+			   VOP2_PD_CLUSTER3 | VOP2_PD_ESMART;
+	u32 ctrl = vop2_readl(vop2, RK3588_SYS_PD_CTRL);
+
+	ctrl &= ~all_pd;
+	vop2_writel(vop2, RK3588_SYS_PD_CTRL, ctrl);
+}
+
 static void vop2_enable(struct vop2 *vop2)
 {
 	int ret;
@@ -883,11 +958,12 @@ static void vop2_enable(struct vop2 *vop2)
 		return;
 	}
 
-	regcache_sync(vop2->map);
-
 	if (vop2->data->soc_id == 3566)
 		vop2_writel(vop2, RK3568_OTP_WIN_EN, 1);
 
+	if (vop2->data->soc_id == 3588)
+		rk3588_vop2_power_domain_enable_all(vop2);
+
 	vop2_writel(vop2, RK3568_REG_CFG_DONE, RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN);
 
 	/*
@@ -913,8 +989,9 @@ static void vop2_disable(struct vop2 *vop2)
 
 	pm_runtime_put_sync(vop2->dev);
 
-	regcache_mark_dirty(vop2->map);
+	regcache_drop_region(vop2->map, 0, vop2_regmap_config.max_register);
 
+	clk_disable_unprepare(vop2->pclk);
 	clk_disable_unprepare(vop2->aclk);
 	clk_disable_unprepare(vop2->hclk);
 }
@@ -1140,6 +1217,7 @@ static void vop2_plane_atomic_update(struct drm_plane *plane,
 	bool rotate_90 = pstate->rotation & DRM_MODE_ROTATE_90;
 	struct rockchip_gem_object *rk_obj;
 	unsigned long offset;
+	bool half_block_en;
 	bool afbc_en;
 	dma_addr_t yrgb_mst;
 	dma_addr_t uv_mst;
@@ -1232,6 +1310,7 @@ static void vop2_plane_atomic_update(struct drm_plane *plane,
 	dsp_info = (dsp_h - 1) << 16 | ((dsp_w - 1) & 0xffff);
 
 	format = vop2_convert_format(fb->format->format);
+	half_block_en = vop2_half_block_enable(pstate);
 
 	drm_dbg(vop2->drm, "vp%d update %s[%dx%d->%dx%d@%dx%d] fmt[%p4cc_%s] addr[%pad]\n",
 		vp->id, win->data->name, actual_w, actual_h, dsp_w, dsp_h,
@@ -1239,6 +1318,9 @@ static void vop2_plane_atomic_update(struct drm_plane *plane,
 		&fb->format->format,
 		afbc_en ? "AFBC" : "", &yrgb_mst);
 
+	if (vop2_cluster_window(win))
+		vop2_win_write(win, VOP2_WIN_AFBC_HALF_BLOCK_EN, half_block_en);
+
 	if (afbc_en) {
 		u32 stride;
 
@@ -1277,15 +1359,21 @@ static void vop2_plane_atomic_update(struct drm_plane *plane,
 			vop2_win_write(win, VOP2_WIN_AFBC_ENABLE, 1);
 		vop2_win_write(win, VOP2_WIN_AFBC_FORMAT, afbc_format);
 		vop2_win_write(win, VOP2_WIN_AFBC_UV_SWAP, uv_swap);
-		vop2_win_write(win, VOP2_WIN_AFBC_AUTO_GATING_EN, 0);
+		/*
+		 * On rk3566/8 this bit is "auto gating enable", but the
+		 * feature does not work well, so it must be disabled
+		 * on these two platforms.
+		 * On rk3588 and the newer SoCs that follow (rk3528/rk3576)
+		 * this bit is "gating disable", so we write 1 to
+		 * disable gating when AFBC is enabled.
+		 */
+		if (vop2->data->soc_id == 3566 || vop2->data->soc_id == 3568)
+			vop2_win_write(win, VOP2_WIN_AFBC_AUTO_GATING_EN, 0);
+		else
+			vop2_win_write(win, VOP2_WIN_AFBC_AUTO_GATING_EN, 1);
+
 		vop2_win_write(win, VOP2_WIN_AFBC_BLOCK_SPLIT_EN, 0);
-		if (pstate->rotation & (DRM_MODE_ROTATE_270 | DRM_MODE_ROTATE_90)) {
-			vop2_win_write(win, VOP2_WIN_AFBC_HALF_BLOCK_EN, 0);
-			transform_offset = vop2_afbc_transform_offset(pstate, false);
-		} else {
-			vop2_win_write(win, VOP2_WIN_AFBC_HALF_BLOCK_EN, 1);
-			transform_offset = vop2_afbc_transform_offset(pstate, true);
-		}
+		transform_offset = vop2_afbc_transform_offset(pstate, half_block_en);
 		vop2_win_write(win, VOP2_WIN_AFBC_HDR_PTR, yrgb_mst);
 		vop2_win_write(win, VOP2_WIN_AFBC_PIC_SIZE, act_info);
 		vop2_win_write(win, VOP2_WIN_AFBC_TRANSFORM_OFFSET, transform_offset);
@@ -1297,6 +1385,11 @@ static void vop2_plane_atomic_update(struct drm_plane *plane,
 		vop2_win_write(win, VOP2_WIN_AFBC_ROTATE_270, rotate_270);
 		vop2_win_write(win, VOP2_WIN_AFBC_ROTATE_90, rotate_90);
 	} else {
+		if (vop2_cluster_window(win)) {
+			vop2_win_write(win, VOP2_WIN_AFBC_ENABLE, 0);
+			vop2_win_write(win, VOP2_WIN_AFBC_TRANSFORM_OFFSET, 0);
+		}
+
 		vop2_win_write(win, VOP2_WIN_YRGB_VIR, DIV_ROUND_UP(fb->pitches[0], 4));
 	}
 
@@ -1429,8 +1522,18 @@ static void vop2_post_config(struct drm_crtc *crtc)
 	u32 top_margin = 100, bottom_margin = 100;
 	u16 hsize = hdisplay * (left_margin + right_margin) / 200;
 	u16 vsize = vdisplay * (top_margin + bottom_margin) / 200;
+	u16 hsync_len = mode->crtc_hsync_end - mode->crtc_hsync_start;
 	u16 hact_end, vact_end;
 	u32 val;
+	u32 bg_dly;
+	u32 pre_scan_dly;
+
+	bg_dly = vp->data->pre_scan_max_dly[3];
+	vop2_writel(vp->vop2, RK3568_VP_BG_MIX_CTRL(vp->id),
+		    FIELD_PREP(RK3568_VP_BG_MIX_CTRL__BG_DLY, bg_dly));
+
+	pre_scan_dly = ((bg_dly + (hdisplay >> 1) - 1) << 16) | hsync_len;
+	vop2_vp_write(vp, RK3568_VP_PRE_SCAN_HTIMING, pre_scan_dly);
 
 	vsize = rounddown(vsize, 2);
 	hsize = rounddown(hsize, 2);
@@ -1466,10 +1569,10 @@ static void vop2_post_config(struct drm_crtc *crtc)
 	vop2_vp_write(vp, RK3568_VP_DSP_BG, 0);
 }
 
-static void rk3568_set_intf_mux(struct vop2_video_port *vp, int id,
-				u32 polflags)
+static unsigned long rk3568_set_intf_mux(struct vop2_video_port *vp, int id, u32 polflags)
 {
 	struct vop2 *vop2 = vp->vop2;
+	struct drm_crtc *crtc = &vp->crtc;
 	u32 die, dip;
 
 	die = vop2_readl(vop2, RK3568_DSP_IF_EN);
@@ -1483,9 +1586,9 @@ static void rk3568_set_intf_mux(struct vop2_video_port *vp, int id,
 		dip &= ~RK3568_DSP_IF_POL__RGB_LVDS_PIN_POL;
 		dip |= FIELD_PREP(RK3568_DSP_IF_POL__RGB_LVDS_PIN_POL, polflags);
 		if (polflags & POLFLAG_DCLK_INV)
-			regmap_write(vop2->grf, RK3568_GRF_VO_CON1, BIT(3 + 16) | BIT(3));
+			regmap_write(vop2->sys_grf, RK3568_GRF_VO_CON1, BIT(3 + 16) | BIT(3));
 		else
-			regmap_write(vop2->grf, RK3568_GRF_VO_CON1, BIT(3 + 16));
+			regmap_write(vop2->sys_grf, RK3568_GRF_VO_CON1, BIT(3 + 16));
 		break;
 	case ROCKCHIP_VOP2_EP_HDMI0:
 		die &= ~RK3568_SYS_DSP_INFACE_EN_HDMI_MUX;
@@ -1531,13 +1634,280 @@ static void rk3568_set_intf_mux(struct vop2_video_port *vp, int id,
 		break;
 	default:
 		drm_err(vop2->drm, "Invalid interface id %d on vp%d\n", id, vp->id);
-		return;
+		return 0;
 	}
 
 	dip |= RK3568_DSP_IF_POL__CFG_DONE_IMD;
 
 	vop2_writel(vop2, RK3568_DSP_IF_EN, die);
 	vop2_writel(vop2, RK3568_DSP_IF_POL, dip);
+
+	return crtc->state->adjusted_mode.crtc_clock  * 1000LL;
+}
+
+/*
+ * Pick the dclk on rk3588: the largest of child_clk * {4, 2, 1} that does
+ * not exceed max_dclk, or 0 when even child_clk itself is above max_dclk.
+ */
+static unsigned long rk3588_calc_dclk(unsigned long child_clk, unsigned long max_dclk)
+{
+	unsigned int mult;
+
+	for (mult = 4; mult >= 1; mult >>= 1) {
+		if (child_clk * mult <= max_dclk)
+			return child_clk * mult;
+	}
+
+	return 0;
+}
+
+/*
+ * 4 pixclk/cycle on rk3588
+ * RGB/eDP/HDMI: if_pixclk >= dclk_core
+ * DP: dp_pixclk = dclk_out <= dclk_core
+ * DSI: mipi_pixclk <= dclk_out <= dclk_core
+ * Fills the four divider outputs (as log2) and returns dclk in Hz, 0 on error.
+ */
+static unsigned long rk3588_calc_cru_cfg(struct vop2_video_port *vp, int id,
+					 int *dclk_core_div, int *dclk_out_div,
+					 int *if_pixclk_div, int *if_dclk_div)
+{
+	struct vop2 *vop2 = vp->vop2;
+	struct drm_crtc *crtc = &vp->crtc;
+	struct drm_display_mode *adjusted_mode = &crtc->state->adjusted_mode;
+	struct rockchip_crtc_state *vcstate = to_rockchip_crtc_state(crtc->state);
+	int output_mode = vcstate->output_mode;
+	unsigned long v_pixclk = adjusted_mode->crtc_clock * 1000LL; /* video timing pixclk */
+	unsigned long dclk_core_rate = v_pixclk >> 2;
+	unsigned long dclk_rate = v_pixclk;
+	unsigned long dclk_out_rate;
+	unsigned long if_pixclk_rate;
+	int K = 1;
+
+	/* Default to divide-by-1: not every branch below assigns all dividers. */
+	*dclk_out_div = 1;
+	*if_pixclk_div = 1;
+	*if_dclk_div = 1;
+
+	if (vop2_output_if_is_hdmi(id)) {
+		/*
+		 * K = 2: dclk_core = if_pixclk_rate > if_dclk_rate
+		 * K = 1: dclk_core = hdmie_edp_dclk > if_pixclk_rate
+		 */
+		if (output_mode == ROCKCHIP_OUT_MODE_YUV420) {
+			dclk_rate = dclk_rate >> 1;
+			K = 2;
+		}
+
+		if_pixclk_rate = (dclk_core_rate << 1) / K;
+		/*
+		 * if_dclk_rate = dclk_core_rate / K;
+		 * *if_pixclk_div = dclk_rate / if_pixclk_rate;
+		 * *if_dclk_div = dclk_rate / if_dclk_rate;
+		 */
+		*if_pixclk_div = 2;
+		*if_dclk_div = 4;
+	} else if (vop2_output_if_is_edp(id)) {
+		/* edp_pixclk = edp_dclk > dclk_core */
+		if_pixclk_rate = v_pixclk / K;
+		dclk_rate = if_pixclk_rate * K;
+		/*
+		 * *if_pixclk_div = dclk_rate / if_pixclk_rate;
+		 * *if_dclk_div = *if_pixclk_div;
+		 */
+		*if_pixclk_div = K;
+		*if_dclk_div = K;
+	} else if (vop2_output_if_is_dp(id)) {
+		if (output_mode == ROCKCHIP_OUT_MODE_YUV420)
+			dclk_out_rate = v_pixclk >> 3;
+		else
+			dclk_out_rate = v_pixclk >> 2;
+
+		dclk_rate = rk3588_calc_dclk(dclk_out_rate, 600000000); /* Hz, like v_pixclk */
+		if (!dclk_rate) {
+			drm_err(vop2->drm, "DP dclk_out_rate out of range, dclk_out_rate: %lu Hz\n",
+				dclk_out_rate);
+			return 0;
+		}
+		*dclk_out_div = dclk_rate / dclk_out_rate;
+	} else if (vop2_output_if_is_mipi(id)) {
+		if_pixclk_rate = dclk_core_rate / K;
+		/* dclk_core = dclk_out * K = if_pixclk * K = v_pixclk / 4 */
+		dclk_out_rate = if_pixclk_rate;
+		/*
+		 * dclk_rate = N * dclk_core_rate N = (1,2,4 ),
+		 * we get a little factor here
+		 */
+		dclk_rate = rk3588_calc_dclk(dclk_out_rate, 600000000); /* Hz, like v_pixclk */
+		if (!dclk_rate) {
+			drm_err(vop2->drm, "MIPI dclk out of range, dclk_out_rate: %lu Hz\n",
+				dclk_out_rate);
+			return 0;
+		}
+		*dclk_out_div = dclk_rate / dclk_out_rate;
+		/* mipi pixclk == dclk_out */
+		*if_pixclk_div = 1;
+	} else if (vop2_output_if_is_dpi(id)) {
+		dclk_rate = v_pixclk;
+	}
+
+	*dclk_core_div = dclk_rate / dclk_core_rate;
+	*if_pixclk_div = ilog2(*if_pixclk_div);
+	*if_dclk_div = ilog2(*if_dclk_div);
+	*dclk_core_div = ilog2(*dclk_core_div);
+	*dclk_out_div = ilog2(*dclk_out_div);
+
+	drm_dbg(vop2->drm, "dclk: %ld, pixclk_div: %d, dclk_div: %d\n",
+		dclk_rate, *if_pixclk_div, *if_dclk_div);
+
+	return dclk_rate;
+}
+
+/*
+ * Translate a video port id into the rk3588 MIPI port mux selector:
+ * VP1 -> 3 (MIPI1 only), VP3 -> 1, any other port (VP2) -> 0.
+ */
+static u32 rk3588_get_mipi_port_mux(int vp_id)
+{
+	switch (vp_id) {
+	case 1:
+		return 3;
+	case 3:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+static u32 rk3588_get_hdmi_pol(u32 flags)
+{
+	u32 val;
+
+	val = (flags & DRM_MODE_FLAG_NHSYNC) ? BIT(HSYNC_POSITIVE) : 0;
+	val |= (flags & DRM_MODE_FLAG_NVSYNC) ? BIT(VSYNC_POSITIVE) : 0;
+	/* NHSYNC/NVSYNC in @flags map to BIT(HSYNC_POSITIVE)/BIT(VSYNC_POSITIVE) in val. */
+	return val;
+}
+
+/*
+ * Route endpoint @id to @vp on rk3588: set clock dividers, interface mux and polarity.
+ * Returns the dclk rate from rk3588_calc_cru_cfg(), or 0 on failure/unknown @id.
+ */
+static unsigned long rk3588_set_intf_mux(struct vop2_video_port *vp, int id, u32 polflags)
+{
+	struct vop2 *vop2 = vp->vop2;
+	int dclk_core_div, dclk_out_div, if_pixclk_div, if_dclk_div;
+	unsigned long clock;
+	u32 die, dip, div, vp_clk_div, val;
+
+	clock = rk3588_calc_cru_cfg(vp, id, &dclk_core_div, &dclk_out_div,
+				    &if_pixclk_div, &if_dclk_div);
+	if (!clock)
+		return 0;
+
+	vp_clk_div = FIELD_PREP(RK3588_VP_CLK_CTRL__DCLK_CORE_DIV, dclk_core_div);
+	vp_clk_div |= FIELD_PREP(RK3588_VP_CLK_CTRL__DCLK_OUT_DIV, dclk_out_div);
+
+	die = vop2_readl(vop2, RK3568_DSP_IF_EN);
+	dip = vop2_readl(vop2, RK3568_DSP_IF_POL);
+	div = vop2_readl(vop2, RK3568_DSP_IF_CTRL);
+
+	switch (id) {
+	case ROCKCHIP_VOP2_EP_HDMI0:
+		div &= ~(RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV | RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV);
+		div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV, if_dclk_div);
+		div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV, if_pixclk_div);
+		die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX;
+		die |= RK3588_SYS_DSP_INFACE_EN_HDMI0 |
+			   FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX, vp->id);
+		val = rk3588_get_hdmi_pol(polflags);
+		regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 1, 1));
+		regmap_write(vop2->vo1_grf, RK3588_GRF_VO1_CON0, HIWORD_UPDATE(val, 6, 5));
+		break;
+	case ROCKCHIP_VOP2_EP_HDMI1:
+		div &= ~(RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV | RK3588_DSP_IF_EDP_HDMI1_PCLK_DIV);
+		div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV, if_dclk_div);
+		div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI1_PCLK_DIV, if_pixclk_div);
+		die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX;
+		die |= RK3588_SYS_DSP_INFACE_EN_HDMI1 |
+			   FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX, vp->id);
+		val = rk3588_get_hdmi_pol(polflags);
+		regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 4, 4));
+		regmap_write(vop2->vo1_grf, RK3588_GRF_VO1_CON0, HIWORD_UPDATE(val, 8, 7));
+		break;
+	case ROCKCHIP_VOP2_EP_EDP0:	/* shares divider and mux fields with HDMI0 */
+		div &= ~(RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV | RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV);
+		div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV, if_dclk_div);
+		div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV, if_pixclk_div);
+		die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX;
+		die |= RK3588_SYS_DSP_INFACE_EN_EDP0 |
+			   FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX, vp->id);
+		regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 0, 0));
+		break;
+	case ROCKCHIP_VOP2_EP_EDP1:	/* shares divider and mux fields with HDMI1 */
+		div &= ~(RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV | RK3588_DSP_IF_EDP_HDMI1_PCLK_DIV);
+		div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV, if_dclk_div);
+		div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI1_PCLK_DIV, if_pixclk_div);
+		die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX;
+		die |= RK3588_SYS_DSP_INFACE_EN_EDP1 |
+			   FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX, vp->id);
+		regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 3, 3));
+		break;
+	case ROCKCHIP_VOP2_EP_MIPI0:
+		div &= ~RK3588_DSP_IF_MIPI0_PCLK_DIV;
+		div |= FIELD_PREP(RK3588_DSP_IF_MIPI0_PCLK_DIV, if_pixclk_div);
+		die &= ~RK3588_SYS_DSP_INFACE_EN_MIPI0_MUX;	/* 1-bit field, hence !!val */
+		val = rk3588_get_mipi_port_mux(vp->id);
+		die |= RK3588_SYS_DSP_INFACE_EN_MIPI0 |
+			   FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_MIPI0_MUX, !!val);
+		break;
+	case ROCKCHIP_VOP2_EP_MIPI1:
+		div &= ~RK3588_DSP_IF_MIPI1_PCLK_DIV;
+		div |= FIELD_PREP(RK3588_DSP_IF_MIPI1_PCLK_DIV, if_pixclk_div);
+		die &= ~RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX;
+		val = rk3588_get_mipi_port_mux(vp->id);
+		die |= RK3588_SYS_DSP_INFACE_EN_MIPI1 |
+			   FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX, val);
+		break;
+	case ROCKCHIP_VOP2_EP_DP0:
+		die &= ~RK3588_SYS_DSP_INFACE_EN_DP0_MUX;
+		die |= RK3588_SYS_DSP_INFACE_EN_DP0 |
+			   FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_DP0_MUX, vp->id);
+		dip &= ~RK3588_DSP_IF_POL__DP0_PIN_POL;
+		dip |= FIELD_PREP(RK3588_DSP_IF_POL__DP0_PIN_POL, polflags);
+		break;
+	case ROCKCHIP_VOP2_EP_DP1:
+		die &= ~RK3588_SYS_DSP_INFACE_EN_DP1_MUX;
+		die |= RK3588_SYS_DSP_INFACE_EN_DP1 |
+			   FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_DP1_MUX, vp->id);
+		dip &= ~RK3588_DSP_IF_POL__DP1_PIN_POL;
+		dip |= FIELD_PREP(RK3588_DSP_IF_POL__DP1_PIN_POL, polflags);
+		break;
+	default:
+		drm_err(vop2->drm, "Invalid interface id %d on vp%d\n", id, vp->id);
+		return 0;
+	}
+
+	dip |= RK3568_DSP_IF_POL__CFG_DONE_IMD;
+
+	vop2_vp_write(vp, RK3588_VP_CLK_CTRL, vp_clk_div);
+	vop2_writel(vop2, RK3568_DSP_IF_EN, die);
+	vop2_writel(vop2, RK3568_DSP_IF_CTRL, div);
+	vop2_writel(vop2, RK3568_DSP_IF_POL, dip);
+
+	return clock;
+}
+
+static unsigned long vop2_set_intf_mux(struct vop2_video_port *vp, int ep_id, u32 polflags)
+{
+	switch (vp->vop2->data->soc_id) {	/* dispatch to the per-SoC mux helper */
+	case 3566:
+	case 3568:
+		return rk3568_set_intf_mux(vp, ep_id, polflags);
+	case 3588:
+		return rk3588_set_intf_mux(vp, ep_id, polflags);
+	}
+	return 0;	/* any other soc_id: nothing programmed */
 }
 
 static int us_to_vertical_line(struct drm_display_mode *mode, int us)
@@ -1592,6 +1962,8 @@ static void vop2_crtc_atomic_enable(struct drm_crtc *crtc,
 
 	vop2->enable_count++;
 
+	vcstate->yuv_overlay = is_yuv_output(vcstate->bus_format);
+
 	vop2_crtc_enable_irq(vp, VP_INT_POST_BUF_EMPTY);
 
 	polflags = 0;
@@ -1605,11 +1977,21 @@ static void vop2_crtc_atomic_enable(struct drm_crtc *crtc,
 	drm_for_each_encoder_mask(encoder, crtc->dev, crtc_state->encoder_mask) {
 		struct rockchip_encoder *rkencoder = to_rockchip_encoder(encoder);
 
-		rk3568_set_intf_mux(vp, rkencoder->crtc_endpoint_id, polflags);
+		/*
+		 * To drive high resolutions (4KP120, 8K), the vop on rk3588/rk3576
+		 * needs to process multiple (1/2/4/8) pixels per cycle, so the dclk
+		 * fed by the system cru may be 1/2 or 1/4 of mode->clock.
+		 */
+		clock = vop2_set_intf_mux(vp, rkencoder->crtc_endpoint_id, polflags);
+	}
+
+	if (!clock) {
+		vop2_unlock(vop2);
+		return;
 	}
 
 	if (vcstate->output_mode == ROCKCHIP_OUT_MODE_AAAA &&
-	    !(vp_data->feature & VOP_FEATURE_OUTPUT_10BIT))
+	    !(vp_data->feature & VOP2_VP_FEATURE_OUTPUT_10BIT))
 		out_mode = ROCKCHIP_OUT_MODE_P888;
 	else
 		out_mode = vcstate->output_mode;
@@ -1618,8 +2000,10 @@ static void vop2_crtc_atomic_enable(struct drm_crtc *crtc,
 
 	if (vop2_output_uv_swap(vcstate->bus_format, vcstate->output_mode))
 		dsp_ctrl |= RK3568_VP_DSP_CTRL__DSP_RB_SWAP;
+	if (vop2_output_rg_swap(vop2, vcstate->bus_format))
+		dsp_ctrl |= RK3568_VP_DSP_CTRL__DSP_RG_SWAP;
 
-	if (is_yuv_output(vcstate->bus_format))
+	if (vcstate->yuv_overlay)
 		dsp_ctrl |= RK3568_VP_DSP_CTRL__POST_DSP_OUT_R2Y;
 
 	vop2_dither_setup(crtc, &dsp_ctrl);
@@ -1923,28 +2307,22 @@ static void vop2_setup_layer_mixer(struct vop2_video_port *vp)
 	u32 layer_sel = 0;
 	u32 port_sel;
 	unsigned int nlayer, ofs;
-	struct drm_display_mode *adjusted_mode;
-	u16 hsync_len;
-	u16 hdisplay;
-	u32 bg_dly;
-	u32 pre_scan_dly;
+	u32 ovl_ctrl;
 	int i;
 	struct vop2_video_port *vp0 = &vop2->vps[0];
 	struct vop2_video_port *vp1 = &vop2->vps[1];
 	struct vop2_video_port *vp2 = &vop2->vps[2];
+	struct rockchip_crtc_state *vcstate = to_rockchip_crtc_state(vp->crtc.state);
 
-	adjusted_mode = &vp->crtc.state->adjusted_mode;
-	hsync_len = adjusted_mode->crtc_hsync_end - adjusted_mode->crtc_hsync_start;
-	hdisplay = adjusted_mode->crtc_hdisplay;
-
-	bg_dly = vp->data->pre_scan_max_dly[3];
-	vop2_writel(vop2, RK3568_VP_BG_MIX_CTRL(vp->id),
-		    FIELD_PREP(RK3568_VP_BG_MIX_CTRL__BG_DLY, bg_dly));
+	ovl_ctrl = vop2_readl(vop2, RK3568_OVL_CTRL);
+	ovl_ctrl |= RK3568_OVL_CTRL__LAYERSEL_REGDONE_IMD;
+	if (vcstate->yuv_overlay)
+		ovl_ctrl |= RK3568_OVL_CTRL__YUV_MODE(vp->id);
+	else
+		ovl_ctrl &= ~RK3568_OVL_CTRL__YUV_MODE(vp->id);
 
-	pre_scan_dly = ((bg_dly + (hdisplay >> 1) - 1) << 16) | hsync_len;
-	vop2_vp_write(vp, RK3568_VP_PRE_SCAN_HTIMING, pre_scan_dly);
+	vop2_writel(vop2, RK3568_OVL_CTRL, ovl_ctrl);
 
-	vop2_writel(vop2, RK3568_OVL_CTRL, 0);
 	port_sel = vop2_readl(vop2, RK3568_OVL_PORT_SEL);
 	port_sel &= RK3568_OVL_PORT_SEL__SEL_PORT;
 
@@ -1985,6 +2363,14 @@ static void vop2_setup_layer_mixer(struct vop2_video_port *vp)
 			port_sel &= ~RK3568_OVL_PORT_SEL__CLUSTER1;
 			port_sel |= FIELD_PREP(RK3568_OVL_PORT_SEL__CLUSTER1, vp->id);
 			break;
+		case ROCKCHIP_VOP2_CLUSTER2:
+			port_sel &= ~RK3588_OVL_PORT_SEL__CLUSTER2;
+			port_sel |= FIELD_PREP(RK3588_OVL_PORT_SEL__CLUSTER2, vp->id);
+			break;
+		case ROCKCHIP_VOP2_CLUSTER3:
+			port_sel &= ~RK3588_OVL_PORT_SEL__CLUSTER3;
+			port_sel |= FIELD_PREP(RK3588_OVL_PORT_SEL__CLUSTER3, vp->id);
+			break;
 		case ROCKCHIP_VOP2_ESMART0:
 			port_sel &= ~RK3568_OVL_PORT_SEL__ESMART0;
 			port_sel |= FIELD_PREP(RK3568_OVL_PORT_SEL__ESMART0, vp->id);
@@ -1993,6 +2379,14 @@ static void vop2_setup_layer_mixer(struct vop2_video_port *vp)
 			port_sel &= ~RK3568_OVL_PORT_SEL__ESMART1;
 			port_sel |= FIELD_PREP(RK3568_OVL_PORT_SEL__ESMART1, vp->id);
 			break;
+		case ROCKCHIP_VOP2_ESMART2:
+			port_sel &= ~RK3588_OVL_PORT_SEL__ESMART2;
+			port_sel |= FIELD_PREP(RK3588_OVL_PORT_SEL__ESMART2, vp->id);
+			break;
+		case ROCKCHIP_VOP2_ESMART3:
+			port_sel &= ~RK3588_OVL_PORT_SEL__ESMART3;
+			port_sel |= FIELD_PREP(RK3588_OVL_PORT_SEL__ESMART3, vp->id);
+			break;
 		case ROCKCHIP_VOP2_SMART0:
 			port_sel &= ~RK3568_OVL_PORT_SEL__SMART0;
 			port_sel |= FIELD_PREP(RK3568_OVL_PORT_SEL__SMART0, vp->id);
@@ -2018,7 +2412,6 @@ static void vop2_setup_layer_mixer(struct vop2_video_port *vp)
 
 	vop2_writel(vop2, RK3568_OVL_LAYER_SEL, layer_sel);
 	vop2_writel(vop2, RK3568_OVL_PORT_SEL, port_sel);
-	vop2_writel(vop2, RK3568_OVL_CTRL, RK3568_OVL_CTRL__LAYERSEL_REGDONE_IMD);
 }
 
 static void vop2_setup_dly_for_windows(struct vop2 *vop2)
@@ -2730,8 +3123,29 @@ static int vop2_bind(struct device *dev, struct device *master, void *data)
 		if (IS_ERR(vop2->lut_regs))
 			return PTR_ERR(vop2->lut_regs);
 	}
+	if (vop2_data->feature & VOP2_FEATURE_HAS_SYS_GRF) {
+		vop2->sys_grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,grf");
+		if (IS_ERR(vop2->sys_grf))
+			return dev_err_probe(dev, PTR_ERR(vop2->sys_grf), "cannot get sys_grf");
+	}
 
-	vop2->grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,grf");
+	if (vop2_data->feature & VOP2_FEATURE_HAS_VOP_GRF) {
+		vop2->vop_grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,vop-grf");
+		if (IS_ERR(vop2->vop_grf))
+			return dev_err_probe(dev, PTR_ERR(vop2->vop_grf), "cannot get vop_grf");
+	}
+
+	if (vop2_data->feature & VOP2_FEATURE_HAS_VO1_GRF) {
+		vop2->vo1_grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,vo1-grf");
+		if (IS_ERR(vop2->vo1_grf))
+			return dev_err_probe(dev, PTR_ERR(vop2->vo1_grf), "cannot get vo1_grf");
+	}
+
+	if (vop2_data->feature & VOP2_FEATURE_HAS_SYS_PMU) {
+		vop2->sys_pmu = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,pmu");
+		if (IS_ERR(vop2->sys_pmu))
+			return dev_err_probe(dev, PTR_ERR(vop2->sys_pmu), "cannot get sys_pmu");
+	}
 
 	vop2->hclk = devm_clk_get(vop2->dev, "hclk");
 	if (IS_ERR(vop2->hclk)) {
@@ -2745,6 +3159,12 @@ static int vop2_bind(struct device *dev, struct device *master, void *data)
 		return PTR_ERR(vop2->aclk);
 	}
 
+	vop2->pclk = devm_clk_get_optional(vop2->dev, "pclk_vop");
+	if (IS_ERR(vop2->pclk)) {
+		drm_err(vop2->drm, "failed to get pclk source\n");
+		return PTR_ERR(vop2->pclk);
+	}
+
 	vop2->irq = platform_get_irq(pdev, 0);
 	if (vop2->irq < 0) {
 		drm_err(vop2->drm, "cannot find irq for vop2\n");
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h
index 56fd31e05238..615a16196aff 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h
@@ -7,16 +7,22 @@
 #ifndef _ROCKCHIP_DRM_VOP2_H
 #define _ROCKCHIP_DRM_VOP2_H
 
-#include "rockchip_drm_vop.h"
-
 #include <linux/regmap.h>
 #include <drm/drm_modes.h>
+#include "rockchip_drm_vop.h"
 
-#define VOP_FEATURE_OUTPUT_10BIT        BIT(0)
+#define VOP2_VP_FEATURE_OUTPUT_10BIT        BIT(0)
+
+#define VOP2_FEATURE_HAS_SYS_GRF	BIT(0)
+#define VOP2_FEATURE_HAS_VO0_GRF	BIT(1)
+#define VOP2_FEATURE_HAS_VO1_GRF	BIT(2)
+#define VOP2_FEATURE_HAS_VOP_GRF	BIT(3)
+#define VOP2_FEATURE_HAS_SYS_PMU	BIT(4)
 
 #define WIN_FEATURE_AFBDC		BIT(0)
 #define WIN_FEATURE_CLUSTER		BIT(1)
 
+#define HIWORD_UPDATE(v, h, l)  ((GENMASK(h, l) << 16) | ((v) << (l)))
 /*
  *  the delay number of a window in different mode.
  */
@@ -39,6 +45,18 @@ enum vop2_scale_down_mode {
 	VOP2_SCALE_DOWN_AVG,
 };
 
+/*
+ * vop2 internal power domain ids.
+ * All of them must be non-zero; 0 is treated as invalid.
+ */
+#define VOP2_PD_CLUSTER0	BIT(0)
+#define VOP2_PD_CLUSTER1	BIT(1)
+#define VOP2_PD_CLUSTER2	BIT(2)
+#define VOP2_PD_CLUSTER3	BIT(3)
+#define VOP2_PD_DSC_8K		BIT(5)
+#define VOP2_PD_DSC_4K		BIT(6)
+#define VOP2_PD_ESMART		BIT(7)
+
 enum vop2_win_regs {
 	VOP2_WIN_ENABLE,
 	VOP2_WIN_FORMAT,
@@ -139,6 +157,7 @@ struct vop2_video_port_data {
 
 struct vop2_data {
 	u8 nr_vps;
+	u64 feature;
 	const struct vop2_win_data *win;
 	const struct vop2_video_port_data *vp;
 	struct vop_rect max_input;
@@ -166,19 +185,6 @@ struct vop2_data {
 #define WB_YRGB_FIFO_FULL_INTR		BIT(18)
 #define WB_COMPLETE_INTR		BIT(19)
 
-/*
- * display output interface supported by rockchip lcdc
- */
-#define ROCKCHIP_OUT_MODE_P888		0
-#define ROCKCHIP_OUT_MODE_BT1120	0
-#define ROCKCHIP_OUT_MODE_P666		1
-#define ROCKCHIP_OUT_MODE_P565		2
-#define ROCKCHIP_OUT_MODE_BT656		5
-#define ROCKCHIP_OUT_MODE_S888		8
-#define ROCKCHIP_OUT_MODE_S888_DUMMY	12
-#define ROCKCHIP_OUT_MODE_YUV420	14
-/* for use special outface */
-#define ROCKCHIP_OUT_MODE_AAAA		15
 
 enum vop_csc_format {
 	CSC_BT601L,
@@ -206,6 +212,11 @@ enum dst_factor_mode {
 };
 
 #define RK3568_GRF_VO_CON1			0x0364
+
+#define RK3588_GRF_SOC_CON1			0x0304
+#define RK3588_GRF_VOP_CON2			0x08
+#define RK3588_GRF_VO1_CON0			0x00
+
 /* System registers definition */
 #define RK3568_REG_CFG_DONE			0x000
 #define RK3568_VERSION_INFO			0x004
@@ -214,6 +225,7 @@ enum dst_factor_mode {
 #define RK3568_DSP_IF_EN			0x028
 #define RK3568_DSP_IF_CTRL			0x02c
 #define RK3568_DSP_IF_POL			0x030
+#define RK3588_SYS_PD_CTRL			0x034
 #define RK3568_WB_CTRL				0x40
 #define RK3568_WB_XSCAL_FACTOR			0x44
 #define RK3568_WB_YRGB_MST			0x48
@@ -234,9 +246,14 @@ enum dst_factor_mode {
 #define RK3568_VP_INT_RAW_STATUS(vp)		(0xAC + (vp) * 0x10)
 
 /* Video Port registers definition */
+#define RK3568_VP0_CTRL_BASE			0x0C00
+#define RK3568_VP1_CTRL_BASE			0x0D00
+#define RK3568_VP2_CTRL_BASE			0x0E00
+#define RK3588_VP3_CTRL_BASE			0x0F00
 #define RK3568_VP_DSP_CTRL			0x00
 #define RK3568_VP_MIPI_CTRL			0x04
 #define RK3568_VP_COLOR_BAR_CTRL		0x08
+#define RK3588_VP_CLK_CTRL			0x0C
 #define RK3568_VP_3D_LUT_CTRL			0x10
 #define RK3568_VP_3D_LUT_MST			0x20
 #define RK3568_VP_DSP_BG			0x2C
@@ -278,6 +295,17 @@ enum dst_factor_mode {
 #define RK3568_SMART_DLY_NUM			0x6F8
 
 /* Cluster register definition, offset relative to window base */
+#define RK3568_CLUSTER0_CTRL_BASE		0x1000
+#define RK3568_CLUSTER1_CTRL_BASE		0x1200
+#define RK3588_CLUSTER2_CTRL_BASE		0x1400
+#define RK3588_CLUSTER3_CTRL_BASE		0x1600
+#define RK3568_ESMART0_CTRL_BASE		0x1800
+#define RK3568_ESMART1_CTRL_BASE		0x1A00
+#define RK3568_SMART0_CTRL_BASE			0x1C00
+#define RK3568_SMART1_CTRL_BASE			0x1E00
+#define RK3588_ESMART2_CTRL_BASE		0x1C00
+#define RK3588_ESMART3_CTRL_BASE		0x1E00
+
 #define RK3568_CLUSTER_WIN_CTRL0		0x00
 #define RK3568_CLUSTER_WIN_CTRL1		0x04
 #define RK3568_CLUSTER_WIN_YRGB_MST		0x10
@@ -371,13 +399,18 @@ enum dst_factor_mode {
 #define RK3568_VP_DSP_CTRL__DITHER_DOWN_EN		BIT(17)
 #define RK3568_VP_DSP_CTRL__PRE_DITHER_DOWN_EN		BIT(16)
 #define RK3568_VP_DSP_CTRL__POST_DSP_OUT_R2Y		BIT(15)
+#define RK3568_VP_DSP_CTRL__DSP_RG_SWAP			BIT(10)
 #define RK3568_VP_DSP_CTRL__DSP_RB_SWAP			BIT(9)
+#define RK3568_VP_DSP_CTRL__DSP_BG_SWAP			BIT(8)
 #define RK3568_VP_DSP_CTRL__DSP_INTERLACE		BIT(7)
 #define RK3568_VP_DSP_CTRL__DSP_FILED_POL		BIT(6)
 #define RK3568_VP_DSP_CTRL__P2I_EN			BIT(5)
 #define RK3568_VP_DSP_CTRL__CORE_DCLK_DIV		BIT(4)
 #define RK3568_VP_DSP_CTRL__OUT_MODE			GENMASK(3, 0)
 
+#define RK3588_VP_CLK_CTRL__DCLK_OUT_DIV		GENMASK(3, 2)
+#define RK3588_VP_CLK_CTRL__DCLK_CORE_DIV		GENMASK(1, 0)
+
 #define RK3568_VP_POST_SCL_CTRL__VSCALEDOWN		BIT(1)
 #define RK3568_VP_POST_SCL_CTRL__HSCALEDOWN		BIT(0)
 
@@ -396,11 +429,37 @@ enum dst_factor_mode {
 #define RK3568_SYS_DSP_INFACE_EN_HDMI			BIT(1)
 #define RK3568_SYS_DSP_INFACE_EN_RGB			BIT(0)
 
+#define RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX		GENMASK(22, 21)
+#define RK3588_SYS_DSP_INFACE_EN_MIPI0_MUX		GENMASK(20, 20)
+#define RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX		GENMASK(19, 18)
+#define RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX		GENMASK(17, 16)
+#define RK3588_SYS_DSP_INFACE_EN_DP1_MUX		GENMASK(15, 14)
+#define RK3588_SYS_DSP_INFACE_EN_DP0_MUX		GENMASK(13, 12)
+#define RK3588_SYS_DSP_INFACE_EN_DPI			GENMASK(9, 8)
+#define RK3588_SYS_DSP_INFACE_EN_MIPI1			BIT(7)
+#define RK3588_SYS_DSP_INFACE_EN_MIPI0			BIT(6)
+#define RK3588_SYS_DSP_INFACE_EN_HDMI1			BIT(5)
+#define RK3588_SYS_DSP_INFACE_EN_EDP1			BIT(4)
+#define RK3588_SYS_DSP_INFACE_EN_HDMI0			BIT(3)
+#define RK3588_SYS_DSP_INFACE_EN_EDP0			BIT(2)
+#define RK3588_SYS_DSP_INFACE_EN_DP1			BIT(1)
+#define RK3588_SYS_DSP_INFACE_EN_DP0			BIT(0)
+
+#define RK3588_DSP_IF_MIPI1_PCLK_DIV			GENMASK(27, 26)
+#define RK3588_DSP_IF_MIPI0_PCLK_DIV			GENMASK(25, 24)
+#define RK3588_DSP_IF_EDP_HDMI1_PCLK_DIV		GENMASK(22, 22)
+#define RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV		GENMASK(21, 20)
+#define RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV		GENMASK(18, 18)
+#define RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV		GENMASK(17, 16)
+
 #define RK3568_DSP_IF_POL__MIPI_PIN_POL			GENMASK(19, 16)
 #define RK3568_DSP_IF_POL__EDP_PIN_POL			GENMASK(15, 12)
 #define RK3568_DSP_IF_POL__HDMI_PIN_POL			GENMASK(7, 4)
 #define RK3568_DSP_IF_POL__RGB_LVDS_PIN_POL		GENMASK(3, 0)
 
+#define RK3588_DSP_IF_POL__DP1_PIN_POL			GENMASK(14, 12)
+#define RK3588_DSP_IF_POL__DP0_PIN_POL			GENMASK(10, 8)
+
 #define RK3568_VP0_MIPI_CTRL__DCLK_DIV2_PHASE_LOCK	BIT(5)
 #define RK3568_VP0_MIPI_CTRL__DCLK_DIV2			BIT(4)
 
@@ -415,14 +474,19 @@ enum dst_factor_mode {
 #define VOP2_COLOR_KEY_MASK				BIT(31)
 
 #define RK3568_OVL_CTRL__LAYERSEL_REGDONE_IMD		BIT(28)
+#define RK3568_OVL_CTRL__YUV_MODE(vp)			BIT(vp)
 
 #define RK3568_VP_BG_MIX_CTRL__BG_DLY			GENMASK(31, 24)
 
 #define RK3568_OVL_PORT_SEL__SEL_PORT			GENMASK(31, 16)
 #define RK3568_OVL_PORT_SEL__SMART1			GENMASK(31, 30)
 #define RK3568_OVL_PORT_SEL__SMART0			GENMASK(29, 28)
+#define RK3588_OVL_PORT_SEL__ESMART3			GENMASK(31, 30)
+#define RK3588_OVL_PORT_SEL__ESMART2			GENMASK(29, 28)
 #define RK3568_OVL_PORT_SEL__ESMART1			GENMASK(27, 26)
 #define RK3568_OVL_PORT_SEL__ESMART0			GENMASK(25, 24)
+#define RK3588_OVL_PORT_SEL__CLUSTER3			GENMASK(23, 22)
+#define RK3588_OVL_PORT_SEL__CLUSTER2			GENMASK(21, 20)
 #define RK3568_OVL_PORT_SEL__CLUSTER1			GENMASK(19, 18)
 #define RK3568_OVL_PORT_SEL__CLUSTER0			GENMASK(17, 16)
 #define RK3568_OVL_PORT_SET__PORT2_MUX			GENMASK(11, 8)
@@ -435,6 +499,10 @@ enum dst_factor_mode {
 #define RK3568_CLUSTER_DLY_NUM__CLUSTER0_1		GENMASK(15, 8)
 #define RK3568_CLUSTER_DLY_NUM__CLUSTER0_0		GENMASK(7, 0)
 
+#define RK3568_CLUSTER_WIN_CTRL0__WIN0_EN		BIT(0)
+
+#define RK3568_SMART_REGION0_CTRL__WIN0_EN		BIT(0)
+
 #define RK3568_SMART_DLY_NUM__SMART1			GENMASK(31, 24)
 #define RK3568_SMART_DLY_NUM__SMART0			GENMASK(23, 16)
 #define RK3568_SMART_DLY_NUM__ESMART1			GENMASK(15, 8)
diff --git a/drivers/gpu/drm/rockchip/rockchip_lvds.c b/drivers/gpu/drm/rockchip/rockchip_lvds.c
index f0f47e9abf5a..59341654ec32 100644
--- a/drivers/gpu/drm/rockchip/rockchip_lvds.c
+++ b/drivers/gpu/drm/rockchip/rockchip_lvds.c
@@ -27,7 +27,6 @@
 #include <drm/drm_simple_kms_helper.h>
 
 #include "rockchip_drm_drv.h"
-#include "rockchip_drm_vop.h"
 #include "rockchip_lvds.h"
 
 #define DISPLAY_OUTPUT_RGB		0
diff --git a/drivers/gpu/drm/rockchip/rockchip_rgb.c b/drivers/gpu/drm/rockchip/rockchip_rgb.c
index c677b71ae516..dbfbde24698e 100644
--- a/drivers/gpu/drm/rockchip/rockchip_rgb.c
+++ b/drivers/gpu/drm/rockchip/rockchip_rgb.c
@@ -19,7 +19,6 @@
 #include <drm/drm_simple_kms_helper.h>
 
 #include "rockchip_drm_drv.h"
-#include "rockchip_drm_vop.h"
 #include "rockchip_rgb.h"
 
 struct rockchip_rgb {
diff --git a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
index 2c45d81983a5..48170694ac6b 100644
--- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
+++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
@@ -34,6 +34,30 @@ static const uint32_t formats_cluster[] = {
 	DRM_FORMAT_Y210, /* yuv422_10bit non-Linear mode only */
 };
 
+static const uint32_t formats_esmart[] = {
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_ARGB8888,
+	DRM_FORMAT_XBGR8888,
+	DRM_FORMAT_ABGR8888,
+	DRM_FORMAT_RGB888,
+	DRM_FORMAT_BGR888,
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_BGR565,
+	DRM_FORMAT_NV12, /* yuv420_8bit linear mode, 2 plane */
+	DRM_FORMAT_NV21, /* yvu420_8bit linear mode, 2 plane */
+	DRM_FORMAT_NV16, /* yuv422_8bit linear mode, 2 plane */
+	DRM_FORMAT_NV61, /* yvu422_8bit linear mode, 2 plane */
+	DRM_FORMAT_NV20, /* yuv422_10bit linear mode, 2 plane, no padding */
+	DRM_FORMAT_NV24, /* yuv444_8bit linear mode, 2 plane */
+	DRM_FORMAT_NV42, /* yvu444_8bit linear mode, 2 plane */
+	DRM_FORMAT_NV30, /* yuv444_10bit linear mode, 2 plane, no padding */
+	DRM_FORMAT_NV15, /* yuv420_10bit linear mode, 2 plane, no padding */
+	DRM_FORMAT_YVYU, /* yuv422_8bit[YVYU] linear mode */
+	DRM_FORMAT_VYUY, /* yuv422_8bit[VYUY] linear mode */
+	DRM_FORMAT_YUYV, /* yuv422_8bit[YUYV] linear mode */
+	DRM_FORMAT_UYVY, /* yuv422_8bit[UYVY] linear mode */
+};
+
 static const uint32_t formats_rk356x_esmart[] = {
 	DRM_FORMAT_XRGB8888,
 	DRM_FORMAT_ARGB8888,
@@ -112,7 +136,7 @@ static const uint64_t format_modifiers_afbc[] = {
 static const struct vop2_video_port_data rk3568_vop_video_ports[] = {
 	{
 		.id = 0,
-		.feature = VOP_FEATURE_OUTPUT_10BIT,
+		.feature = VOP2_VP_FEATURE_OUTPUT_10BIT,
 		.gamma_lut_len = 1024,
 		.cubic_lut_len = 9 * 9 * 9,
 		.max_output = { 4096, 2304 },
@@ -236,7 +260,188 @@ static const struct vop2_win_data rk3568_vop_win_data[] = {
 	},
 };
 
+static const struct vop2_video_port_data rk3588_vop_video_ports[] = {
+	{
+		.id = 0,
+		.feature = VOP2_VP_FEATURE_OUTPUT_10BIT,
+		.gamma_lut_len = 1024,
+		.cubic_lut_len = 9 * 9 * 9, /* 9x9x9 */
+		.max_output = { 4096, 2304 },
+		/* hdr2sdr sdr2hdr hdr2hdr sdr2sdr */
+		.pre_scan_max_dly = { 76, 65, 65, 54 },
+		.offset = 0xc00,
+	}, {
+		.id = 1,
+		.feature = VOP2_VP_FEATURE_OUTPUT_10BIT,
+		.gamma_lut_len = 1024,
+		.cubic_lut_len = 729, /* 9x9x9 */
+		.max_output = { 4096, 2304 },
+		.pre_scan_max_dly = { 76, 65, 65, 54 },
+		.offset = 0xd00,
+	}, {
+		.id = 2,
+		.feature = VOP2_VP_FEATURE_OUTPUT_10BIT,
+		.gamma_lut_len = 1024,
+		.cubic_lut_len = 17 * 17 * 17, /* 17x17x17 */
+		.max_output = { 4096, 2304 },
+		.pre_scan_max_dly = { 52, 52, 52, 52 },
+		.offset = 0xe00,
+	}, {
+		.id = 3,
+		.gamma_lut_len = 1024,
+		.max_output = { 2048, 1536 },
+		.pre_scan_max_dly = { 52, 52, 52, 52 },
+		.offset = 0xf00,
+	},
+};
+
+/*
+ * rk3588 vop with 4 cluster, 4 esmart win.
+ * Every cluster can work as 4K win or split into two win.
+ * All win in cluster support AFBCD.
+ *
+ * Every esmart win and smart win support 4 Multi-region.
+ *
+ * Scale filter mode:
+ *
+ * * Cluster:  bicubic for horizontal scale up, others use bilinear
+ * * ESmart:
+ *    * nearest-neighbor/bilinear/bicubic for scale up
+ *    * nearest-neighbor/bilinear/average for scale down
+ *
+ * AXI Read ID assignment:
+ * Two AXI bus:
+ * AXI0 is a read/write bus with a higher performance.
+ * AXI1 is a read only bus.
+ *
+ * Every window on an AXI bus must be assigned two unique
+ * read ids (yrgb_id/uv_id; valid ids are 0x1~0xe).
+ *
+ * AXI0:
+ * Cluster0/1, Esmart0/1, WriteBack
+ *
+ * AXI 1:
+ * Cluster2/3, Esmart2/3
+ *
+ */
+static const struct vop2_win_data rk3588_vop_win_data[] = {
+	{
+		.name = "Cluster0-win0",
+		.phys_id = ROCKCHIP_VOP2_CLUSTER0,
+		.base = 0x1000,
+		.formats = formats_cluster,
+		.nformats = ARRAY_SIZE(formats_cluster),
+		.format_modifiers = format_modifiers_afbc,
+		.layer_sel_id = 0,
+		.supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 |
+				       DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y,
+		.max_upscale_factor = 4,
+		.max_downscale_factor = 4,
+		.dly = { 4, 26, 29 },
+		.type = DRM_PLANE_TYPE_PRIMARY,
+		.feature = WIN_FEATURE_AFBDC | WIN_FEATURE_CLUSTER,
+	}, {
+		.name = "Cluster1-win0",
+		.phys_id = ROCKCHIP_VOP2_CLUSTER1,
+		.base = 0x1200,
+		.formats = formats_cluster,
+		.nformats = ARRAY_SIZE(formats_cluster),
+		.format_modifiers = format_modifiers_afbc,
+		.layer_sel_id = 1,
+		.supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 |
+				       DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y,
+		.type = DRM_PLANE_TYPE_PRIMARY,
+		.max_upscale_factor = 4,
+		.max_downscale_factor = 4,
+		.dly = { 4, 26, 29 },
+		.feature = WIN_FEATURE_AFBDC | WIN_FEATURE_CLUSTER,
+	}, {
+		.name = "Cluster2-win0",
+		.phys_id = ROCKCHIP_VOP2_CLUSTER2,
+		.base = 0x1400,
+		.formats = formats_cluster,
+		.nformats = ARRAY_SIZE(formats_cluster),
+		.format_modifiers = format_modifiers_afbc,
+		.layer_sel_id = 4,
+		.supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 |
+				       DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y,
+		.type = DRM_PLANE_TYPE_PRIMARY,
+		.max_upscale_factor = 4,
+		.max_downscale_factor = 4,
+		.dly = { 4, 26, 29 },
+		.feature = WIN_FEATURE_AFBDC | WIN_FEATURE_CLUSTER,
+	}, {
+		.name = "Cluster3-win0",
+		.phys_id = ROCKCHIP_VOP2_CLUSTER3,
+		.base = 0x1600,
+		.formats = formats_cluster,
+		.nformats = ARRAY_SIZE(formats_cluster),
+		.format_modifiers = format_modifiers_afbc,
+		.layer_sel_id = 5,
+		.supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 |
+				       DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y,
+		.type = DRM_PLANE_TYPE_PRIMARY,
+		.max_upscale_factor = 4,
+		.max_downscale_factor = 4,
+		.dly = { 4, 26, 29 },
+		.feature = WIN_FEATURE_AFBDC | WIN_FEATURE_CLUSTER,
+	}, {
+		.name = "Esmart0-win0",
+		.phys_id = ROCKCHIP_VOP2_ESMART0,
+		.formats = formats_esmart,
+		.nformats = ARRAY_SIZE(formats_esmart),
+		.format_modifiers = format_modifiers,
+		.base = 0x1800,
+		.layer_sel_id = 2,
+		.supported_rotations = DRM_MODE_REFLECT_Y,
+		.type = DRM_PLANE_TYPE_OVERLAY,
+		.max_upscale_factor = 8,
+		.max_downscale_factor = 8,
+		.dly = { 23, 45, 48 },
+	}, {
+		.name = "Esmart1-win0",
+		.phys_id = ROCKCHIP_VOP2_ESMART1,
+		.formats = formats_esmart,
+		.nformats = ARRAY_SIZE(formats_esmart),
+		.format_modifiers = format_modifiers,
+		.base = 0x1a00,
+		.layer_sel_id = 3,
+		.supported_rotations = DRM_MODE_REFLECT_Y,
+		.type = DRM_PLANE_TYPE_OVERLAY,
+		.max_upscale_factor = 8,
+		.max_downscale_factor = 8,
+		.dly = { 23, 45, 48 },
+	}, {
+		.name = "Esmart2-win0",
+		.phys_id = ROCKCHIP_VOP2_ESMART2,
+		.base = 0x1c00,
+		.formats = formats_esmart,
+		.nformats = ARRAY_SIZE(formats_esmart),
+		.format_modifiers = format_modifiers,
+		.layer_sel_id = 6,
+		.supported_rotations = DRM_MODE_REFLECT_Y,
+		.type = DRM_PLANE_TYPE_OVERLAY,
+		.max_upscale_factor = 8,
+		.max_downscale_factor = 8,
+		.dly = { 23, 45, 48 },
+	}, {
+		.name = "Esmart3-win0",
+		.phys_id = ROCKCHIP_VOP2_ESMART3,
+		.formats = formats_esmart,
+		.nformats = ARRAY_SIZE(formats_esmart),
+		.format_modifiers = format_modifiers,
+		.base = 0x1e00,
+		.layer_sel_id = 7,
+		.supported_rotations = DRM_MODE_REFLECT_Y,
+		.type = DRM_PLANE_TYPE_OVERLAY,
+		.max_upscale_factor = 8,
+		.max_downscale_factor = 8,
+		.dly = { 23, 45, 48 },
+	},
+};
+
 static const struct vop2_data rk3566_vop = {
+	.feature = VOP2_FEATURE_HAS_SYS_GRF,
 	.nr_vps = 3,
 	.max_input = { 4096, 2304 },
 	.max_output = { 4096, 2304 },
@@ -247,6 +452,7 @@ static const struct vop2_data rk3566_vop = {
 };
 
 static const struct vop2_data rk3568_vop = {
+	.feature = VOP2_FEATURE_HAS_SYS_GRF,
 	.nr_vps = 3,
 	.max_input = { 4096, 2304 },
 	.max_output = { 4096, 2304 },
@@ -256,6 +462,18 @@ static const struct vop2_data rk3568_vop = {
 	.soc_id = 3568,
 };
 
+static const struct vop2_data rk3588_vop = {	/* .data of the "rockchip,rk3588-vop" match entry */
+	.feature = VOP2_FEATURE_HAS_SYS_GRF | VOP2_FEATURE_HAS_VO1_GRF |
+		   VOP2_FEATURE_HAS_VOP_GRF | VOP2_FEATURE_HAS_SYS_PMU,
+	.nr_vps = 4,	/* rk3566_vop and rk3568_vop both use 3 */
+	.max_input = { 4096, 4320 },	/* presumably { width, height } - TODO confirm */
+	.max_output = { 4096, 4320 },
+	.vp = rk3588_vop_video_ports,
+	.win = rk3588_vop_win_data,
+	.win_size = ARRAY_SIZE(rk3588_vop_win_data),	/* number of entries in .win */
+	.soc_id = 3588,
+};
+
 static const struct of_device_id vop2_dt_match[] = {
 	{
 		.compatible = "rockchip,rk3566-vop",
@@ -264,6 +482,9 @@ static const struct of_device_id vop2_dt_match[] = {
 		.compatible = "rockchip,rk3568-vop",
 		.data = &rk3568_vop,
 	}, {
+		.compatible = "rockchip,rk3588-vop",
+		.data = &rk3588_vop
+	}, {
 	},
 };
 MODULE_DEVICE_TABLE(of, vop2_dt_match);
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 550492a7a031..d442b893275b 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -1184,14 +1184,16 @@ static void drm_sched_run_job_work(struct work_struct *w)
 	if (READ_ONCE(sched->pause_submit))
 		return;
 
+	/* Find entity with a ready job */
 	entity = drm_sched_select_entity(sched);
 	if (!entity)
-		return;
+		return;	/* No more work */
 
 	sched_job = drm_sched_entity_pop_job(entity);
 	if (!sched_job) {
 		complete_all(&entity->entity_idle);
-		return;	/* No more work */
+		drm_sched_run_job_queue(sched);
+		return;
 	}
 
 	s_fence = sched_job->s_fence;
diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c
index bef293922b98..3d0e093a7e6e 100644
--- a/drivers/gpu/drm/solomon/ssd130x.c
+++ b/drivers/gpu/drm/solomon/ssd130x.c
@@ -319,7 +319,7 @@ static int ssd130x_pwm_enable(struct ssd130x_device *ssd130x)
 
 	pwm_init_state(ssd130x->pwm, &pwmstate);
 	pwm_set_relative_duty_cycle(&pwmstate, 50, 100);
-	pwm_apply_state(ssd130x->pwm, &pwmstate);
+	pwm_apply_might_sleep(ssd130x->pwm, &pwmstate);
 
 	/* Enable the PWM */
 	pwm_enable(ssd130x->pwm);
diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c
index d1a65a921f5a..f5f62eb0eeca 100644
--- a/drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c
+++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c
@@ -302,7 +302,6 @@ int sun4i_hdmi_i2c_create(struct device *dev, struct sun4i_hdmi *hdmi)
 		return -ENOMEM;
 
 	adap->owner = THIS_MODULE;
-	adap->class = I2C_CLASS_DDC;
 	adap->algo = &sun4i_hdmi_i2c_algorithm;
 	strscpy(adap->name, "sun4i_hdmi_i2c adapter", sizeof(adap->name));
 	i2c_set_adapdata(adap, hdmi);
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index ff36171c8fb7..03d1c76aec2d 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -960,7 +960,8 @@ int host1x_client_iommu_attach(struct host1x_client *client)
 	 * not the shared IOMMU domain, don't try to attach it to a different
 	 * domain. This allows using the IOMMU-backed DMA API.
 	 */
-	if (domain && domain != tegra->domain)
+	if (domain && domain->type != IOMMU_DOMAIN_IDENTITY &&
+	    domain != tegra->domain)
 		return 0;
 
 	if (tegra->domain) {
@@ -1242,9 +1243,26 @@ static int host1x_drm_probe(struct host1x_device *dev)
 
 	drm_mode_config_reset(drm);
 
-	err = drm_aperture_remove_framebuffers(&tegra_drm_driver);
-	if (err < 0)
-		goto hub;
+	/*
+	 * Only take over from a potential firmware framebuffer if any CRTCs
+	 * have been registered. This must not be a fatal error because there
+	 * are other accelerators that are exposed via this driver.
+	 *
+	 * Another case where this happens is on Tegra234 where the display
+	 * hardware is no longer part of the host1x complex, so this driver
+	 * will not expose any modesetting features.
+	 */
+	if (drm->mode_config.num_crtc > 0) {
+		err = drm_aperture_remove_framebuffers(&tegra_drm_driver);
+		if (err < 0)
+			goto hub;
+	} else {
+		/*
+		 * Indicate to userspace that this doesn't expose any display
+		 * capabilities.
+		 */
+		drm->driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC);
+	}
 
 	err = drm_dev_register(drm, 0);
 	if (err < 0)
diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c
index ea2af6bd9abe..be2d9d7764be 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -8,16 +8,308 @@
 
 #include <linux/prime_numbers.h>
 #include <linux/sched/signal.h>
+#include <linux/sizes.h>
 
 #include <drm/drm_buddy.h>
 
 #include "../lib/drm_random.h"
 
+static unsigned int random_seed;
+
 static inline u64 get_size(int order, u64 chunk_size)
 {
 	return (1 << order) * chunk_size;
 }
 
+static void drm_test_buddy_alloc_range_bias(struct kunit *test)
+{
+	u32 mm_size, ps, bias_size, bias_start, bias_end, bias_rem;
+	DRM_RND_STATE(prng, random_seed);
+	unsigned int i, count, *order;
+	struct drm_buddy mm;
+	LIST_HEAD(allocated);
+
+	bias_size = SZ_1M;	/* width of each uniform bias window used below */
+	ps = roundup_pow_of_two(prandom_u32_state(&prng) % bias_size);
+	ps = max(SZ_4K, ps);	/* random power-of-two chunk size, never below 4K */
+	mm_size = (SZ_8M-1) & ~(ps-1); /* Multiple roots */
+
+	kunit_info(test, "mm_size=%u, ps=%u\n", mm_size, ps);
+
+	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps),
+			       "buddy_init failed\n");
+
+	count = mm_size / bias_size;
+	order = drm_random_order(count, &prng);
+	KUNIT_ASSERT_TRUE(test, order);	/* order[] is dereferenced in the loop below */
+
+	/*
+	 * Idea is to split the address space into uniform bias ranges, and then
+	 * in some random order allocate within each bias, using various
+	 * patterns within. This should detect if allocations leak out from a
+	 * given bias, for example.
+	 */
+
+	for (i = 0; i < count; i++) {
+		LIST_HEAD(tmp);
+		u32 size;
+
+		bias_start = order[i] * bias_size;
+		bias_end = bias_start + bias_size;
+		bias_rem = bias_size;
+
+		/* internal round_up too big */
+		KUNIT_ASSERT_TRUE_MSG(test,
+				      drm_buddy_alloc_blocks(&mm, bias_start,
+							     bias_end, bias_size + ps, bias_size,
+							     &allocated,
+							     DRM_BUDDY_RANGE_ALLOCATION),
+				      "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
+				      bias_start, bias_end, bias_size + ps, bias_size);
+
+		/* size too big */
+		KUNIT_ASSERT_TRUE_MSG(test,
+				      drm_buddy_alloc_blocks(&mm, bias_start,
+							     bias_end, bias_size + ps, ps,
+							     &allocated,
+							     DRM_BUDDY_RANGE_ALLOCATION),
+				      "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
+				      bias_start, bias_end, bias_size + ps, ps);
+
+		/* bias range too small for size */
+		KUNIT_ASSERT_TRUE_MSG(test,
+				      drm_buddy_alloc_blocks(&mm, bias_start + ps,
+							     bias_end, bias_size, ps,
+							     &allocated,
+							     DRM_BUDDY_RANGE_ALLOCATION),
+				      "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
+				      bias_start + ps, bias_end, bias_size, ps);
+
+		/* bias misaligned */
+		KUNIT_ASSERT_TRUE_MSG(test,
+				      drm_buddy_alloc_blocks(&mm, bias_start + ps,
+							     bias_end - ps,
+							     bias_size >> 1, bias_size >> 1,
+							     &allocated,
+							     DRM_BUDDY_RANGE_ALLOCATION),
+				      "buddy_alloc h didn't fail with bias(%x-%x), size=%u, ps=%u\n",
+				      bias_start + ps, bias_end - ps, bias_size >> 1, bias_size >> 1);
+
+		/* single big page */
+		KUNIT_ASSERT_FALSE_MSG(test,
+				       drm_buddy_alloc_blocks(&mm, bias_start,
+							      bias_end, bias_size, bias_size,
+							      &tmp,
+							      DRM_BUDDY_RANGE_ALLOCATION),
+				       "buddy_alloc i failed with bias(%x-%x), size=%u, ps=%u\n",
+				       bias_start, bias_end, bias_size, bias_size);
+		drm_buddy_free_list(&mm, &tmp);
+
+		/* single page with internal round_up */
+		KUNIT_ASSERT_FALSE_MSG(test,
+				       drm_buddy_alloc_blocks(&mm, bias_start,
+							      bias_end, ps, bias_size,
+							      &tmp,
+							      DRM_BUDDY_RANGE_ALLOCATION),
+				       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
+				       bias_start, bias_end, ps, bias_size);
+		drm_buddy_free_list(&mm, &tmp);
+
+		/* random size within */
+		size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
+		if (size)
+			KUNIT_ASSERT_FALSE_MSG(test,
+					       drm_buddy_alloc_blocks(&mm, bias_start,
+								      bias_end, size, ps,
+								      &tmp,
+								      DRM_BUDDY_RANGE_ALLOCATION),
+					       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
+					       bias_start, bias_end, size, ps);
+
+		bias_rem -= size;
+		/* too big for current avail */
+		KUNIT_ASSERT_TRUE_MSG(test,
+				      drm_buddy_alloc_blocks(&mm, bias_start,
+							     bias_end, bias_rem + ps, ps,
+							     &allocated,
+							     DRM_BUDDY_RANGE_ALLOCATION),
+				      "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
+				      bias_start, bias_end, bias_rem + ps, ps);
+
+		if (bias_rem) {
+			/* random fill of the remainder */
+			size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
+			size = max(size, ps);
+
+			KUNIT_ASSERT_FALSE_MSG(test,
+					       drm_buddy_alloc_blocks(&mm, bias_start,
+								      bias_end, size, ps,
+								      &allocated,
+								      DRM_BUDDY_RANGE_ALLOCATION),
+					       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
+					       bias_start, bias_end, size, ps);
+			/*
+			 * Intentionally allow some space to be left
+			 * unallocated, and ideally not always on the bias
+			 * boundaries.
+			 */
+			drm_buddy_free_list(&mm, &tmp);
+		} else {
+			list_splice_tail(&tmp, &allocated);
+		}
+	}
+
+	kfree(order);
+	drm_buddy_free_list(&mm, &allocated);
+	drm_buddy_fini(&mm);
+
+	/*
+	 * Something more free-form. Idea is to pick a random starting bias
+	 * range within the address space and then start filling it up. Also
+	 * randomly grow the bias range in both directions as we go along. This
+	 * should give us bias start/end which is not always uniform like above,
+	 * and in some cases will require the allocator to jump over already
+	 * allocated nodes in the middle of the address space.
+	 */
+
+	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps),
+			       "buddy_init failed\n");
+
+	bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps);
+	bias_end = round_up(bias_start + prandom_u32_state(&prng) % (mm_size - bias_start), ps);
+	bias_end = max(bias_end, bias_start + ps);
+	bias_rem = bias_end - bias_start;
+
+	do {
+		u32 size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
+
+		KUNIT_ASSERT_FALSE_MSG(test,
+				       drm_buddy_alloc_blocks(&mm, bias_start,
+							      bias_end, size, ps,
+							      &allocated,
+							      DRM_BUDDY_RANGE_ALLOCATION),
+				       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
+				       bias_start, bias_end, size, ps);
+		bias_rem -= size;
+
+		/*
+		 * Try to randomly grow the bias range in both directions, or
+		 * only one, or perhaps don't grow at all.
+		 */
+		do {
+			u32 old_bias_start = bias_start;
+			u32 old_bias_end = bias_end;
+
+			if (bias_start)
+				bias_start -= round_up(prandom_u32_state(&prng) % bias_start, ps);
+			if (bias_end != mm_size)
+				bias_end += round_up(prandom_u32_state(&prng) % (mm_size - bias_end), ps);
+
+			bias_rem += old_bias_start - bias_start;
+			bias_rem += bias_end - old_bias_end;
+		} while (!bias_rem && (bias_start || bias_end != mm_size));
+	} while (bias_rem);
+
+	KUNIT_ASSERT_EQ(test, bias_start, 0);
+	KUNIT_ASSERT_EQ(test, bias_end, mm_size);
+	KUNIT_ASSERT_TRUE_MSG(test,
+			      drm_buddy_alloc_blocks(&mm, bias_start, bias_end,
+						     ps, ps,
+						     &allocated,
+						     DRM_BUDDY_RANGE_ALLOCATION),
+			      "buddy_alloc passed with bias(%x-%x), size=%u\n",
+			      bias_start, bias_end, ps);
+
+	drm_buddy_free_list(&mm, &allocated);
+	drm_buddy_fini(&mm);
+}
+
+static void drm_test_buddy_alloc_contiguous(struct kunit *test)
+{
+	const unsigned long ps = SZ_4K, mm_size = 16 * 3 * SZ_4K;
+	unsigned long i, n_pages, total;
+	struct drm_buddy_block *block;
+	struct drm_buddy mm;
+	LIST_HEAD(left);
+	LIST_HEAD(middle);
+	LIST_HEAD(right);
+	LIST_HEAD(allocated);
+
+	KUNIT_ASSERT_FALSE(test, drm_buddy_init(&mm, mm_size, ps));	/* mm is used unconditionally below */
+
+	/*
+	 * Idea is to fragment the address space by alternating block
+	 * allocations between three different lists; one for left, middle and
+	 * right. We can then free a list to simulate fragmentation. In
+	 * particular we want to exercise the DRM_BUDDY_CONTIGUOUS_ALLOCATION,
+	 * including the try_harder path.
+	 */
+
+	i = 0;
+	n_pages = mm_size / ps;
+	do {
+		struct list_head *list;
+		int slot = i % 3;
+
+		if (slot == 0)
+			list = &left;
+		else if (slot == 1)
+			list = &middle;
+		else
+			list = &right;
+		KUNIT_ASSERT_FALSE_MSG(test,
+				       drm_buddy_alloc_blocks(&mm, 0, mm_size,
+							      ps, ps, list, 0),
+				       "buddy_alloc hit an error size=%lu\n",
+				       ps);
+	} while (++i < n_pages);
+
+	KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+							   3 * ps, ps, &allocated,
+							   DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+			       "buddy_alloc didn't error size=%lu\n", 3 * ps);
+
+	drm_buddy_free_list(&mm, &middle);
+	KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+							   3 * ps, ps, &allocated,
+							   DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+			       "buddy_alloc didn't error size=%lu\n", 3 * ps);
+	KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+							   2 * ps, ps, &allocated,
+							   DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+			       "buddy_alloc didn't error size=%lu\n", 2 * ps);
+
+	drm_buddy_free_list(&mm, &right);
+	KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+							   3 * ps, ps, &allocated,
+							   DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+			       "buddy_alloc didn't error size=%lu\n", 3 * ps);
+	/*
+	 * At this point we should have enough contiguous space for 2 blocks,
+	 * however they are never buddies (since we freed middle and right) so
+	 * will require the try_harder logic to find them.
+	 */
+	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+							    2 * ps, ps, &allocated,
+							    DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+			       "buddy_alloc hit an error size=%lu\n", 2 * ps);
+
+	drm_buddy_free_list(&mm, &left);
+	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+							    3 * ps, ps, &allocated,
+							    DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+			       "buddy_alloc hit an error size=%lu\n", 3 * ps);
+
+	total = 0;	/* bytes held by the two successful contiguous allocations */
+	list_for_each_entry(block, &allocated, link)
+		total += drm_buddy_block_size(&mm, block);
+
+	KUNIT_ASSERT_EQ(test, total, ps * 2 + ps * 3);
+
+	drm_buddy_free_list(&mm, &allocated);
+	drm_buddy_fini(&mm);
+}
+
 static void drm_test_buddy_alloc_pathological(struct kunit *test)
 {
 	u64 mm_size, size, start = 0;
@@ -275,16 +567,30 @@ static void drm_test_buddy_alloc_limit(struct kunit *test)
 	drm_buddy_fini(&mm);
 }
 
+static int drm_buddy_suite_init(struct kunit_suite *suite)
+{
+	while (!random_seed)	/* redraw until the file-scope seed is non-zero */
+		random_seed = get_random_u32();
+
+	kunit_info(suite, "Testing DRM buddy manager, with random_seed=0x%x\n",
+		   random_seed);	/* log the seed this run will use */
+
+	return 0;	/* always reports success */
+}
+
 static struct kunit_case drm_buddy_tests[] = {
 	KUNIT_CASE(drm_test_buddy_alloc_limit),
 	KUNIT_CASE(drm_test_buddy_alloc_optimistic),
 	KUNIT_CASE(drm_test_buddy_alloc_pessimistic),
 	KUNIT_CASE(drm_test_buddy_alloc_pathological),
+	KUNIT_CASE(drm_test_buddy_alloc_contiguous),
+	KUNIT_CASE(drm_test_buddy_alloc_range_bias),
 	{}
 };
 
 static struct kunit_suite drm_buddy_test_suite = {
 	.name = "drm_buddy",
+	.suite_init = drm_buddy_suite_init,
 	.test_cases = drm_buddy_tests,
 };
 
diff --git a/drivers/gpu/drm/tests/drm_kunit_helpers.c b/drivers/gpu/drm/tests/drm_kunit_helpers.c
index bccb33b900f3..ca4f8e4c5d5d 100644
--- a/drivers/gpu/drm/tests/drm_kunit_helpers.c
+++ b/drivers/gpu/drm/tests/drm_kunit_helpers.c
@@ -5,6 +5,7 @@
 #include <drm/drm_kunit_helpers.h>
 #include <drm/drm_managed.h>
 
+#include <kunit/device.h>
 #include <kunit/resource.h>
 
 #include <linux/device.h>
@@ -15,40 +16,6 @@
 static const struct drm_mode_config_funcs drm_mode_config_funcs = {
 };
 
-static int fake_probe(struct platform_device *pdev)
-{
-	return 0;
-}
-
-static struct platform_driver fake_platform_driver = {
-	.probe	= fake_probe,
-	.driver = {
-		.name	= KUNIT_DEVICE_NAME,
-	},
-};
-
-static void kunit_action_platform_driver_unregister(void *ptr)
-{
-	struct platform_driver *drv = ptr;
-
-	platform_driver_unregister(drv);
-
-}
-
-static void kunit_action_platform_device_put(void *ptr)
-{
-	struct platform_device *pdev = ptr;
-
-	platform_device_put(pdev);
-}
-
-static void kunit_action_platform_device_del(void *ptr)
-{
-	struct platform_device *pdev = ptr;
-
-	platform_device_del(pdev);
-}
-
 /**
  * drm_kunit_helper_alloc_device - Allocate a mock device for a KUnit test
  * @test: The test context object
@@ -66,34 +33,7 @@ static void kunit_action_platform_device_del(void *ptr)
  */
 struct device *drm_kunit_helper_alloc_device(struct kunit *test)
 {
-	struct platform_device *pdev;
-	int ret;
-
-	ret = platform_driver_register(&fake_platform_driver);
-	KUNIT_ASSERT_EQ(test, ret, 0);
-
-	ret = kunit_add_action_or_reset(test,
-					kunit_action_platform_driver_unregister,
-					&fake_platform_driver);
-	KUNIT_ASSERT_EQ(test, ret, 0);
-
-	pdev = platform_device_alloc(KUNIT_DEVICE_NAME, PLATFORM_DEVID_NONE);
-	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pdev);
-
-	ret = kunit_add_action_or_reset(test,
-					kunit_action_platform_device_put,
-					pdev);
-	KUNIT_ASSERT_EQ(test, ret, 0);
-
-	ret = platform_device_add(pdev);
-	KUNIT_ASSERT_EQ(test, ret, 0);
-
-	ret = kunit_add_action_or_reset(test,
-					kunit_action_platform_device_del,
-					pdev);
-	KUNIT_ASSERT_EQ(test, ret, 0);
-
-	return &pdev->dev;
+	return kunit_device_register(test, KUNIT_DEVICE_NAME);
 }
 EXPORT_SYMBOL_GPL(drm_kunit_helper_alloc_device);
 
@@ -106,19 +46,7 @@ EXPORT_SYMBOL_GPL(drm_kunit_helper_alloc_device);
  */
 void drm_kunit_helper_free_device(struct kunit *test, struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-
-	kunit_release_action(test,
-			     kunit_action_platform_device_del,
-			     pdev);
-
-	kunit_release_action(test,
-			     kunit_action_platform_device_put,
-			     pdev);
-
-	kunit_release_action(test,
-			     kunit_action_platform_driver_unregister,
-			     &fake_platform_driver);
+	kunit_device_unregister(test, dev);
 }
 EXPORT_SYMBOL_GPL(drm_kunit_helper_free_device);
 
diff --git a/drivers/gpu/drm/tests/drm_mm_test.c b/drivers/gpu/drm/tests/drm_mm_test.c
index 4e9247cf9977..1eb0c304f960 100644
--- a/drivers/gpu/drm/tests/drm_mm_test.c
+++ b/drivers/gpu/drm/tests/drm_mm_test.c
@@ -188,13 +188,13 @@ out:
 
 static void drm_test_mm_debug(struct kunit *test)
 {
+	struct drm_printer p = drm_debug_printer(test->name);
 	struct drm_mm mm;
 	struct drm_mm_node nodes[2];
 
 	/* Create a small drm_mm with a couple of nodes and a few holes, and
 	 * check that the debug iterator doesn't explode over a trivial drm_mm.
 	 */
-
 	drm_mm_init(&mm, 0, 4096);
 
 	memset(nodes, 0, sizeof(nodes));
@@ -209,6 +209,9 @@ static void drm_test_mm_debug(struct kunit *test)
 	KUNIT_ASSERT_FALSE_MSG(test, drm_mm_reserve_node(&mm, &nodes[1]),
 			       "failed to reserve node[0] {start=%lld, size=%lld)\n",
 			       nodes[0].start, nodes[0].size);
+
+	drm_mm_print(&mm, &p);
+	KUNIT_SUCCEED(test);
 }
 
 static bool expect_insert(struct kunit *test, struct drm_mm *mm,
diff --git a/drivers/gpu/drm/ttm/tests/ttm_device_test.c b/drivers/gpu/drm/ttm/tests/ttm_device_test.c
index b1b423b68cdf..19eaff22e6ae 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_device_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_device_test.c
@@ -175,7 +175,7 @@ static void ttm_device_init_pools(struct kunit *test)
 
 	if (params->pools_init_expected) {
 		for (int i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
-			for (int j = 0; j <= MAX_ORDER; ++j) {
+			for (int j = 0; j < NR_PAGE_ORDERS; ++j) {
 				pt = pool->caching[i].orders[j];
 				KUNIT_EXPECT_PTR_EQ(test, pt.pool, pool);
 				KUNIT_EXPECT_EQ(test, pt.caching, i);
diff --git a/drivers/gpu/drm/ttm/tests/ttm_pool_test.c b/drivers/gpu/drm/ttm/tests/ttm_pool_test.c
index 2d9cae8cd984..cceaa18d4e46 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_pool_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_pool_test.c
@@ -109,7 +109,7 @@ static const struct ttm_pool_test_case ttm_pool_basic_cases[] = {
 	},
 	{
 		.description = "Above the allocation limit",
-		.order = MAX_ORDER + 1,
+		.order = MAX_PAGE_ORDER + 1,
 	},
 	{
 		.description = "One page, with coherent DMA mappings enabled",
@@ -118,7 +118,7 @@ static const struct ttm_pool_test_case ttm_pool_basic_cases[] = {
 	},
 	{
 		.description = "Above the allocation limit, with coherent DMA mappings enabled",
-		.order = MAX_ORDER + 1,
+		.order = MAX_PAGE_ORDER + 1,
 		.use_dma_alloc = true,
 	},
 };
@@ -165,7 +165,7 @@ static void ttm_pool_alloc_basic(struct kunit *test)
 	fst_page = tt->pages[0];
 	last_page = tt->pages[tt->num_pages - 1];
 
-	if (params->order <= MAX_ORDER) {
+	if (params->order <= MAX_PAGE_ORDER) {
 		if (params->use_dma_alloc) {
 			KUNIT_ASSERT_NOT_NULL(test, (void *)fst_page->private);
 			KUNIT_ASSERT_NOT_NULL(test, (void *)last_page->private);
@@ -182,7 +182,7 @@ static void ttm_pool_alloc_basic(struct kunit *test)
 			 * order 0 blocks
 			 */
 			KUNIT_ASSERT_EQ(test, fst_page->private,
-					min_t(unsigned int, MAX_ORDER,
+					min_t(unsigned int, MAX_PAGE_ORDER,
 					      params->order));
 			KUNIT_ASSERT_EQ(test, last_page->private, 0);
 		}
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index f5187b384ae9..76027960054f 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -95,11 +95,17 @@ static int ttm_global_init(void)
 	ttm_pool_mgr_init(num_pages);
 	ttm_tt_mgr_init(num_pages, num_dma32);
 
-	glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
+	glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32 |
+					   __GFP_NOWARN);
 
+	/* Retry without GFP_DMA32 for platforms DMA32 is not available */
 	if (unlikely(glob->dummy_read_page == NULL)) {
-		ret = -ENOMEM;
-		goto out;
+		glob->dummy_read_page = alloc_page(__GFP_ZERO);
+		if (unlikely(glob->dummy_read_page == NULL)) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		pr_warn("Using GFP_DMA32 fallback for dummy_read_page\n");
 	}
 
 	INIT_LIST_HEAD(&glob->device_list);
@@ -195,7 +201,7 @@ int ttm_device_init(struct ttm_device *bdev, const struct ttm_device_funcs *func
 		    bool use_dma_alloc, bool use_dma32)
 {
 	struct ttm_global *glob = &ttm_glob;
-	int ret;
+	int ret, nid;
 
 	if (WARN_ON(vma_manager == NULL))
 		return -EINVAL;
@@ -215,7 +221,12 @@ int ttm_device_init(struct ttm_device *bdev, const struct ttm_device_funcs *func
 
 	ttm_sys_man_init(bdev);
 
-	ttm_pool_init(&bdev->pool, dev, dev_to_node(dev), use_dma_alloc, use_dma32);
+	if (dev)
+		nid = dev_to_node(dev);
+	else
+		nid = NUMA_NO_NODE;
+
+	ttm_pool_init(&bdev->pool, dev, nid, use_dma_alloc, use_dma32);
 
 	bdev->vma_manager = vma_manager;
 	spin_lock_init(&bdev->lru_lock);
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index fe610a3cace0..112438d965ff 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -65,11 +65,11 @@ module_param(page_pool_size, ulong, 0644);
 
 static atomic_long_t allocated_pages;
 
-static struct ttm_pool_type global_write_combined[MAX_ORDER + 1];
-static struct ttm_pool_type global_uncached[MAX_ORDER + 1];
+static struct ttm_pool_type global_write_combined[NR_PAGE_ORDERS];
+static struct ttm_pool_type global_uncached[NR_PAGE_ORDERS];
 
-static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER + 1];
-static struct ttm_pool_type global_dma32_uncached[MAX_ORDER + 1];
+static struct ttm_pool_type global_dma32_write_combined[NR_PAGE_ORDERS];
+static struct ttm_pool_type global_dma32_uncached[NR_PAGE_ORDERS];
 
 static spinlock_t shrinker_lock;
 static struct list_head shrinker_list;
@@ -387,7 +387,7 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt,
 				enum ttm_caching caching,
 				pgoff_t start_page, pgoff_t end_page)
 {
-	struct page **pages = tt->pages;
+	struct page **pages = &tt->pages[start_page];
 	unsigned int order;
 	pgoff_t i, nr;
 
@@ -447,7 +447,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
 	else
 		gfp_flags |= GFP_HIGHUSER;
 
-	for (order = min_t(unsigned int, MAX_ORDER, __fls(num_pages));
+	for (order = min_t(unsigned int, MAX_PAGE_ORDER, __fls(num_pages));
 	     num_pages;
 	     order = min_t(unsigned int, order, __fls(num_pages))) {
 		struct ttm_pool_type *pt;
@@ -568,7 +568,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
 
 	if (use_dma_alloc || nid != NUMA_NO_NODE) {
 		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
-			for (j = 0; j <= MAX_ORDER; ++j)
+			for (j = 0; j < NR_PAGE_ORDERS; ++j)
 				ttm_pool_type_init(&pool->caching[i].orders[j],
 						   pool, i, j);
 	}
@@ -601,7 +601,7 @@ void ttm_pool_fini(struct ttm_pool *pool)
 
 	if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE) {
 		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
-			for (j = 0; j <= MAX_ORDER; ++j)
+			for (j = 0; j < NR_PAGE_ORDERS; ++j)
 				ttm_pool_type_fini(&pool->caching[i].orders[j]);
 	}
 
@@ -656,7 +656,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m)
 	unsigned int i;
 
 	seq_puts(m, "\t ");
-	for (i = 0; i <= MAX_ORDER; ++i)
+	for (i = 0; i < NR_PAGE_ORDERS; ++i)
 		seq_printf(m, " ---%2u---", i);
 	seq_puts(m, "\n");
 }
@@ -667,7 +667,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
 {
 	unsigned int i;
 
-	for (i = 0; i <= MAX_ORDER; ++i)
+	for (i = 0; i < NR_PAGE_ORDERS; ++i)
 		seq_printf(m, " %8u", ttm_pool_type_count(&pt[i]));
 	seq_puts(m, "\n");
 }
@@ -776,7 +776,7 @@ int ttm_pool_mgr_init(unsigned long num_pages)
 	spin_lock_init(&shrinker_lock);
 	INIT_LIST_HEAD(&shrinker_list);
 
-	for (i = 0; i <= MAX_ORDER; ++i) {
+	for (i = 0; i < NR_PAGE_ORDERS; ++i) {
 		ttm_pool_type_init(&global_write_combined[i], NULL,
 				   ttm_write_combined, i);
 		ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);
@@ -816,7 +816,7 @@ void ttm_pool_mgr_fini(void)
 {
 	unsigned int i;
 
-	for (i = 0; i <= MAX_ORDER; ++i) {
+	for (i = 0; i < NR_PAGE_ORDERS; ++i) {
 		ttm_pool_type_fini(&global_write_combined[i]);
 		ttm_pool_type_fini(&global_uncached[i]);
 
diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c
index f843a50d5dce..94eafcecc65b 100644
--- a/drivers/gpu/drm/v3d/v3d_debugfs.c
+++ b/drivers/gpu/drm/v3d/v3d_debugfs.c
@@ -62,9 +62,9 @@ static const struct v3d_reg_def v3d_core_reg_defs[] = {
 	REGDEF(33, 71, V3D_PTB_BPCA),
 	REGDEF(33, 71, V3D_PTB_BPCS),
 
-	REGDEF(33, 41, V3D_GMP_STATUS(33)),
-	REGDEF(33, 41, V3D_GMP_CFG(33)),
-	REGDEF(33, 41, V3D_GMP_VIO_ADDR(33)),
+	REGDEF(33, 42, V3D_GMP_STATUS(33)),
+	REGDEF(33, 42, V3D_GMP_CFG(33)),
+	REGDEF(33, 42, V3D_GMP_VIO_ADDR(33)),
 
 	REGDEF(33, 71, V3D_ERR_FDBGO),
 	REGDEF(33, 71, V3D_ERR_FDBGB),
@@ -74,13 +74,13 @@ static const struct v3d_reg_def v3d_core_reg_defs[] = {
 
 static const struct v3d_reg_def v3d_csd_reg_defs[] = {
 	REGDEF(41, 71, V3D_CSD_STATUS),
-	REGDEF(41, 41, V3D_CSD_CURRENT_CFG0(41)),
-	REGDEF(41, 41, V3D_CSD_CURRENT_CFG1(41)),
-	REGDEF(41, 41, V3D_CSD_CURRENT_CFG2(41)),
-	REGDEF(41, 41, V3D_CSD_CURRENT_CFG3(41)),
-	REGDEF(41, 41, V3D_CSD_CURRENT_CFG4(41)),
-	REGDEF(41, 41, V3D_CSD_CURRENT_CFG5(41)),
-	REGDEF(41, 41, V3D_CSD_CURRENT_CFG6(41)),
+	REGDEF(41, 42, V3D_CSD_CURRENT_CFG0(41)),
+	REGDEF(41, 42, V3D_CSD_CURRENT_CFG1(41)),
+	REGDEF(41, 42, V3D_CSD_CURRENT_CFG2(41)),
+	REGDEF(41, 42, V3D_CSD_CURRENT_CFG3(41)),
+	REGDEF(41, 42, V3D_CSD_CURRENT_CFG4(41)),
+	REGDEF(41, 42, V3D_CSD_CURRENT_CFG5(41)),
+	REGDEF(41, 42, V3D_CSD_CURRENT_CFG6(41)),
 	REGDEF(71, 71, V3D_CSD_CURRENT_CFG0(71)),
 	REGDEF(71, 71, V3D_CSD_CURRENT_CFG1(71)),
 	REGDEF(71, 71, V3D_CSD_CURRENT_CFG2(71)),
diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c
index fcff41dd2315..88f63d526b22 100644
--- a/drivers/gpu/drm/v3d/v3d_submit.c
+++ b/drivers/gpu/drm/v3d/v3d_submit.c
@@ -147,6 +147,13 @@ v3d_job_allocate(void **container, size_t size)
 	return 0;
 }
 
+static void
+v3d_job_deallocate(void **container)
+{
+	kfree(*container);	/* free the object *container points at */
+	*container = NULL;	/* caller's pointer no longer dangles */
+}
+
 static int
 v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
 	     struct v3d_job *job, void (*free)(struct kref *ref),
@@ -273,8 +280,10 @@ v3d_setup_csd_jobs_and_bos(struct drm_file *file_priv,
 
 	ret = v3d_job_init(v3d, file_priv, &(*job)->base,
 			   v3d_job_free, args->in_sync, se, V3D_CSD);
-	if (ret)
+	if (ret) {
+		v3d_job_deallocate((void *)job);
 		return ret;
+	}
 
 	ret = v3d_job_allocate((void *)clean_job, sizeof(**clean_job));
 	if (ret)
@@ -282,8 +291,10 @@ v3d_setup_csd_jobs_and_bos(struct drm_file *file_priv,
 
 	ret = v3d_job_init(v3d, file_priv, *clean_job,
 			   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
-	if (ret)
+	if (ret) {
+		v3d_job_deallocate((void *)clean_job);
 		return ret;
+	}
 
 	(*job)->args = *args;
 
@@ -860,8 +871,10 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 
 	ret = v3d_job_init(v3d, file_priv, &render->base,
 			   v3d_render_job_free, args->in_sync_rcl, &se, V3D_RENDER);
-	if (ret)
+	if (ret) {
+		v3d_job_deallocate((void *)&render);
 		goto fail;
+	}
 
 	render->start = args->rcl_start;
 	render->end = args->rcl_end;
@@ -874,8 +887,10 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 
 		ret = v3d_job_init(v3d, file_priv, &bin->base,
 				   v3d_job_free, args->in_sync_bcl, &se, V3D_BIN);
-		if (ret)
+		if (ret) {
+			v3d_job_deallocate((void *)&bin);
 			goto fail;
+		}
 
 		bin->start = args->bcl_start;
 		bin->end = args->bcl_end;
@@ -892,8 +907,10 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 
 		ret = v3d_job_init(v3d, file_priv, clean_job,
 				   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
-		if (ret)
+		if (ret) {
+			v3d_job_deallocate((void *)&clean_job);
 			goto fail;
+		}
 
 		last_job = clean_job;
 	} else {
@@ -1015,8 +1032,10 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
 
 	ret = v3d_job_init(v3d, file_priv, &job->base,
 			   v3d_job_free, args->in_sync, &se, V3D_TFU);
-	if (ret)
+	if (ret) {
+		v3d_job_deallocate((void *)&job);
 		goto fail;
+	}
 
 	job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles),
 			       sizeof(*job->base.bo), GFP_KERNEL);
@@ -1233,8 +1252,10 @@ v3d_submit_cpu_ioctl(struct drm_device *dev, void *data,
 
 	ret = v3d_job_init(v3d, file_priv, &cpu_job->base,
 			   v3d_job_free, 0, &se, V3D_CPU);
-	if (ret)
+	if (ret) {
+		v3d_job_deallocate((void *)&cpu_job);
 		goto fail;
+	}
 
 	clean_job = cpu_job->indirect_csd.clean_job;
 	csd_job = cpu_job->indirect_csd.job;
diff --git a/drivers/gpu/drm/vc4/tests/vc4_mock.c b/drivers/gpu/drm/vc4/tests/vc4_mock.c
index 63ca46f4cb35..becb3dbaa548 100644
--- a/drivers/gpu/drm/vc4/tests/vc4_mock.c
+++ b/drivers/gpu/drm/vc4/tests/vc4_mock.c
@@ -153,12 +153,9 @@ static int __build_mock(struct kunit *test, struct drm_device *drm,
 	return 0;
 }
 
-static void kunit_action_drm_dev_unregister(void *ptr)
-{
-	struct drm_device *drm = ptr;
-
-	drm_dev_unregister(drm);
-}
+KUNIT_DEFINE_ACTION_WRAPPER(kunit_action_drm_dev_unregister,
+			    drm_dev_unregister,
+			    struct drm_device *);
 
 static struct vc4_dev *__mock_device(struct kunit *test, bool is_vc5)
 {
diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index 25c9c71256d3..f05e2c95a60d 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -672,11 +672,21 @@ vc4_hdmi_connector_duplicate_state(struct drm_connector *connector)
 	return &new_state->base;
 }
 
+static void vc4_hdmi_connector_destroy_state(struct drm_connector *connector,
+					     struct drm_connector_state *state)
+{
+	struct vc4_hdmi_connector_state *vc4_state =
+		conn_state_to_vc4_hdmi_conn_state(state);
+
+	__drm_atomic_helper_connector_destroy_state(state);
+	kfree(vc4_state);
+}
+
 static const struct drm_connector_funcs vc4_hdmi_connector_funcs = {
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.reset = vc4_hdmi_connector_reset,
 	.atomic_duplicate_state = vc4_hdmi_connector_duplicate_state,
-	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+	.atomic_destroy_state = vc4_hdmi_connector_destroy_state,
 	.atomic_get_property = vc4_hdmi_connector_get_property,
 	.atomic_set_property = vc4_hdmi_connector_set_property,
 };
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c
index f8e9abe647b9..9539aa28937f 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -94,6 +94,7 @@ static int virtio_gpu_probe(struct virtio_device *vdev)
 			goto err_free;
 	}
 
+	dma_set_max_seg_size(dev->dev, dma_max_mapping_size(dev->dev) ?: UINT_MAX);
 	ret = virtio_gpu_init(vdev, dev);
 	if (ret)
 		goto err_free;
diff --git a/drivers/gpu/drm/xe/.gitignore b/drivers/gpu/drm/xe/.gitignore
new file mode 100644
index 000000000000..8778bf132674
--- /dev/null
+++ b/drivers/gpu/drm/xe/.gitignore
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+*.hdrtest
+/generated
+/xe_gen_wa_oob
diff --git a/drivers/gpu/drm/xe/.kunitconfig b/drivers/gpu/drm/xe/.kunitconfig
new file mode 100644
index 000000000000..9590eac91af3
--- /dev/null
+++ b/drivers/gpu/drm/xe/.kunitconfig
@@ -0,0 +1,13 @@
+# xe dependencies
+CONFIG_KUNIT=y
+CONFIG_PCI=y
+CONFIG_PCI_IOV=y
+CONFIG_DEBUG_FS=y
+CONFIG_DRM=y
+CONFIG_DRM_FBDEV_EMULATION=y
+CONFIG_DRM_KMS_HELPER=y
+CONFIG_DRM_XE=y
+CONFIG_DRM_XE_DISPLAY=n
+CONFIG_EXPERT=y
+CONFIG_FB=y
+CONFIG_DRM_XE_KUNIT_TEST=y
diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
new file mode 100644
index 000000000000..e36ae1f0d885
--- /dev/null
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -0,0 +1,96 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config DRM_XE
+	tristate "Intel Xe Graphics"
+	depends on DRM && PCI && MMU && (m || (y && KUNIT=y)) && 64BIT
+	select INTERVAL_TREE
+	# we need shmfs for the swappable backing store, and in particular
+	# the shmem_readpage() which depends upon tmpfs
+	select SHMEM
+	select TMPFS
+	select DRM_BUDDY
+	select DRM_EXEC
+	select DRM_KMS_HELPER
+	select DRM_PANEL
+	select DRM_SUBALLOC_HELPER
+	select DRM_DISPLAY_DP_HELPER
+	select DRM_DISPLAY_HDCP_HELPER
+	select DRM_DISPLAY_HDMI_HELPER
+	select DRM_DISPLAY_HELPER
+	select DRM_MIPI_DSI
+	select RELAY
+	select IRQ_WORK
+	# xe depends on ACPI_VIDEO when ACPI is enabled
+	# but for select to work, need to select ACPI_VIDEO's dependencies, ick
+	select BACKLIGHT_CLASS_DEVICE if ACPI
+	select INPUT if ACPI
+	select ACPI_VIDEO if X86 && ACPI
+	select ACPI_BUTTON if ACPI
+	select ACPI_WMI if X86 && ACPI
+	select SYNC_FILE
+	select IOSF_MBI
+	select CRC32
+	select SND_HDA_I915 if SND_HDA_CORE
+	select CEC_CORE if CEC_NOTIFIER
+	select VMAP_PFN
+	select DRM_TTM
+	select DRM_TTM_HELPER
+	select DRM_EXEC
+	select DRM_GPUVM
+	select DRM_SCHED
+	select MMU_NOTIFIER
+	select WANT_DEV_COREDUMP
+	select AUXILIARY_BUS
+	help
+	  Experimental driver for Intel Xe series GPUs
+
+	  If "M" is selected, the module will be called xe.
+
+config DRM_XE_DISPLAY
+	bool "Enable display support"
+	depends on DRM_XE && DRM_XE=m
+	select FB_IOMEM_HELPERS
+	select I2C
+	select I2C_ALGOBIT
+	default y
+	help
+	  Disable this option only if you want to compile out display support.
+
+config DRM_XE_FORCE_PROBE
+	string "Force probe xe for selected Intel hardware IDs"
+	depends on DRM_XE
+	help
+	  This is the default value for the xe.force_probe module
+	  parameter. Using the module parameter overrides this option.
+
+	  Force probe the xe for Intel graphics devices that are
+	  recognized but not properly supported by this kernel version. It is
+	  recommended to upgrade to a kernel version with proper support as soon
+	  as it is available.
+
+	  It can also be used to block the probe of recognized and fully
+	  supported devices.
+
+	  Use "" to disable force probe. If in doubt, use this.
+
+	  Use "<pci-id>[,<pci-id>,...]" to force probe the xe for listed
+	  devices. For example, "4500" or "4500,4571".
+
+	  Use "*" to force probe the driver for all known devices.
+
+	  Use "!" right before the ID to block the probe of the device. For
+	  example, "4500,!4571" forces the probe of 4500 and blocks the probe of
+	  4571.
+
+	  Use "!*" to block the probe of the driver for all known devices.
+
+menu "drm/Xe Debugging"
+depends on DRM_XE
+depends on EXPERT
+source "drivers/gpu/drm/xe/Kconfig.debug"
+endmenu
+
+menu "drm/xe Profile Guided Optimisation"
+	visible if EXPERT
+	depends on DRM_XE
+	source "drivers/gpu/drm/xe/Kconfig.profile"
+endmenu
diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug
new file mode 100644
index 000000000000..549065f57a78
--- /dev/null
+++ b/drivers/gpu/drm/xe/Kconfig.debug
@@ -0,0 +1,107 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config DRM_XE_WERROR
+	bool "Force GCC to throw an error instead of a warning when compiling"
+	# As this may inadvertently break the build, only allow the user
+	# to shoot oneself in the foot iff they aim really hard
+	depends on EXPERT
+	# We use the dependency on !COMPILE_TEST to not be enabled in
+	# allmodconfig or allyesconfig configurations
+	depends on !COMPILE_TEST
+	default n
+	help
+	  Add -Werror to the build flags for (and only for) xe.ko.
+	  Do not enable this unless you are writing code for the xe.ko module.
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_DEBUG
+	bool "Enable additional driver debugging"
+	depends on DRM_XE
+	depends on EXPERT
+	depends on !COMPILE_TEST
+	default n
+	help
+	  Choose this option to turn on extra driver debugging that may affect
+	  performance but will catch some internal issues.
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_DEBUG_VM
+	bool "Enable extra VM debugging info"
+	default n
+	help
+	  Enable extra VM debugging info
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_DEBUG_SRIOV
+	bool "Enable extra SR-IOV debugging"
+	default n
+	help
+	  Enable extra SR-IOV debugging info.
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_DEBUG_MEM
+	bool "Enable passing SYS/VRAM addresses to user space"
+	default n
+	help
+	  Pass object location through uapi. Intended for extended
+	  testing and development only.
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_SIMPLE_ERROR_CAPTURE
+	bool "Enable simple error capture to dmesg on job timeout"
+	default n
+	help
+	  Choose this option when debugging an unexpected job timeout
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_KUNIT_TEST
+        tristate "KUnit tests for the drm xe driver" if !KUNIT_ALL_TESTS
+	depends on DRM_XE && KUNIT && DEBUG_FS
+	default KUNIT_ALL_TESTS
+	select DRM_EXPORT_FOR_TESTS if m
+	select DRM_KUNIT_TEST_HELPERS
+	help
+	  Choose this option to allow the driver to perform selftests under
+	  the kunit framework
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_LARGE_GUC_BUFFER
+        bool "Enable larger guc log buffer"
+        default n
+        help
+          Choose this option when debugging guc issues.
+          Buffer should be large enough for complex issues.
+
+          Recommended for driver developers only.
+
+          If in doubt, say "N".
+
+config DRM_XE_USERPTR_INVAL_INJECT
+       bool "Inject userptr invalidation -EINVAL errors"
+       default n
+       help
+         Choose this option when debugging error paths that
+	 are hit during checks for userptr invalidations.
+
+	 Recommended for driver developers only.
+	 If in doubt, say "N".
diff --git a/drivers/gpu/drm/xe/Kconfig.profile b/drivers/gpu/drm/xe/Kconfig.profile
new file mode 100644
index 000000000000..ba17a25e8db3
--- /dev/null
+++ b/drivers/gpu/drm/xe/Kconfig.profile
@@ -0,0 +1,54 @@
+config DRM_XE_JOB_TIMEOUT_MAX
+	int "Default max job timeout (ms)"
+	default 10000 # milliseconds
+	help
+	  Configures the default max job timeout after which job will
+	  be forcefully taken away from scheduler.
+config DRM_XE_JOB_TIMEOUT_MIN
+	int "Default min job timeout (ms)"
+	default 1 # milliseconds
+	help
+	  Configures the default min job timeout after which job will
+	  be forcefully taken away from scheduler.
+config DRM_XE_TIMESLICE_MAX
+	int "Default max timeslice duration (us)"
+	default 10000000 # microseconds
+	help
+	  Configures the default max timeslice duration between multiple
+	  contexts by guc scheduling.
+config DRM_XE_TIMESLICE_MIN
+	int "Default min timeslice duration (us)"
+	default 1 # microseconds
+	help
+	  Configures the default min timeslice duration between multiple
+	  contexts by guc scheduling.
+config DRM_XE_PREEMPT_TIMEOUT
+	int "Preempt timeout (us, jiffy granularity)"
+	default 640000 # microseconds
+	help
+	  How long to wait (in microseconds) for a preemption event to occur
+	  when submitting a new context. If the current context does not hit
+	  an arbitration point and yield to HW before the timer expires, the
+	  HW will be reset to allow the more important context to execute.
+config DRM_XE_PREEMPT_TIMEOUT_MAX
+	int "Default max preempt timeout (us)"
+	default 10000000 # microseconds
+	help
+	  Configures the default max preempt timeout after which context
+	  will be forcefully taken away and higher priority context will
+	  run.
+config DRM_XE_PREEMPT_TIMEOUT_MIN
+	int "Default min preempt timeout (us)"
+	default 1 # microseconds
+	help
+	  Configures the default min preempt timeout after which context
+	  will be forcefully taken away and higher priority context will
+	  run.
+config DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT
+	bool "Default configuration of limitation on scheduler timeout"
+	default y
+	help
+	  Configures the enablement of limitation on scheduler timeout
+	  to apply to applicable user. For elevated user, all above MIN
+	  and MAX values will apply when this configuration is enabled to
+	  apply limitation. By default limitation is applied.
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
new file mode 100644
index 000000000000..efcf0ab7a1a6
--- /dev/null
+++ b/drivers/gpu/drm/xe/Makefile
@@ -0,0 +1,304 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the drm device driver.  This driver provides support for the
+# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
+
+# Unconditionally enable W=1 warnings locally
+# --- begin copy-paste W=1 warnings from scripts/Makefile.extrawarn
+subdir-ccflags-y += -Wextra -Wunused -Wno-unused-parameter
+subdir-ccflags-y += -Wmissing-declarations
+subdir-ccflags-y += $(call cc-option, -Wrestrict)
+subdir-ccflags-y += -Wmissing-format-attribute
+subdir-ccflags-y += -Wmissing-prototypes
+subdir-ccflags-y += -Wold-style-definition
+subdir-ccflags-y += -Wmissing-include-dirs
+subdir-ccflags-y += $(call cc-option, -Wunused-but-set-variable)
+subdir-ccflags-y += $(call cc-option, -Wunused-const-variable)
+subdir-ccflags-y += $(call cc-option, -Wpacked-not-aligned)
+subdir-ccflags-y += $(call cc-option, -Wformat-overflow)
+subdir-ccflags-y += $(call cc-option, -Wformat-truncation)
+subdir-ccflags-y += $(call cc-option, -Wstringop-truncation)
+# The following turn off the warnings enabled by -Wextra
+ifeq ($(findstring 2, $(KBUILD_EXTRA_WARN)),)
+subdir-ccflags-y += -Wno-missing-field-initializers
+subdir-ccflags-y += -Wno-type-limits
+subdir-ccflags-y += -Wno-shift-negative-value
+endif
+ifeq ($(findstring 3, $(KBUILD_EXTRA_WARN)),)
+subdir-ccflags-y += -Wno-sign-compare
+endif
+# --- end copy-paste
+
+# Enable -Werror in CI and development
+subdir-ccflags-$(CONFIG_DRM_XE_WERROR) += -Werror
+
+subdir-ccflags-y += -I$(obj) -I$(srctree)/$(src)
+
+# generated sources
+hostprogs := xe_gen_wa_oob
+
+generated_oob := $(obj)/generated/xe_wa_oob.c $(obj)/generated/xe_wa_oob.h
+
+quiet_cmd_wa_oob = GEN     $(notdir $(generated_oob))
+      cmd_wa_oob = mkdir -p $(@D); $^ $(generated_oob)
+
+$(generated_oob) &: $(obj)/xe_gen_wa_oob $(srctree)/$(src)/xe_wa_oob.rules
+	$(call cmd,wa_oob)
+
+uses_generated_oob := \
+	$(obj)/xe_gsc.o \
+	$(obj)/xe_guc.o \
+	$(obj)/xe_migrate.o \
+	$(obj)/xe_ring_ops.o \
+	$(obj)/xe_vm.o \
+	$(obj)/xe_wa.o \
+	$(obj)/xe_ttm_stolen_mgr.o
+
+$(uses_generated_oob): $(generated_oob)
+
+# Please keep these build lists sorted!
+
+# core driver code
+
+xe-y += xe_bb.o \
+	xe_bo.o \
+	xe_bo_evict.o \
+	xe_debugfs.o \
+	xe_devcoredump.o \
+	xe_device.o \
+	xe_device_sysfs.o \
+	xe_dma_buf.o \
+	xe_drm_client.o \
+	xe_exec.o \
+	xe_execlist.o \
+	xe_exec_queue.o \
+	xe_force_wake.o \
+	xe_ggtt.o \
+	xe_gpu_scheduler.o \
+	xe_gsc.o \
+	xe_gsc_submit.o \
+	xe_gt.o \
+	xe_gt_ccs_mode.o \
+	xe_gt_clock.o \
+	xe_gt_debugfs.o \
+	xe_gt_freq.o \
+	xe_gt_idle.o \
+	xe_gt_mcr.o \
+	xe_gt_pagefault.o \
+	xe_gt_sysfs.o \
+	xe_gt_throttle_sysfs.o \
+	xe_gt_tlb_invalidation.o \
+	xe_gt_topology.o \
+	xe_guc.o \
+	xe_guc_ads.o \
+	xe_guc_ct.o \
+	xe_guc_debugfs.o \
+	xe_guc_hwconfig.o \
+	xe_guc_log.o \
+	xe_guc_pc.o \
+	xe_guc_submit.o \
+	xe_heci_gsc.o \
+	xe_hw_engine.o \
+	xe_hw_engine_class_sysfs.o \
+	xe_hw_fence.o \
+	xe_huc.o \
+	xe_huc_debugfs.o \
+	xe_irq.o \
+	xe_lrc.o \
+	xe_migrate.o \
+	xe_mmio.o \
+	xe_mocs.o \
+	xe_module.o \
+	xe_pat.o \
+	xe_pci.o \
+	xe_pcode.o \
+	xe_pm.o \
+	xe_preempt_fence.o \
+	xe_pt.o \
+	xe_pt_walk.o \
+	xe_query.o \
+	xe_range_fence.o \
+	xe_reg_sr.o \
+	xe_reg_whitelist.o \
+	xe_rtp.o \
+	xe_ring_ops.o \
+	xe_sa.o \
+	xe_sched_job.o \
+	xe_step.o \
+	xe_sync.o \
+	xe_tile.o \
+	xe_tile_sysfs.o \
+	xe_trace.o \
+	xe_ttm_sys_mgr.o \
+	xe_ttm_stolen_mgr.o \
+	xe_ttm_vram_mgr.o \
+	xe_tuning.o \
+	xe_uc.o \
+	xe_uc_debugfs.o \
+	xe_uc_fw.o \
+	xe_vm.o \
+	xe_wait_user_fence.o \
+	xe_wa.o \
+	xe_wopcm.o
+
+# graphics hardware monitoring (HWMON) support
+xe-$(CONFIG_HWMON) += xe_hwmon.o
+
+# graphics virtualization (SR-IOV) support
+xe-y += xe_sriov.o
+
+xe-$(CONFIG_PCI_IOV) += \
+	xe_lmtt.o \
+	xe_lmtt_2l.o \
+	xe_lmtt_ml.o
+
+# i915 Display compat #defines and #includes
+subdir-ccflags-$(CONFIG_DRM_XE_DISPLAY) += \
+	-I$(srctree)/$(src)/display/ext \
+	-I$(srctree)/$(src)/compat-i915-headers \
+	-I$(srctree)/drivers/gpu/drm/xe/display/ \
+	-I$(srctree)/drivers/gpu/drm/i915/display/ \
+	-Ddrm_i915_gem_object=xe_bo \
+	-Ddrm_i915_private=xe_device
+
+CFLAGS_i915-display/intel_fbdev.o = $(call cc-disable-warning, override-init)
+CFLAGS_i915-display/intel_display_device.o = $(call cc-disable-warning, override-init)
+
+# Rule to build SOC code shared with i915
+$(obj)/i915-soc/%.o: $(srctree)/drivers/gpu/drm/i915/soc/%.c FORCE
+	$(call cmd,force_checksrc)
+	$(call if_changed_rule,cc_o_c)
+
+# Rule to build display code shared with i915
+$(obj)/i915-display/%.o: $(srctree)/drivers/gpu/drm/i915/display/%.c FORCE
+	$(call cmd,force_checksrc)
+	$(call if_changed_rule,cc_o_c)
+
+# Display code specific to xe
+xe-$(CONFIG_DRM_XE_DISPLAY) += \
+	xe_display.o \
+	display/xe_fb_pin.o \
+	display/xe_hdcp_gsc.o \
+	display/xe_plane_initial.o \
+	display/xe_display_rps.o \
+	display/xe_display_misc.o \
+	display/xe_dsb_buffer.o \
+	display/intel_fbdev_fb.o \
+	display/intel_fb_bo.o \
+	display/ext/i915_irq.o \
+	display/ext/i915_utils.o
+
+# SOC code shared with i915
+xe-$(CONFIG_DRM_XE_DISPLAY) += \
+	i915-soc/intel_dram.o \
+	i915-soc/intel_pch.o
+
+# Display code shared with i915
+xe-$(CONFIG_DRM_XE_DISPLAY) += \
+	i915-display/icl_dsi.o \
+	i915-display/intel_atomic.o \
+	i915-display/intel_atomic_plane.o \
+	i915-display/intel_audio.o \
+	i915-display/intel_backlight.o \
+	i915-display/intel_bios.o \
+	i915-display/intel_bw.o \
+	i915-display/intel_cdclk.o \
+	i915-display/intel_color.o \
+	i915-display/intel_combo_phy.o \
+	i915-display/intel_connector.o \
+	i915-display/intel_crtc.o \
+	i915-display/intel_crtc_state_dump.o \
+	i915-display/intel_cursor.o \
+	i915-display/intel_cx0_phy.o \
+	i915-display/intel_ddi.o \
+	i915-display/intel_ddi_buf_trans.o \
+	i915-display/intel_display.o \
+	i915-display/intel_display_debugfs.o \
+	i915-display/intel_display_debugfs_params.o \
+	i915-display/intel_display_device.o \
+	i915-display/intel_display_driver.o \
+	i915-display/intel_display_irq.o \
+	i915-display/intel_display_params.o \
+	i915-display/intel_display_power.o \
+	i915-display/intel_display_power_map.o \
+	i915-display/intel_display_power_well.o \
+	i915-display/intel_display_trace.o \
+	i915-display/intel_display_wa.o \
+	i915-display/intel_dkl_phy.o \
+	i915-display/intel_dmc.o \
+	i915-display/intel_dp.o \
+	i915-display/intel_dp_aux.o \
+	i915-display/intel_dp_aux_backlight.o \
+	i915-display/intel_dp_hdcp.o \
+	i915-display/intel_dp_link_training.o \
+	i915-display/intel_dp_mst.o \
+	i915-display/intel_dpll.o \
+	i915-display/intel_dpll_mgr.o \
+	i915-display/intel_dpt_common.o \
+	i915-display/intel_drrs.o \
+	i915-display/intel_dsb.o \
+	i915-display/intel_dsi.o \
+	i915-display/intel_dsi_dcs_backlight.o \
+	i915-display/intel_dsi_vbt.o \
+	i915-display/intel_fb.o \
+	i915-display/intel_fbc.o \
+	i915-display/intel_fdi.o \
+	i915-display/intel_fifo_underrun.o \
+	i915-display/intel_frontbuffer.o \
+	i915-display/intel_global_state.o \
+	i915-display/intel_gmbus.o \
+	i915-display/intel_hdcp.o \
+	i915-display/intel_hdmi.o \
+	i915-display/intel_hotplug.o \
+	i915-display/intel_hotplug_irq.o \
+	i915-display/intel_hti.o \
+	i915-display/intel_link_bw.o \
+	i915-display/intel_lspcon.o \
+	i915-display/intel_modeset_lock.o \
+	i915-display/intel_modeset_setup.o \
+	i915-display/intel_modeset_verify.o \
+	i915-display/intel_panel.o \
+	i915-display/intel_pipe_crc.o \
+	i915-display/intel_pmdemand.o \
+	i915-display/intel_pps.o \
+	i915-display/intel_psr.o \
+	i915-display/intel_qp_tables.o \
+	i915-display/intel_quirks.o \
+	i915-display/intel_snps_phy.o \
+	i915-display/intel_tc.o \
+	i915-display/intel_vblank.o \
+	i915-display/intel_vdsc.o \
+	i915-display/intel_vga.o \
+	i915-display/intel_vrr.o \
+	i915-display/intel_wm.o \
+	i915-display/skl_scaler.o \
+	i915-display/skl_universal_plane.o \
+	i915-display/skl_watermark.o
+
+ifeq ($(CONFIG_ACPI),y)
+	xe-$(CONFIG_DRM_XE_DISPLAY) += \
+		i915-display/intel_acpi.o \
+		i915-display/intel_opregion.o
+endif
+
+ifeq ($(CONFIG_DRM_FBDEV_EMULATION),y)
+	xe-$(CONFIG_DRM_XE_DISPLAY) += i915-display/intel_fbdev.o
+endif
+
+obj-$(CONFIG_DRM_XE) += xe.o
+obj-$(CONFIG_DRM_XE_KUNIT_TEST) += tests/
+
+# header test
+hdrtest_find_args := -not -path xe_rtp_helpers.h
+ifneq ($(CONFIG_DRM_XE_DISPLAY),y)
+	hdrtest_find_args += -not -path display/\* -not -path compat-i915-headers/\* -not -path xe_display.h
+endif
+
+always-$(CONFIG_DRM_XE_WERROR) += \
+	$(patsubst %.h,%.hdrtest, $(shell cd $(srctree)/$(src) && find * -name '*.h' $(hdrtest_find_args)))
+
+quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@)
+      cmd_hdrtest = $(CC) -DHDRTEST $(filter-out $(CFLAGS_GCOV), $(c_flags)) -S -o /dev/null -x c /dev/null -include $<; touch $@
+
+$(obj)/%.hdrtest: $(src)/%.h FORCE
+	$(call if_changed_dep,hdrtest)
diff --git a/drivers/gpu/drm/xe/abi/gsc_command_header_abi.h b/drivers/gpu/drm/xe/abi/gsc_command_header_abi.h
new file mode 100644
index 000000000000..a4c2646803b5
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/gsc_command_header_abi.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _ABI_GSC_COMMAND_HEADER_ABI_H
+#define _ABI_GSC_COMMAND_HEADER_ABI_H
+
+#include <linux/types.h>
+
+struct intel_gsc_mtl_header {
+	u32 validity_marker;
+#define GSC_HECI_VALIDITY_MARKER 0xA578875A
+
+	u8 heci_client_id;
+
+	u8 reserved1;
+
+	u16 header_version;
+#define MTL_GSC_HEADER_VERSION 1
+
+	/* FW allows host to decide host_session handle as it sees fit. */
+	u64 host_session_handle;
+
+	/* handle generated by FW for messages that need to be re-submitted */
+	u64 gsc_message_handle;
+
+	u32 message_size; /* lower 20 bits only, upper 12 are reserved */
+
+	/*
+	 * Flags mask:
+	 * Bit 0: Pending
+	 * Bit 1: Session Cleanup
+	 * Bits 2-15: Flags
+	 * Bits 16-31: Extension Size
+	 * According to internal spec flags are either input or output
+	 * we distinguish the flags using OUTFLAG or INFLAG
+	 */
+	u32 flags;
+#define GSC_OUTFLAG_MSG_PENDING	BIT(0)
+#define GSC_INFLAG_MSG_CLEANUP	BIT(1)
+
+	u32 status;
+} __packed;
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/gsc_mkhi_commands_abi.h b/drivers/gpu/drm/xe/abi/gsc_mkhi_commands_abi.h
new file mode 100644
index 000000000000..ad4d041873ab
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/gsc_mkhi_commands_abi.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _ABI_GSC_MKHI_COMMANDS_ABI_H
+#define _ABI_GSC_MKHI_COMMANDS_ABI_H
+
+#include <linux/types.h>
+
+/* Heci client ID for MKHI commands */
+#define HECI_MEADDRESS_MKHI 7
+
+/* Generic MKHI header */
+struct gsc_mkhi_header {
+	u8  group_id;
+	u8  command;
+	u8  reserved;
+	u8  result;
+} __packed;
+
+/* GFX_SRV commands */
+#define MKHI_GROUP_ID_GFX_SRV 0x30
+
+#define MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION (0x42)
+
+struct gsc_get_compatibility_version_in {
+	struct gsc_mkhi_header header;
+} __packed;
+
+struct gsc_get_compatibility_version_out {
+	struct gsc_mkhi_header header;
+	u16 proj_major;
+	u16 compat_major;
+	u16 compat_minor;
+	u16 reserved[5];
+} __packed;
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h b/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h
new file mode 100644
index 000000000000..57520809e48d
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _ABI_GSC_PXP_COMMANDS_ABI_H
+#define _ABI_GSC_PXP_COMMANDS_ABI_H
+
+#include <linux/types.h>
+
+/* Heci client ID for PXP commands */
+#define HECI_MEADDRESS_PXP 17
+
+#define PXP_APIVER(x, y) (((x) & 0xFFFF) << 16 | ((y) & 0xFFFF))
+
+/*
+ * there are a lot of status codes for PXP, but we only define the cross-API
+ * common ones that we actually can handle in the kernel driver. Other failure
+ * codes should be printed to error msg for debug.
+ */
+enum pxp_status {
+	PXP_STATUS_SUCCESS = 0x0,
+	PXP_STATUS_ERROR_API_VERSION = 0x1002,
+	PXP_STATUS_NOT_READY = 0x100e,
+	PXP_STATUS_PLATFCONFIG_KF1_NOVERIF = 0x101a,
+	PXP_STATUS_PLATFCONFIG_KF1_BAD = 0x101f,
+	PXP_STATUS_OP_NOT_PERMITTED = 0x4013
+};
+
+/* Common PXP FW message header */
+struct pxp_cmd_header {
+	u32 api_version;
+	u32 command_id;
+	union {
+		u32 status; /* out */
+		u32 stream_id; /* in */
+#define PXP_CMDHDR_EXTDATA_SESSION_VALID GENMASK(0, 0)
+#define PXP_CMDHDR_EXTDATA_APP_TYPE GENMASK(1, 1)
+#define PXP_CMDHDR_EXTDATA_SESSION_ID GENMASK(17, 2)
+	};
+	/* Length of the message (excluding the header) */
+	u32 buffer_len;
+} __packed;
+
+#define PXP43_CMDID_NEW_HUC_AUTH 0x0000003F /* MTL+ */
+
+/* PXP-Input-Packet: HUC Auth-only */
+struct pxp43_new_huc_auth_in {
+	struct pxp_cmd_header header;
+	u64 huc_base_address;
+	u32 huc_size;
+} __packed;
+
+/* PXP-Output-Packet: HUC Load and Authentication or Auth-only */
+struct pxp43_huc_auth_out {
+	struct pxp_cmd_header header;
+} __packed;
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
new file mode 100644
index 000000000000..79ba98a169f9
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
@@ -0,0 +1,219 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_ACTIONS_ABI_H
+#define _ABI_GUC_ACTIONS_ABI_H
+
+/**
+ * DOC: HOST2GUC_SELF_CFG
+ *
+ * This message is used by Host KMD to set up the `GuC Self Config KLVs`_.
+ *
+ * This message must be sent as `MMIO HXG Message`_.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_HOST_                                |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | DATA0 = MBZ                                                  |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`GUC_ACTION_HOST2GUC_SELF_CFG` = 0x0508            |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 | 31:16 | **KLV_KEY** - KLV key, see `GuC Self Config KLVs`_           |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | **KLV_LEN** - KLV length                                     |
+ *  |   |       |                                                              |
+ *  |   |       |   - 32 bit KLV = 1                                           |
+ *  |   |       |   - 64 bit KLV = 2                                           |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 2 |  31:0 | **VALUE32** - Bits 31-0 of the KLV value                     |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 3 |  31:0 | **VALUE64** - Bits 63-32 of the KLV value (**KLV_LEN** = 2)  |
+ *  +---+-------+--------------------------------------------------------------+
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_GUC_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | DATA0 = **NUM** - 1 if KLV was parsed, 0 if not recognized   |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_ACTION_HOST2GUC_SELF_CFG			0x0508
+
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_LEN		(GUC_HXG_REQUEST_MSG_MIN_LEN + 3u)
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_0_MBZ		GUC_HXG_REQUEST_MSG_0_DATA0
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY		(0xffffu << 16)
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN		(0xffffu << 0)
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32		GUC_HXG_REQUEST_MSG_n_DATAn
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64		GUC_HXG_REQUEST_MSG_n_DATAn
+
+#define HOST2GUC_SELF_CFG_RESPONSE_MSG_LEN		GUC_HXG_RESPONSE_MSG_MIN_LEN
+#define HOST2GUC_SELF_CFG_RESPONSE_MSG_0_NUM		GUC_HXG_RESPONSE_MSG_0_DATA0
+
+/**
+ * DOC: HOST2GUC_CONTROL_CTB
+ *
+ * This H2G action allows VF Host to enable or disable H2G and G2H `CT Buffer`_.
+ *
+ * This message must be sent as `MMIO HXG Message`_.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_HOST_                                |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | DATA0 = MBZ                                                  |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`GUC_ACTION_HOST2GUC_CONTROL_CTB` = 0x4509         |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | **CONTROL** - control `CTB based communication`_             |
+ *  |   |       |                                                              |
+ *  |   |       |   - _`GUC_CTB_CONTROL_DISABLE` = 0                           |
+ *  |   |       |   - _`GUC_CTB_CONTROL_ENABLE` = 1                            |
+ *  +---+-------+--------------------------------------------------------------+
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_GUC_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | DATA0 = MBZ                                                  |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_ACTION_HOST2GUC_CONTROL_CTB			0x4509
+
+#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN		(GUC_HXG_REQUEST_MSG_MIN_LEN + 1u)
+#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_0_MBZ		GUC_HXG_REQUEST_MSG_0_DATA0
+#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL	GUC_HXG_REQUEST_MSG_n_DATAn
+#define   GUC_CTB_CONTROL_DISABLE			0u
+#define   GUC_CTB_CONTROL_ENABLE			1u
+
+#define HOST2GUC_CONTROL_CTB_RESPONSE_MSG_LEN		GUC_HXG_RESPONSE_MSG_MIN_LEN
+#define HOST2GUC_CONTROL_CTB_RESPONSE_MSG_0_MBZ		GUC_HXG_RESPONSE_MSG_0_DATA0
+
+/* legacy definitions */
+
+enum xe_guc_action {
+	XE_GUC_ACTION_DEFAULT = 0x0,
+	XE_GUC_ACTION_REQUEST_PREEMPTION = 0x2,
+	XE_GUC_ACTION_REQUEST_ENGINE_RESET = 0x3,
+	XE_GUC_ACTION_ALLOCATE_DOORBELL = 0x10,
+	XE_GUC_ACTION_DEALLOCATE_DOORBELL = 0x20,
+	XE_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE = 0x30,
+	XE_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x40,
+	XE_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH = 0x302,
+	XE_GUC_ACTION_ENTER_S_STATE = 0x501,
+	XE_GUC_ACTION_EXIT_S_STATE = 0x502,
+	XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506,
+	XE_GUC_ACTION_SCHED_CONTEXT = 0x1000,
+	XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001,
+	XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002,
+	XE_GUC_ACTION_SCHED_ENGINE_MODE_SET = 0x1003,
+	XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE = 0x1004,
+	XE_GUC_ACTION_SET_CONTEXT_PRIORITY = 0x1005,
+	XE_GUC_ACTION_SET_CONTEXT_EXECUTION_QUANTUM = 0x1006,
+	XE_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007,
+	XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008,
+	XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009,
+	XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES = 0x100B,
+	XE_GUC_ACTION_SETUP_PC_GUCRC = 0x3004,
+	XE_GUC_ACTION_AUTHENTICATE_HUC = 0x4000,
+	XE_GUC_ACTION_GET_HWCONFIG = 0x4100,
+	XE_GUC_ACTION_REGISTER_CONTEXT = 0x4502,
+	XE_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503,
+	XE_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
+	XE_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
+	XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
+	XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
+	XE_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507,
+	XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A,
+	XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR = 0x6000,
+	XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC = 0x6002,
+	XE_GUC_ACTION_PAGE_FAULT_RES_DESC = 0x6003,
+	XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY = 0x6004,
+	XE_GUC_ACTION_TLB_INVALIDATION = 0x7000,
+	XE_GUC_ACTION_TLB_INVALIDATION_DONE = 0x7001,
+	XE_GUC_ACTION_TLB_INVALIDATION_ALL = 0x7002,
+	XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION = 0x8002,
+	XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003,
+	XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004,
+	XE_GUC_ACTION_NOTIFY_EXCEPTION = 0x8005,
+	XE_GUC_ACTION_LIMIT
+};
+
+enum xe_guc_rc_options {
+	XE_GUCRC_HOST_CONTROL,
+	XE_GUCRC_FIRMWARE_CONTROL,
+};
+
+enum xe_guc_preempt_options {
+	XE_GUC_PREEMPT_OPTION_DROP_WORK_Q = 0x4,
+	XE_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q = 0x8,
+};
+
+enum xe_guc_report_status {
+	XE_GUC_REPORT_STATUS_UNKNOWN = 0x0,
+	XE_GUC_REPORT_STATUS_ACKED = 0x1,
+	XE_GUC_REPORT_STATUS_ERROR = 0x2,
+	XE_GUC_REPORT_STATUS_COMPLETE = 0x4,
+};
+
+enum xe_guc_sleep_state_status {
+	XE_GUC_SLEEP_STATE_SUCCESS = 0x1,
+	XE_GUC_SLEEP_STATE_PREEMPT_TO_IDLE_FAILED = 0x2,
+	XE_GUC_SLEEP_STATE_ENGINE_RESET_FAILED = 0x3
+#define XE_GUC_SLEEP_STATE_INVALID_MASK 0x80000000
+};
+
+#define GUC_LOG_CONTROL_LOGGING_ENABLED	(1 << 0)
+#define GUC_LOG_CONTROL_VERBOSITY_SHIFT	4
+#define GUC_LOG_CONTROL_VERBOSITY_MASK	(0xF << GUC_LOG_CONTROL_VERBOSITY_SHIFT)
+#define GUC_LOG_CONTROL_DEFAULT_LOGGING	(1 << 8)
+
+#define XE_GUC_TLB_INVAL_TYPE_SHIFT 0
+#define XE_GUC_TLB_INVAL_MODE_SHIFT 8
+/* Flush PPC or SMRO caches along with TLB invalidation request (bit 31; unsigned to avoid shifting into the int sign bit) */
+#define XE_GUC_TLB_INVAL_FLUSH_CACHE (1u << 31)
+
+enum xe_guc_tlb_invalidation_type {
+	XE_GUC_TLB_INVAL_FULL = 0x0,
+	XE_GUC_TLB_INVAL_PAGE_SELECTIVE = 0x1,
+	XE_GUC_TLB_INVAL_PAGE_SELECTIVE_CTX = 0x2,
+	XE_GUC_TLB_INVAL_GUC = 0x3,
+};
+
+/*
+ * 0: Heavy mode of Invalidation:
+ * The pipeline of the engine(s) for which the invalidation is targeted to is
+ * blocked, and all the in-flight transactions are guaranteed to be Globally
+ * Observed before completing the TLB invalidation
+ * 1: Lite mode of Invalidation:
+ * TLBs of the targeted engine(s) are immediately invalidated.
+ * In-flight transactions are NOT guaranteed to be Globally Observed before
+ * completing TLB invalidation.
+ * Light Invalidation Mode is to be used only when
+ * it can be guaranteed (by SW) that the address translations remain invariant
+ * for the in-flight transactions across the TLB invalidation. In other words,
+ * this mode can be used when the TLB invalidation is intended to clear out the
+ * stale cached translations that are no longer in use. Light Invalidation Mode
+ * is much faster than the Heavy Invalidation Mode, as it does not wait for the
+ * in-flight transactions to be Globally Observed.
+ */
+enum xe_guc_tlb_inval_mode {
+	XE_GUC_TLB_INVAL_MODE_HEAVY = 0x0,
+	XE_GUC_TLB_INVAL_MODE_LITE = 0x1,
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
new file mode 100644
index 000000000000..c165e26c0976
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
@@ -0,0 +1,249 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _GUC_ACTIONS_SLPC_ABI_H_
+#define _GUC_ACTIONS_SLPC_ABI_H_
+
+#include <linux/types.h>
+
+/**
+ * DOC: SLPC SHARED DATA STRUCTURE
+ *
+ *  +----+------+--------------------------------------------------------------+
+ *  | CL | Bytes| Description                                                  |
+ *  +====+======+==============================================================+
+ *  | 1  | 0-3  | SHARED DATA SIZE                                             |
+ *  |    +------+--------------------------------------------------------------+
+ *  |    | 4-7  | GLOBAL STATE                                                 |
+ *  |    +------+--------------------------------------------------------------+
+ *  |    | 8-11 | DISPLAY DATA ADDRESS                                         |
+ *  |    +------+--------------------------------------------------------------+
+ *  |    | 12:63| PADDING                                                      |
+ *  +----+------+--------------------------------------------------------------+
+ *  | 2  | 0:63 | PADDING(PLATFORM INFO)                                       |
+ *  +----+------+--------------------------------------------------------------+
+ *  | 3  | 0-3  | TASK STATE DATA                                              |
+ *  +    +------+--------------------------------------------------------------+
+ *  |    | 4:63 | PADDING                                                      |
+ *  +----+------+--------------------------------------------------------------+
+ *  |4-21|0:1087| OVERRIDE PARAMS AND BIT FIELDS                               |
+ *  +----+------+--------------------------------------------------------------+
+ *  |    |      | PADDING + EXTRA RESERVED PAGE                                |
+ *  +----+------+--------------------------------------------------------------+
+ */
+
+/*
+ * SLPC exposes certain parameters for global configuration by the host.
+ * These are referred to as override parameters, because in most cases
+ * the host will not need to modify the default values used by SLPC.
+ * SLPC remembers the default values which allows the host to easily restore
+ * them by simply unsetting the override. The host can set or unset override
+ * parameters during SLPC (re-)initialization using the SLPC Reset event.
+ * The host can also set or unset override parameters on the fly using the
+ * Parameter Set and Parameter Unset events
+ */
+
+#define SLPC_MAX_OVERRIDE_PARAMETERS		256
+#define SLPC_OVERRIDE_BITFIELD_SIZE \
+		(SLPC_MAX_OVERRIDE_PARAMETERS / 32)
+
+#define SLPC_PAGE_SIZE_BYTES			4096
+#define SLPC_CACHELINE_SIZE_BYTES		64
+#define SLPC_SHARED_DATA_SIZE_BYTE_HEADER	SLPC_CACHELINE_SIZE_BYTES
+#define SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO	SLPC_CACHELINE_SIZE_BYTES
+#define SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE	SLPC_CACHELINE_SIZE_BYTES
+#define SLPC_SHARED_DATA_MODE_DEFN_TABLE_SIZE	SLPC_PAGE_SIZE_BYTES
+#define SLPC_SHARED_DATA_SIZE_BYTE_MAX		(2 * SLPC_PAGE_SIZE_BYTES)
+
+/*
+ * Cacheline size aligned (Total size needed for
+ * SLPC_MAX_OVERRIDE_PARAMETERS=256 is 1088 bytes)
+ */
+#define SLPC_OVERRIDE_PARAMS_TOTAL_BYTES	(((((SLPC_MAX_OVERRIDE_PARAMETERS * 4) \
+						+ ((SLPC_MAX_OVERRIDE_PARAMETERS / 32) * 4)) \
+		+ (SLPC_CACHELINE_SIZE_BYTES - 1)) / SLPC_CACHELINE_SIZE_BYTES) * \
+					SLPC_CACHELINE_SIZE_BYTES)
+
+#define SLPC_SHARED_DATA_SIZE_BYTE_OTHER	(SLPC_SHARED_DATA_SIZE_BYTE_MAX - \
+					(SLPC_SHARED_DATA_SIZE_BYTE_HEADER \
+					+ SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO \
+					+ SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE \
+					+ SLPC_OVERRIDE_PARAMS_TOTAL_BYTES \
+					+ SLPC_SHARED_DATA_MODE_DEFN_TABLE_SIZE))
+
+enum slpc_task_enable {
+	SLPC_PARAM_TASK_DEFAULT = 0,
+	SLPC_PARAM_TASK_ENABLED,
+	SLPC_PARAM_TASK_DISABLED,
+	SLPC_PARAM_TASK_UNKNOWN
+};
+
+enum slpc_global_state {
+	SLPC_GLOBAL_STATE_NOT_RUNNING = 0,
+	SLPC_GLOBAL_STATE_INITIALIZING = 1,
+	SLPC_GLOBAL_STATE_RESETTING = 2,
+	SLPC_GLOBAL_STATE_RUNNING = 3,
+	SLPC_GLOBAL_STATE_SHUTTING_DOWN = 4,
+	SLPC_GLOBAL_STATE_ERROR = 5
+};
+
+enum slpc_param_id {
+	SLPC_PARAM_TASK_ENABLE_GTPERF = 0,
+	SLPC_PARAM_TASK_DISABLE_GTPERF = 1,
+	SLPC_PARAM_TASK_ENABLE_BALANCER = 2,
+	SLPC_PARAM_TASK_DISABLE_BALANCER = 3,
+	SLPC_PARAM_TASK_ENABLE_DCC = 4,
+	SLPC_PARAM_TASK_DISABLE_DCC = 5,
+	SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ = 6,
+	SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ = 7,
+	SLPC_PARAM_GLOBAL_MIN_GT_SLICE_FREQ_MHZ = 8,
+	SLPC_PARAM_GLOBAL_MAX_GT_SLICE_FREQ_MHZ = 9,
+	SLPC_PARAM_GTPERF_THRESHOLD_MAX_FPS = 10,
+	SLPC_PARAM_GLOBAL_DISABLE_GT_FREQ_MANAGEMENT = 11,
+	SLPC_PARAM_GTPERF_ENABLE_FRAMERATE_STALLING = 12,
+	SLPC_PARAM_GLOBAL_DISABLE_RC6_MODE_CHANGE = 13,
+	SLPC_PARAM_GLOBAL_OC_UNSLICE_FREQ_MHZ = 14,
+	SLPC_PARAM_GLOBAL_OC_SLICE_FREQ_MHZ = 15,
+	SLPC_PARAM_GLOBAL_ENABLE_IA_GT_BALANCING = 16,
+	SLPC_PARAM_GLOBAL_ENABLE_ADAPTIVE_BURST_TURBO = 17,
+	SLPC_PARAM_GLOBAL_ENABLE_EVAL_MODE = 18,
+	SLPC_PARAM_GLOBAL_ENABLE_BALANCER_IN_NON_GAMING_MODE = 19,
+	SLPC_PARAM_GLOBAL_RT_MODE_TURBO_FREQ_DELTA_MHZ = 20,
+	SLPC_PARAM_PWRGATE_RC_MODE = 21,
+	SLPC_PARAM_EDR_MODE_COMPUTE_TIMEOUT_MS = 22,
+	SLPC_PARAM_EDR_QOS_FREQ_MHZ = 23,
+	SLPC_PARAM_MEDIA_FF_RATIO_MODE = 24,
+	SLPC_PARAM_ENABLE_IA_FREQ_LIMITING = 25,
+	SLPC_PARAM_STRATEGIES = 26,
+	SLPC_PARAM_POWER_PROFILE = 27,
+	SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY = 28,
+	SLPC_MAX_PARAM = 32,
+};
+
+enum slpc_media_ratio_mode {
+	SLPC_MEDIA_RATIO_MODE_DYNAMIC_CONTROL = 0,
+	SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_ONE = 1,
+	SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO = 2,
+};
+
+enum slpc_gucrc_mode {
+	SLPC_GUCRC_MODE_HW = 0,
+	SLPC_GUCRC_MODE_GUCRC_NO_RC6 = 1,
+	SLPC_GUCRC_MODE_GUCRC_STATIC_TIMEOUT = 2,
+	SLPC_GUCRC_MODE_GUCRC_DYNAMIC_HYSTERESIS = 3,
+
+	SLPC_GUCRC_MODE_MAX,
+};
+
+enum slpc_event_id {
+	SLPC_EVENT_RESET = 0,
+	SLPC_EVENT_SHUTDOWN = 1,
+	SLPC_EVENT_PLATFORM_INFO_CHANGE = 2,
+	SLPC_EVENT_DISPLAY_MODE_CHANGE = 3,
+	SLPC_EVENT_FLIP_COMPLETE = 4,
+	SLPC_EVENT_QUERY_TASK_STATE = 5,
+	SLPC_EVENT_PARAMETER_SET = 6,
+	SLPC_EVENT_PARAMETER_UNSET = 7,
+};
+
+struct slpc_task_state_data {
+	union {
+		u32 task_status_padding;
+		struct {
+			u32 status;
+#define SLPC_GTPERF_TASK_ENABLED	REG_BIT(0)
+#define SLPC_DCC_TASK_ENABLED		REG_BIT(11)
+#define SLPC_IN_DCC			REG_BIT(12)
+#define SLPC_BALANCER_ENABLED		REG_BIT(15)
+#define SLPC_IBC_TASK_ENABLED		REG_BIT(16)
+#define SLPC_BALANCER_IA_LMT_ENABLED	REG_BIT(17)
+#define SLPC_BALANCER_IA_LMT_ACTIVE	REG_BIT(18)
+		};
+	};
+	union {
+		u32 freq_padding;
+		struct {
+#define SLPC_MAX_UNSLICE_FREQ_MASK	REG_GENMASK(7, 0)
+#define SLPC_MIN_UNSLICE_FREQ_MASK	REG_GENMASK(15, 8)
+#define SLPC_MAX_SLICE_FREQ_MASK	REG_GENMASK(23, 16)
+#define SLPC_MIN_SLICE_FREQ_MASK	REG_GENMASK(31, 24)
+			u32 freq;
+		};
+	};
+} __packed;
+
+struct slpc_shared_data_header {
+	/* Total size in bytes of this shared buffer. */
+	u32 size;		/* bytes 0-3 of the first cacheline */
+	u32 global_state;	/* bytes 4-7; presumably an enum slpc_global_state value - confirm */
+	u32 display_data_addr;	/* bytes 8-11: display data address */
+} __packed;
+
+struct slpc_override_params {
+	u32 bits[SLPC_OVERRIDE_BITFIELD_SIZE];
+	u32 values[SLPC_MAX_OVERRIDE_PARAMETERS];
+} __packed;
+
+struct slpc_shared_data {
+	struct slpc_shared_data_header header;
+	u8 shared_data_header_pad[SLPC_SHARED_DATA_SIZE_BYTE_HEADER -
+				sizeof(struct slpc_shared_data_header)];
+
+	u8 platform_info_pad[SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO];
+
+	struct slpc_task_state_data task_state_data;
+	u8 task_state_data_pad[SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE -
+				sizeof(struct slpc_task_state_data)];
+
+	struct slpc_override_params override_params;
+	u8 override_params_pad[SLPC_OVERRIDE_PARAMS_TOTAL_BYTES -
+				sizeof(struct slpc_override_params)];
+
+	u8 shared_data_pad[SLPC_SHARED_DATA_SIZE_BYTE_OTHER];
+
+	/* PAGE 2 (4096 bytes), mode based parameter will be removed soon */
+	u8 reserved_mode_definition[4096];
+} __packed;
+
+/**
+ * DOC: SLPC H2G MESSAGE FORMAT
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_HOST_                                |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | DATA0 = MBZ                                                  |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST` = 0x3003     |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:8 | **EVENT_ID**                                                 |
+ *  +   +-------+--------------------------------------------------------------+
+ *  |   |   7:0 | **EVENT_ARGC** - number of data arguments                    |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 2 |  31:0 | **EVENT_DATA1**                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ *  |...|  31:0 | ...                                                          |
+ *  +---+-------+--------------------------------------------------------------+
+ *  |2+n|  31:0 | **EVENT_DATAn**                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST		0x3003
+
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_MIN_LEN \
+				(GUC_HXG_REQUEST_MSG_MIN_LEN + 1u)
+#define HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS		9
+/* Longest SLPC request: the minimal request plus the maximum event arguments */
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_MAX_LEN \
+		(HOST2GUC_PC_SLPC_REQUEST_MSG_MIN_LEN + HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS)
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_0_MBZ		GUC_HXG_REQUEST_MSG_0_DATA0
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID		(0xffu << 8)
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC	(0xffu << 0)
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_N_EVENT_DATA_N	GUC_HXG_REQUEST_MSG_n_DATAn
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
new file mode 100644
index 000000000000..0b1146d0c997
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_COMMUNICATION_CTB_ABI_H
+#define _ABI_GUC_COMMUNICATION_CTB_ABI_H
+
+#include <linux/types.h>
+#include <linux/build_bug.h>
+
+#include "guc_messages_abi.h"
+
+/**
+ * DOC: CT Buffer
+ *
+ * Circular buffer used to send `CTB Message`_
+ */
+
+/**
+ * DOC: CTB Descriptor
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |  31:0 | **HEAD** - offset (in dwords) to the last dword that was     |
+ *  |   |       | read from the `CT Buffer`_.                                  |
+ *  |   |       | It can only be updated by the receiver.                      |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | **TAIL** - offset (in dwords) to the last dword that was     |
+ *  |   |       | written to the `CT Buffer`_.                                 |
+ *  |   |       | It can only be updated by the sender.                        |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 2 |  31:0 | **STATUS** - status of the CTB                               |
+ *  |   |       |                                                              |
+ *  |   |       |   - _`GUC_CTB_STATUS_NO_ERROR` = 0 (normal operation)        |
+ *  |   |       |   - _`GUC_CTB_STATUS_OVERFLOW` = 1 (head/tail too large)     |
+ *  |   |       |   - _`GUC_CTB_STATUS_UNDERFLOW` = 2 (truncated message)      |
+ *  |   |       |   - _`GUC_CTB_STATUS_MISMATCH` = 4 (head/tail modified)      |
+ *  +---+-------+--------------------------------------------------------------+
+ *  |...|       | RESERVED = MBZ                                               |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 15|  31:0 | RESERVED = MBZ                                               |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+struct guc_ct_buffer_desc {
+	u32 head;	/* dword offset of the last dword read; receiver-updated */
+	u32 tail;	/* dword offset of the last dword written; sender-updated */
+	u32 status;	/* status of the CTB, see GUC_CTB_STATUS_* below */
+#define GUC_CTB_STATUS_NO_ERROR				0	/* normal operation */
+#define GUC_CTB_STATUS_OVERFLOW				(1 << 0)	/* head/tail too large */
+#define GUC_CTB_STATUS_UNDERFLOW			(1 << 1)	/* truncated message */
+#define GUC_CTB_STATUS_MISMATCH				(1 << 2)	/* head/tail modified */
+	u32 reserved[13];	/* dwords 3..15, MBZ */
+} __packed;
+static_assert(sizeof(struct guc_ct_buffer_desc) == 64);	/* 16 dwords */
+
+/**
+ * DOC: CTB Message
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 | 31:16 | **FENCE** - message identifier                               |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 15:12 | **FORMAT** - format of the CTB message                       |
+ *  |   |       |  - _`GUC_CTB_FORMAT_HXG` = 0 - see `CTB HXG Message`_        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  11:8 | **RESERVED**                                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |   7:0 | **NUM_DWORDS** - length of the CTB message (w/o header)      |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | optional (depends on FORMAT)                                 |
+ *  +---+-------+                                                              |
+ *  |...|       |                                                              |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_CTB_HDR_LEN				1u
+#define GUC_CTB_MSG_MIN_LEN			GUC_CTB_HDR_LEN
+#define GUC_CTB_MSG_MAX_LEN			256u
+#define GUC_CTB_MSG_0_FENCE			(0xffffu << 16)
+#define GUC_CTB_MSG_0_FORMAT			(0xfu << 12)
+#define   GUC_CTB_FORMAT_HXG			0u
+#define GUC_CTB_MSG_0_RESERVED			(0xfu << 8)
+#define GUC_CTB_MSG_0_NUM_DWORDS		(0xffu << 0)
+
+/**
+ * DOC: CTB HXG Message
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 | 31:16 | FENCE                                                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 15:12 | FORMAT = GUC_CTB_FORMAT_HXG_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  11:8 | RESERVED = MBZ                                               |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |   7:0 | NUM_DWORDS = length (in dwords) of the embedded HXG message  |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 |                                                              |
+ *  +---+-------+                                                              |
+ *  |...|       | [Embedded `HXG Message`_]                                    |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_CTB_HXG_MSG_MIN_LEN		(GUC_CTB_MSG_MIN_LEN + GUC_HXG_MSG_MIN_LEN)
+#define GUC_CTB_HXG_MSG_MAX_LEN		GUC_CTB_MSG_MAX_LEN
+
+/**
+ * DOC: CTB based communication
+ *
+ * The CTB (command transport buffer) communication between Host and GuC
+ * is based on u32 data stream written to the shared buffer. One buffer can
+ * be used to transmit data only in one direction (one-directional channel).
+ *
+ * The current status of each buffer is maintained in the `CTB Descriptor`_.
+ * Each message in data stream is encoded as `CTB HXG Message`_.
+ */
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h
new file mode 100644
index 000000000000..ef538e34f894
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_COMMUNICATION_MMIO_ABI_H
+#define _ABI_GUC_COMMUNICATION_MMIO_ABI_H
+
+/**
+ * DOC: GuC MMIO based communication
+ *
+ * The MMIO based communication between Host and GuC relies on special
+ * hardware registers whose format can be defined by the software
+ * (so called scratch registers).
+ *
+ * Each MMIO based message, both Host to GuC (H2G) and GuC to Host (G2H),
+ * whose maximum length depends on the number of available scratch
+ * registers, is written directly into those scratch registers.
+ *
+ * For Gen9+, there are 16 software scratch registers 0xC180-0xC1B8,
+ * but no H2G command takes more than 4 parameters and the GuC firmware
+ * itself uses a 4-element array to store the H2G message.
+ *
+ * For Gen11+, there are additional 4 registers 0x190240-0x19024C, which
+ * are, regardless of their lower count, preferred over the legacy ones.
+ *
+ * The MMIO based communication is mainly used during driver initialization
+ * phase to setup the `CTB based communication`_ that will be used afterwards.
+ */
+
+#define GUC_MAX_MMIO_MSG_LEN		4
+
+/**
+ * DOC: MMIO HXG Message
+ *
+ * Format of the MMIO messages follows definitions of `HXG Message`_.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |  31:0 |                                                              |
+ *  +---+-------+                                                              |
+ *  |...|       | [Embedded `HXG Message`_]                                    |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_errors_abi.h b/drivers/gpu/drm/xe/abi/guc_errors_abi.h
new file mode 100644
index 000000000000..ec83551bf9c0
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_errors_abi.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_ERRORS_ABI_H
+#define _ABI_GUC_ERRORS_ABI_H
+
+enum xe_guc_response_status {
+	XE_GUC_RESPONSE_STATUS_SUCCESS = 0x0,
+	XE_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000,
+};
+
+enum xe_guc_load_status {
+	XE_GUC_LOAD_STATUS_DEFAULT                          = 0x00,
+	XE_GUC_LOAD_STATUS_START                            = 0x01,
+	XE_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH       = 0x02,
+	XE_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH       = 0x03,
+	XE_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE      = 0x04,
+	XE_GUC_LOAD_STATUS_GDT_DONE                         = 0x10,
+	XE_GUC_LOAD_STATUS_IDT_DONE                         = 0x20,
+	XE_GUC_LOAD_STATUS_LAPIC_DONE                       = 0x30,
+	XE_GUC_LOAD_STATUS_GUCINT_DONE                      = 0x40,
+	XE_GUC_LOAD_STATUS_DPC_READY                        = 0x50,
+	XE_GUC_LOAD_STATUS_DPC_ERROR                        = 0x60,
+	XE_GUC_LOAD_STATUS_EXCEPTION                        = 0x70,
+	XE_GUC_LOAD_STATUS_INIT_DATA_INVALID                = 0x71,
+	XE_GUC_LOAD_STATUS_PXP_TEARDOWN_CTRL_ENABLED        = 0x72,
+	XE_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_START,
+	XE_GUC_LOAD_STATUS_MPU_DATA_INVALID                 = 0x73,
+	XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID   = 0x74,
+	XE_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_END,
+
+	XE_GUC_LOAD_STATUS_READY                            = 0xF0,
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
new file mode 100644
index 000000000000..0400bc0fccdc
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
@@ -0,0 +1,322 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_KLVS_ABI_H
+#define _ABI_GUC_KLVS_ABI_H
+
+#include <linux/types.h>
+
+/**
+ * DOC: GuC KLV
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 | 31:16 | **KEY** - KLV key identifier                                 |
+ *  |   |       |   - `GuC Self Config KLVs`_                                  |
+ *  |   |       |   - `GuC VGT Policy KLVs`_                                   |
+ *  |   |       |   - `GuC VF Configuration KLVs`_                             |
+ *  |   |       |                                                              |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | **LEN** - length of VALUE (in 32bit dwords)                  |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | **VALUE** - actual value of the KLV (format depends on KEY)  |
+ *  +---+-------+                                                              |
+ *  |...|       |                                                              |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_KLV_LEN_MIN				1u
+#define GUC_KLV_0_KEY				(0xffffu << 16)
+#define GUC_KLV_0_LEN				(0xffffu << 0)
+#define GUC_KLV_n_VALUE				(0xffffffffu << 0)
+
+/**
+ * DOC: GuC Self Config KLVs
+ *
+ * `GuC KLV`_ keys available for use with HOST2GUC_SELF_CFG_.
+ *
+ * _`GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR` : 0x0900
+ *      Refers to 64 bit Global Gfx address (in bytes) of memory based interrupts
+ *      status vector for use by the GuC.
+ *
+ * _`GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR` : 0x0901
+ *      Refers to 64 bit Global Gfx address (in bytes) of memory based interrupts
+ *      source vector for use by the GuC.
+ *
+ * _`GUC_KLV_SELF_CFG_H2G_CTB_ADDR` : 0x0902
+ *      Refers to 64 bit Global Gfx address of H2G `CT Buffer`_.
+ *      Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR` : 0x0903
+ *      Refers to 64 bit Global Gfx address of H2G `CTB Descriptor`_.
+ *      Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_H2G_CTB_SIZE` : 0x0904
+ *      Refers to size of H2G `CT Buffer`_ in bytes.
+ *      Should be a multiple of 4K.
+ *
+ * _`GUC_KLV_SELF_CFG_G2H_CTB_ADDR` : 0x0905
+ *      Refers to 64 bit Global Gfx address of G2H `CT Buffer`_.
+ *      Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR` : 0x0906
+ *      Refers to 64 bit Global Gfx address of G2H `CTB Descriptor`_.
+ *      Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_G2H_CTB_SIZE` : 0x0907
+ *      Refers to size of G2H `CT Buffer`_ in bytes.
+ *      Should be a multiple of 4K.
+ */
+
+#define GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR_KEY		0x0900
+#define GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR_LEN		2u
+
+#define GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR_KEY		0x0901
+#define GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR_LEN		2u
+
+#define GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY		0x0902
+#define GUC_KLV_SELF_CFG_H2G_CTB_ADDR_LEN		2u
+
+#define GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY	0x0903
+#define GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_LEN	2u
+
+#define GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY		0x0904
+#define GUC_KLV_SELF_CFG_H2G_CTB_SIZE_LEN		1u
+
+#define GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY		0x0905
+#define GUC_KLV_SELF_CFG_G2H_CTB_ADDR_LEN		2u
+
+#define GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY	0x0906
+#define GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_LEN	2u
+
+#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY		0x0907
+#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_LEN		1u
+
+/*
+ * Per context scheduling policy update keys.
+ */
+enum  {
+	GUC_CONTEXT_POLICIES_KLV_ID_EXECUTION_QUANTUM			= 0x2001,
+	GUC_CONTEXT_POLICIES_KLV_ID_PREEMPTION_TIMEOUT			= 0x2002,
+	GUC_CONTEXT_POLICIES_KLV_ID_SCHEDULING_PRIORITY			= 0x2003,
+	GUC_CONTEXT_POLICIES_KLV_ID_PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY	= 0x2004,
+	GUC_CONTEXT_POLICIES_KLV_ID_SLPM_GT_FREQUENCY			= 0x2005,
+
+	GUC_CONTEXT_POLICIES_KLV_NUM_IDS = 5,
+};
+
+/**
+ * DOC: GuC VGT Policy KLVs
+ *
+ * `GuC KLV`_ keys available for use with PF2GUC_UPDATE_VGT_POLICY.
+ *
+ * _`GUC_KLV_VGT_POLICY_SCHED_IF_IDLE` : 0x8001
+ *      This config sets whether strict scheduling is enabled whereby any VF
+ *      that doesn’t have work to submit is still allocated a fixed execution
+ *      time-slice to ensure active VFs execution is always consistent even
+ *      during other VF reprovisioning / rebooting events. Changing this KLV
+ *      impacts all VFs and takes effect on the next VF-Switch event.
+ *
+ *      :0: don't schedule idle (default)
+ *      :1: schedule if idle
+ *
+ * _`GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD` : 0x8002
+ *      This config sets the sample period for tracking adverse event counters.
+ *      A sample period is the period in milliseconds during which events are
+ *      counted. This is applicable for all the VFs.
+ *
+ *      :0: adverse events are not counted (default)
+ *      :n: sample period in milliseconds
+ *
+ * _`GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH` : 0x8D00
+ *      This config sets whether the utilized HW engine is reset after a VF
+ *      switch (i.e. to clean up stale HW registers left by the previous VF).
+ *
+ *      :0: don't reset (default)
+ *      :1: reset
+ */
+
+#define GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY		0x8001
+#define GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_LEN		1u
+
+#define GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_KEY	0x8002
+#define GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_LEN	1u
+
+#define GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_KEY	0x8D00
+#define GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_LEN	1u
+
+/**
+ * DOC: GuC VF Configuration KLVs
+ *
+ * `GuC KLV`_ keys available for use with PF2GUC_UPDATE_VF_CFG.
+ *
+ * _`GUC_KLV_VF_CFG_GGTT_START` : 0x0001
+ *      A 4K aligned start GTT address/offset assigned to VF.
+ *      Value is 64 bits.
+ *
+ * _`GUC_KLV_VF_CFG_GGTT_SIZE` : 0x0002
+ *      A 4K aligned size of GGTT assigned to VF.
+ *      Value is 64 bits.
+ *
+ * _`GUC_KLV_VF_CFG_LMEM_SIZE` : 0x0003
+ *      A 2M aligned size of local memory assigned to VF.
+ *      Value is 64 bits.
+ *
+ * _`GUC_KLV_VF_CFG_NUM_CONTEXTS` : 0x0004
+ *      Refers to the number of contexts allocated to this VF.
+ *
+ *      :0: no contexts (default)
+ *      :1-65535: number of contexts (Gen12)
+ *
+ * _`GUC_KLV_VF_CFG_TILE_MASK` : 0x0005
+ *      For multi-tiled products, this field contains the bitwise-OR of tiles
+ *      assigned to the VF. Bit-0-set means VF has access to Tile-0,
+ *      Bit-31-set means VF has access to Tile-31, and so on.
+ *      At least one tile will always be allocated.
+ *      If all bits are zero, VF KMD should treat this as a fatal error.
+ *      For single-tile products, this KLV config is ignored.
+ *
+ * _`GUC_KLV_VF_CFG_NUM_DOORBELLS` : 0x0006
+ *      Refers to the number of doorbells allocated to this VF.
+ *
+ *      :0: no doorbells (default)
+ *      :1-255: number of doorbells (Gen12)
+ *
+ * _`GUC_KLV_VF_CFG_EXEC_QUANTUM` : 0x8A01
+ *      This config sets the VFs-execution-quantum in milliseconds.
+ *      GUC will attempt to obey the maximum values as much as HW is capable
+ *      of and this will never be perfectly-exact (accumulated nano-second
+ *      granularity) since the GPU's clock time runs off a different crystal
+ *      from the CPU's clock. Changing this KLV on a VF that is currently
+ *      running a context won't take effect until a new context is scheduled in.
+ *      That said, when the PF is changing this value from 0xFFFFFFFF to
+ *      something else, it might never take effect if the VF is running an
+ *      infinitely long compute or shader kernel. In such a scenario, the
+ *      PF would need to trigger a VM PAUSE and then change the KLV to force
+ *      it to take effect. Such cases might typically happen on a 1PF+1VF
+ *      Virtualization config enabled for heavier workloads like AI/ML.
+ *
+ *      :0: infinite exec quantum (default)
+ *
+ * _`GUC_KLV_VF_CFG_PREEMPT_TIMEOUT` : 0x8A02
+ *      This config sets the VF-preemption-timeout in microseconds.
+ *      GUC will attempt to obey the minimum and maximum values as much as
+ *      HW is capable and this will never be perfectly-exact (accumulated
+ *      nano-second granularity) since the GPU's clock time runs off a
+ *      different crystal from the CPU's clock. Changing this KLV on a VF
+ *      that is currently running a context won't take effect until a new
+ *      context is scheduled in.
+ *      That said, when the PF is changing this value from 0xFFFFFFFF to
+ *      something else, it might never take effect if the VF is running an
+ *      infinitely long compute or shader kernel.
+ *      In this case, the PF would need to trigger a VM PAUSE and then change
+ *      the KLV to force it to take effect. Such cases might typically happen
+ *      on a 1PF+1VF Virtualization config enabled for heavier workloads like
+ *      AI/ML.
+ *
+ *      :0: no preemption timeout (default)
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR` : 0x8A03
+ *      This config sets threshold for CAT errors caused by the VF.
+ *
+ *      :0: adverse events or error will not be reported (default)
+ *      :n: event occurrence count per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_ENGINE_RESET` : 0x8A04
+ *      This config sets threshold for engine reset caused by the VF.
+ *
+ *      :0: adverse events or error will not be reported (default)
+ *      :n: event occurrence count per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_PAGE_FAULT` : 0x8A05
+ *      This config sets threshold for page fault errors caused by the VF.
+ *
+ *      :0: adverse events or error will not be reported (default)
+ *      :n: event occurrence count per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_H2G_STORM` : 0x8A06
+ *      This config sets threshold for H2G interrupts triggered by the VF.
+ *
+ *      :0: adverse events or error will not be reported (default)
+ *      :n: time (us) per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_IRQ_STORM` : 0x8A07
+ *      This config sets threshold for GT interrupts triggered by the VF's
+ *      workloads.
+ *
+ *      :0: adverse events or error will not be reported (default)
+ *      :n: time (us) per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_DOORBELL_STORM` : 0x8A08
+ *      This config sets threshold for doorbell's ring triggered by the VF.
+ *
+ *      :0: adverse events or error will not be reported (default)
+ *      :n: time (us) per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID` : 0x8A0A
+ *      Refers to the start index of doorbell assigned to this VF.
+ *
+ *      :0: (default)
+ *      :1-255: number of doorbells (Gen12)
+ *
+ * _`GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID` : 0x8A0B
+ *      Refers to the start index in context array allocated to this VF’s use.
+ *
+ *      :0: (default)
+ *      :1-65535: number of contexts (Gen12)
+ */
+
+#define GUC_KLV_VF_CFG_GGTT_START_KEY		0x0001
+#define GUC_KLV_VF_CFG_GGTT_START_LEN		2u
+
+#define GUC_KLV_VF_CFG_GGTT_SIZE_KEY		0x0002
+#define GUC_KLV_VF_CFG_GGTT_SIZE_LEN		2u
+
+#define GUC_KLV_VF_CFG_LMEM_SIZE_KEY		0x0003
+#define GUC_KLV_VF_CFG_LMEM_SIZE_LEN		2u
+
+#define GUC_KLV_VF_CFG_NUM_CONTEXTS_KEY		0x0004
+#define GUC_KLV_VF_CFG_NUM_CONTEXTS_LEN		1u
+
+#define GUC_KLV_VF_CFG_TILE_MASK_KEY		0x0005
+#define GUC_KLV_VF_CFG_TILE_MASK_LEN		1u
+
+#define GUC_KLV_VF_CFG_NUM_DOORBELLS_KEY	0x0006
+#define GUC_KLV_VF_CFG_NUM_DOORBELLS_LEN	1u
+
+#define GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY		0x8a01
+#define GUC_KLV_VF_CFG_EXEC_QUANTUM_LEN		1u
+
+#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY	0x8a02
+#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_LEN	1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR_KEY		0x8a03
+#define GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR_LEN		1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_ENGINE_RESET_KEY	0x8a04
+#define GUC_KLV_VF_CFG_THRESHOLD_ENGINE_RESET_LEN	1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_PAGE_FAULT_KEY		0x8a05
+#define GUC_KLV_VF_CFG_THRESHOLD_PAGE_FAULT_LEN		1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_H2G_STORM_KEY		0x8a06
+#define GUC_KLV_VF_CFG_THRESHOLD_H2G_STORM_LEN		1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_IRQ_STORM_KEY		0x8a07
+#define GUC_KLV_VF_CFG_THRESHOLD_IRQ_STORM_LEN		1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_DOORBELL_STORM_KEY	0x8a08
+#define GUC_KLV_VF_CFG_THRESHOLD_DOORBELL_STORM_LEN	1u
+
+#define GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID_KEY	0x8a0a
+#define GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID_LEN	1u
+
+#define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY	0x8a0b
+#define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_LEN	1u
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_messages_abi.h b/drivers/gpu/drm/xe/abi/guc_messages_abi.h
new file mode 100644
index 000000000000..29e414c82d56
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_messages_abi.h
@@ -0,0 +1,234 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_MESSAGES_ABI_H
+#define _ABI_GUC_MESSAGES_ABI_H
+
+/**
+ * DOC: HXG Message
+ *
+ * All messages exchanged with GuC are defined using 32 bit dwords.
+ * First dword is treated as a message header. Remaining dwords are optional.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  |   |       |                                                              |
+ *  | 0 |    31 | **ORIGIN** - originator of the message                       |
+ *  |   |       |   - _`GUC_HXG_ORIGIN_HOST` = 0                               |
+ *  |   |       |   - _`GUC_HXG_ORIGIN_GUC` = 1                                |
+ *  |   |       |                                                              |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | **TYPE** - message type                                      |
+ *  |   |       |   - _`GUC_HXG_TYPE_REQUEST` = 0                              |
+ *  |   |       |   - _`GUC_HXG_TYPE_EVENT` = 1                                |
+ *  |   |       |   - _`GUC_HXG_TYPE_NO_RESPONSE_BUSY` = 3                     |
+ *  |   |       |   - _`GUC_HXG_TYPE_NO_RESPONSE_RETRY` = 5                    |
+ *  |   |       |   - _`GUC_HXG_TYPE_RESPONSE_FAILURE` = 6                     |
+ *  |   |       |   - _`GUC_HXG_TYPE_RESPONSE_SUCCESS` = 7                     |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | **AUX** - auxiliary data (depends on TYPE)                   |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 |                                                              |
+ *  +---+-------+                                                              |
+ *  |...|       | **PAYLOAD** - optional payload (depends on TYPE)             |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_MSG_MIN_LEN			1u
+#define GUC_HXG_MSG_0_ORIGIN			(0x1u << 31)
+#define   GUC_HXG_ORIGIN_HOST			0u
+#define   GUC_HXG_ORIGIN_GUC			1u
+#define GUC_HXG_MSG_0_TYPE			(0x7u << 28)
+#define   GUC_HXG_TYPE_REQUEST			0u
+#define   GUC_HXG_TYPE_EVENT			1u
+#define   GUC_HXG_TYPE_NO_RESPONSE_BUSY		3u
+#define   GUC_HXG_TYPE_NO_RESPONSE_RETRY	5u
+#define   GUC_HXG_TYPE_RESPONSE_FAILURE		6u
+#define   GUC_HXG_TYPE_RESPONSE_SUCCESS		7u
+#define GUC_HXG_MSG_0_AUX			(0xfffffffu << 0)
+#define GUC_HXG_MSG_n_PAYLOAD			(0xffffffffu << 0)
+
+/**
+ * DOC: HXG Request
+ *
+ * The `HXG Request`_ message should be used to initiate synchronous activity
+ * for which confirmation or return data is expected.
+ *
+ * The recipient of this message shall use `HXG Response`_, `HXG Failure`_
+ * or `HXG Retry`_ message as a definite reply, and may use `HXG Busy`_
+ * message as an intermediate reply.
+ *
+ * Format of @DATA0 and all @DATAn fields depends on the @ACTION code.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN                                                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | **DATA0** - request data (depends on ACTION)                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | **ACTION** - requested action code                           |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 |                                                              |
+ *  +---+-------+                                                              |
+ *  |...|       | **DATAn** - optional data (depends on ACTION)                |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_REQUEST_MSG_MIN_LEN		GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_REQUEST_MSG_0_DATA0		(0xfffu << 16)
+#define GUC_HXG_REQUEST_MSG_0_ACTION		(0xffffu << 0)
+#define GUC_HXG_REQUEST_MSG_n_DATAn		GUC_HXG_MSG_n_PAYLOAD
+
+/**
+ * DOC: HXG Event
+ *
+ * The `HXG Event`_ message should be used to initiate asynchronous activity
+ * that involves neither immediate confirmation nor return data.
+ *
+ * Format of @DATA0 and all @DATAn fields depends on the @ACTION code.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN                                                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_EVENT_                                   |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | **DATA0** - event data (depends on ACTION)                   |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | **ACTION** - event action code                               |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 |                                                              |
+ *  +---+-------+                                                              |
+ *  |...|       | **DATAn** - optional event data (depends on ACTION)          |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_EVENT_MSG_MIN_LEN		GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_EVENT_MSG_0_DATA0		(0xfffu << 16)
+#define GUC_HXG_EVENT_MSG_0_ACTION		(0xffffu << 0)
+#define GUC_HXG_EVENT_MSG_n_DATAn		GUC_HXG_MSG_n_PAYLOAD
+
+/**
+ * DOC: HXG Busy
+ *
+ * The `HXG Busy`_ message may be used to acknowledge reception of the `HXG Request`_
+ * message if the recipient expects that its processing will take longer than the
+ * default timeout.
+ *
+ * The @COUNTER field may be used as a progress indicator.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN                                                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_NO_RESPONSE_BUSY_                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | **COUNTER** - progress indicator                             |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_BUSY_MSG_LEN			GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_BUSY_MSG_0_COUNTER		GUC_HXG_MSG_0_AUX
+
+/**
+ * DOC: HXG Retry
+ *
+ * The `HXG Retry`_ message should be used by recipient to indicate that the
+ * `HXG Request`_ message was dropped and should be resent.
+ *
+ * The @REASON field may be used to provide additional information.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN                                                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_NO_RESPONSE_RETRY_                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | **REASON** - reason for retry                                |
+ *  |   |       |  - _`GUC_HXG_RETRY_REASON_UNSPECIFIED` = 0                   |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_RETRY_MSG_LEN			GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_RETRY_MSG_0_REASON		GUC_HXG_MSG_0_AUX
+#define   GUC_HXG_RETRY_REASON_UNSPECIFIED	0u
+
+/**
+ * DOC: HXG Failure
+ *
+ * The `HXG Failure`_ message shall be used as a reply to the `HXG Request`_
+ * message that could not be processed due to an error.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN                                                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_FAILURE_                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | **HINT** - additional error hint                             |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | **ERROR** - error/result code                                |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_FAILURE_MSG_LEN			GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_FAILURE_MSG_0_HINT		(0xfffu << 16)
+#define GUC_HXG_FAILURE_MSG_0_ERROR		(0xffffu << 0)
+
+/**
+ * DOC: HXG Response
+ *
+ * The `HXG Response`_ message shall be used as a reply to the `HXG Request`_
+ * message that was successfully processed without an error.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN                                                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | **DATA0** - data (depends on ACTION from `HXG Request`_)     |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 |                                                              |
+ *  +---+-------+                                                              |
+ *  |...|       | **DATAn** - data (depends on ACTION from `HXG Request`_)     |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_RESPONSE_MSG_MIN_LEN		GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_RESPONSE_MSG_0_DATA0		GUC_HXG_MSG_0_AUX
+#define GUC_HXG_RESPONSE_MSG_n_DATAn		GUC_HXG_MSG_n_PAYLOAD
+
+/* deprecated; masks are unsigned so the 4-bit TYPE field at bit 28 cannot overflow int */
+#define INTEL_GUC_MSG_TYPE_SHIFT	28
+#define INTEL_GUC_MSG_TYPE_MASK		(0xFu << INTEL_GUC_MSG_TYPE_SHIFT)
+#define INTEL_GUC_MSG_DATA_SHIFT	16
+#define INTEL_GUC_MSG_DATA_MASK		(0xFFFu << INTEL_GUC_MSG_DATA_SHIFT)
+#define INTEL_GUC_MSG_CODE_SHIFT	0
+#define INTEL_GUC_MSG_CODE_MASK		(0xFFFFu << INTEL_GUC_MSG_CODE_SHIFT)
+
+enum intel_guc_msg_type {
+	INTEL_GUC_MSG_TYPE_REQUEST = 0x0,
+	INTEL_GUC_MSG_TYPE_RESPONSE = 0xF,
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_lmem.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_lmem.h
new file mode 100644
index 000000000000..710cecca972d
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_lmem.h
@@ -0,0 +1 @@
+/* Empty */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_mman.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_mman.h
new file mode 100644
index 000000000000..650ea2803a97
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_mman.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _I915_GEM_MMAN_H_
+#define _I915_GEM_MMAN_H_
+
+#include "xe_bo_types.h"
+#include <drm/drm_prime.h>
+
+static inline int i915_gem_fb_mmap(struct xe_bo *bo, struct vm_area_struct *vma)
+{
+	return drm_gem_prime_mmap(&bo->ttm.base, vma);
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h
new file mode 100644
index 000000000000..777c20ceabab
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _I915_GEM_OBJECT_H_
+#define _I915_GEM_OBJECT_H_
+
+#include <linux/types.h>
+
+#include "xe_bo.h"
+
+#define i915_gem_object_is_shmem(obj) (0) /* We don't use shmem */
+
+static inline dma_addr_t i915_gem_object_get_dma_address(const struct xe_bo *bo, pgoff_t n)
+{
+	/* Should never be called */
+	WARN_ON(1);
+	return n;
+}
+
+static inline bool i915_gem_object_is_tiled(const struct xe_bo *bo)
+{
+	/* legacy tiling is unused */
+	return false;
+}
+
+static inline bool i915_gem_object_is_userptr(const struct xe_bo *bo)
+{
+	/* always reports false: this shim never treats a BO as userptr */
+	return false;
+}
+
+static inline int i915_gem_object_read_from_page(struct xe_bo *bo,
+					  u32 ofs, u64 *ptr, u32 size)
+{
+	struct ttm_bo_kmap_obj map;
+	void *src;
+	bool is_iomem;
+	int ret;
+
+	ret = xe_bo_lock(bo, true);
+	if (ret)
+		return ret;
+	/* Copies @size bytes at @ofs of @bo to @ptr; map only the page holding @ofs. */
+	ret = ttm_bo_kmap(&bo->ttm, ofs >> PAGE_SHIFT, 1, &map);
+	if (ret)
+		goto out_unlock;
+	/* NOTE(review): nothing checks that the in-page offset + @size stays in that page. */
+	ofs &= ~PAGE_MASK;
+	src = ttm_kmap_obj_virtual(&map, &is_iomem);
+	src += ofs;
+	if (is_iomem)
+		memcpy_fromio(ptr, (void __iomem *)src, size);
+	else
+		memcpy(ptr, src, size);
+
+	ttm_bo_kunmap(&map);
+out_unlock:
+	xe_bo_unlock(bo);
+	return ret;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_frontbuffer.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_frontbuffer.h
new file mode 100644
index 000000000000..2a3f12d2978c
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_frontbuffer.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _I915_GEM_OBJECT_FRONTBUFFER_H_
+#define _I915_GEM_OBJECT_FRONTBUFFER_H_
+
+#define i915_gem_object_get_frontbuffer(obj)		NULL
+#define i915_gem_object_set_frontbuffer(obj, front)	(front)
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gt/intel_rps.h b/drivers/gpu/drm/xe/compat-i915-headers/gt/intel_rps.h
new file mode 100644
index 000000000000..21fec9cc837c
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/gt/intel_rps.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_RPS_H__
+#define __INTEL_RPS_H__
+
+#define gen5_rps_irq_handler(x) ({})
+
+#endif /* __INTEL_RPS_H__ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_active.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_active.h
new file mode 100644
index 000000000000..6f0ab3753563
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_active.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _I915_ACTIVE_H_
+#define _I915_ACTIVE_H_
+
+#include "i915_active_types.h"
+
+static inline void i915_active_init(struct i915_active *ref,
+				    int (*active)(struct i915_active *ref),
+				    void (*retire)(struct i915_active *ref),
+				    unsigned long flags)
+{
+	(void) active;
+	(void) retire;
+}
+
+#define i915_active_fini(active) do { } while (0)
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_active_types.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_active_types.h
new file mode 100644
index 000000000000..8c31f9a8b168
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_active_types.h
@@ -0,0 +1,13 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef _I915_ACTIVE_TYPES_H_
+#define _I915_ACTIVE_TYPES_H_
+
+struct i915_active {};
+#define I915_ACTIVE_RETIRE_SLEEPS 0
+
+#endif /* _I915_ACTIVE_TYPES_H_ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_config.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_config.h
new file mode 100644
index 000000000000..e835bea08d1b
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_config.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __I915_CONFIG_H__
+#define __I915_CONFIG_H__
+
+#include <linux/sched.h>
+
+struct drm_i915_private;
+
+static inline unsigned long
+i915_fence_timeout(const struct drm_i915_private *i915)
+{
+	return MAX_SCHEDULE_TIMEOUT;
+}
+
+#endif /* __I915_CONFIG_H__ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_debugfs.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_debugfs.h
new file mode 100644
index 000000000000..b4c47617b64b
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __I915_DEBUGFS_H__
+#define __I915_DEBUGFS_H__
+
+struct drm_i915_gem_object;
+struct seq_file;
+
+static inline void i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) {}
+
+#endif /* __I915_DEBUGFS_H__ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
new file mode 100644
index 000000000000..5d2a77b52db4
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
@@ -0,0 +1,233 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+#ifndef _XE_I915_DRV_H_
+#define _XE_I915_DRV_H_
+
+/*
+ * "Adaptation header" to allow i915 display to also build for xe driver.
+ * TODO: refactor i915 and xe so this can cease to exist
+ */
+
+#include <drm/drm_drv.h>
+
+#include "gem/i915_gem_object.h"
+
+#include "soc/intel_pch.h"
+#include "xe_device.h"
+#include "xe_bo.h"
+#include "xe_pm.h"
+#include "xe_step.h"
+#include "i915_gem.h"
+#include "i915_gem_stolen.h"
+#include "i915_gpu_error.h"
+#include "i915_reg_defs.h"
+#include "i915_utils.h"
+#include "intel_gt_types.h"
+#include "intel_step.h"
+#include "intel_uc_fw.h"
+#include "intel_uncore.h"
+#include "intel_runtime_pm.h"
+#include <linux/pm_runtime.h>
+
+static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
+{
+	return container_of(dev, struct drm_i915_private, drm);
+}
+
+static inline struct drm_i915_private *kdev_to_i915(struct device *kdev)
+{
+	return dev_get_drvdata(kdev);
+}
+
+
+#define INTEL_JASPERLAKE 0
+#define INTEL_ELKHARTLAKE 0
+#define IS_PLATFORM(xe, x) ((xe)->info.platform == (x)) /* @x parenthesized: may be an expression */
+#define INTEL_INFO(dev_priv)	(&((dev_priv)->info))
+#define INTEL_DEVID(dev_priv)	((dev_priv)->info.devid)
+#define IS_I830(dev_priv)	(dev_priv && 0)
+#define IS_I845G(dev_priv)	(dev_priv && 0)
+#define IS_I85X(dev_priv)	(dev_priv && 0)
+#define IS_I865G(dev_priv)	(dev_priv && 0)
+#define IS_I915G(dev_priv)	(dev_priv && 0)
+#define IS_I915GM(dev_priv)	(dev_priv && 0)
+#define IS_I945G(dev_priv)	(dev_priv && 0)
+#define IS_I945GM(dev_priv)	(dev_priv && 0)
+#define IS_I965G(dev_priv)	(dev_priv && 0)
+#define IS_I965GM(dev_priv)	(dev_priv && 0)
+#define IS_G45(dev_priv)	(dev_priv && 0)
+#define IS_GM45(dev_priv)	(dev_priv && 0)
+#define IS_G4X(dev_priv)	(dev_priv && 0)
+#define IS_PINEVIEW(dev_priv)	(dev_priv && 0)
+#define IS_G33(dev_priv)	(dev_priv && 0)
+#define IS_IRONLAKE(dev_priv)	(dev_priv && 0)
+#define IS_IRONLAKE_M(dev_priv) (dev_priv && 0)
+#define IS_SANDYBRIDGE(dev_priv)	(dev_priv && 0)
+#define IS_IVYBRIDGE(dev_priv)	(dev_priv && 0)
+#define IS_IVB_GT1(dev_priv)	(dev_priv && 0)
+#define IS_VALLEYVIEW(dev_priv)	(dev_priv && 0)
+#define IS_CHERRYVIEW(dev_priv)	(dev_priv && 0)
+#define IS_HASWELL(dev_priv)	(dev_priv && 0)
+#define IS_BROADWELL(dev_priv)	(dev_priv && 0)
+#define IS_SKYLAKE(dev_priv)	(dev_priv && 0)
+#define IS_BROXTON(dev_priv)	(dev_priv && 0)
+#define IS_KABYLAKE(dev_priv)	(dev_priv && 0)
+#define IS_GEMINILAKE(dev_priv)	(dev_priv && 0)
+#define IS_COFFEELAKE(dev_priv)	(dev_priv && 0)
+#define IS_COMETLAKE(dev_priv)	(dev_priv && 0)
+#define IS_ICELAKE(dev_priv)	(dev_priv && 0)
+#define IS_JASPERLAKE(dev_priv)	(dev_priv && 0)
+#define IS_ELKHARTLAKE(dev_priv)	(dev_priv && 0)
+#define IS_TIGERLAKE(dev_priv)	IS_PLATFORM(dev_priv, XE_TIGERLAKE)
+#define IS_ROCKETLAKE(dev_priv)	IS_PLATFORM(dev_priv, XE_ROCKETLAKE)
+#define IS_DG1(dev_priv)        IS_PLATFORM(dev_priv, XE_DG1)
+#define IS_ALDERLAKE_S(dev_priv) IS_PLATFORM(dev_priv, XE_ALDERLAKE_S)
+#define IS_ALDERLAKE_P(dev_priv) IS_PLATFORM(dev_priv, XE_ALDERLAKE_P)
+#define IS_XEHPSDV(dev_priv) (dev_priv && 0)
+#define IS_DG2(dev_priv)	IS_PLATFORM(dev_priv, XE_DG2)
+#define IS_PONTEVECCHIO(dev_priv) IS_PLATFORM(dev_priv, XE_PVC)
+#define IS_METEORLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_METEORLAKE)
+#define IS_LUNARLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_LUNARLAKE)
+
+#define IS_HASWELL_ULT(dev_priv) (dev_priv && 0)
+#define IS_BROADWELL_ULT(dev_priv) (dev_priv && 0)
+#define IS_BROADWELL_ULX(dev_priv) (dev_priv && 0)
+
+#define IP_VER(ver, rel)                ((ver) << 8 | (rel))
+
+#define INTEL_DISPLAY_ENABLED(xe) (HAS_DISPLAY((xe)) && !intel_opregion_headless_sku((xe)))
+
+#define IS_GRAPHICS_VER(xe, first, last) \
+	((xe)->info.graphics_verx100 >= (first) * 100 && \
+	 (xe)->info.graphics_verx100 <= ((last) * 100 + 99)) /* inclusive range of major versions */
+#define IS_MOBILE(xe) (xe && 0)
+#define HAS_LLC(xe) (!IS_DGFX((xe)))
+
+#define HAS_GMD_ID(xe) (GRAPHICS_VERx100(xe) >= 1270) /* parenthesized so !HAS_GMD_ID(xe) works */
+
+/* Workarounds not handled yet; both step bounds are inclusive */
+#define IS_DISPLAY_STEP(xe, first, last) ({u8 __step = (xe)->info.step.display; (first) <= __step && __step <= (last); })
+#define IS_GRAPHICS_STEP(xe, first, last) ({u8 __step = (xe)->info.step.graphics; (first) <= __step && __step <= (last); })
+
+#define IS_LP(xe) (0)
+#define IS_GEN9_LP(xe) (0)
+#define IS_GEN9_BC(xe) (0)
+
+#define IS_TIGERLAKE_UY(xe) (xe && 0)
+#define IS_COMETLAKE_ULX(xe) (xe && 0)
+#define IS_COFFEELAKE_ULX(xe) (xe && 0)
+#define IS_KABYLAKE_ULX(xe) (xe && 0)
+#define IS_SKYLAKE_ULX(xe) (xe && 0)
+#define IS_HASWELL_ULX(xe) (xe && 0)
+#define IS_COMETLAKE_ULT(xe) (xe && 0)
+#define IS_COFFEELAKE_ULT(xe) (xe && 0)
+#define IS_KABYLAKE_ULT(xe) (xe && 0)
+#define IS_SKYLAKE_ULT(xe) (xe && 0)
+
+#define IS_DG1_GRAPHICS_STEP(xe, first, last) (IS_DG1(xe) && IS_GRAPHICS_STEP(xe, first, last))
+#define IS_DG2_GRAPHICS_STEP(xe, variant, first, last) \
+	((xe)->info.subplatform == XE_SUBPLATFORM_DG2_ ## variant && \
+	 IS_GRAPHICS_STEP(xe, first, last))
+#define IS_XEHPSDV_GRAPHICS_STEP(xe, first, last) (IS_XEHPSDV(xe) && IS_GRAPHICS_STEP(xe, first, last))
+
+/* XXX: No basedie stepping support yet */
+#define IS_PVC_BD_STEP(xe, first, last) (!WARN_ON(1) && IS_PONTEVECCHIO(xe))
+
+#define IS_TIGERLAKE_DISPLAY_STEP(xe, first, last) (IS_TIGERLAKE(xe) && IS_DISPLAY_STEP(xe, first, last))
+#define IS_ROCKETLAKE_DISPLAY_STEP(xe, first, last) (IS_ROCKETLAKE(xe) && IS_DISPLAY_STEP(xe, first, last))
+#define IS_DG1_DISPLAY_STEP(xe, first, last) (IS_DG1(xe) && IS_DISPLAY_STEP(xe, first, last))
+#define IS_DG2_DISPLAY_STEP(xe, first, last) (IS_DG2(xe) && IS_DISPLAY_STEP(xe, first, last))
+#define IS_ADLP_DISPLAY_STEP(xe, first, last) (IS_ALDERLAKE_P(xe) && IS_DISPLAY_STEP(xe, first, last))
+#define IS_ADLS_DISPLAY_STEP(xe, first, last) (IS_ALDERLAKE_S(xe) && IS_DISPLAY_STEP(xe, first, last))
+#define IS_JSL_EHL_DISPLAY_STEP(xe, first, last) ((IS_JASPERLAKE(xe) || IS_ELKHARTLAKE(xe)) && IS_DISPLAY_STEP(xe, first, last))
+#define IS_MTL_DISPLAY_STEP(xe, first, last) (IS_METEORLAKE(xe) && IS_DISPLAY_STEP(xe, first, last))
+
+/* FIXME: Add subplatform here (@sub is ignored); checks the graphics stepping */
+#define IS_MTL_GRAPHICS_STEP(xe, sub, first, last) (IS_METEORLAKE(xe) && IS_GRAPHICS_STEP(xe, first, last))
+
+#define IS_DG2_G10(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G10)
+#define IS_DG2_G11(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G11)
+#define IS_DG2_G12(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G12)
+#define IS_RAPTORLAKE_U(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_ALDERLAKE_P_RPLU)
+#define IS_ICL_WITH_PORT_F(xe) (xe && 0)
+#define HAS_FLAT_CCS(xe) (xe_device_has_flat_ccs(xe))
+#define to_intel_bo(x) gem_to_xe_bo((x))
+#define mkwrite_device_info(xe) (INTEL_INFO(xe))
+
+#define HAS_128_BYTE_Y_TILING(xe) (xe || 1)
+
+#define intel_has_gpu_reset(a) (a && 0)
+
+#include "intel_wakeref.h"
+
+/* Take a runtime PM reference; if that fails, drop it again and report false. */
+static inline bool intel_runtime_pm_get(struct xe_runtime_pm *pm)
+{
+	struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
+	bool acquired = xe_pm_runtime_get(xe) >= 0;
+
+	if (!acquired)
+		xe_pm_runtime_put(xe);
+	return acquired;
+}
+
+static inline bool intel_runtime_pm_get_if_in_use(struct xe_runtime_pm *pm)
+{
+	struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
+
+	return xe_pm_runtime_get_if_active(xe);
+}
+
+static inline void intel_runtime_pm_put_unchecked(struct xe_runtime_pm *pm)
+{
+	struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
+
+	xe_pm_runtime_put(xe);
+}
+
+static inline void intel_runtime_pm_put(struct xe_runtime_pm *pm, bool wakeref)
+{
+	if (wakeref)
+		intel_runtime_pm_put_unchecked(pm);
+}
+
+#define intel_runtime_pm_get_raw intel_runtime_pm_get
+#define intel_runtime_pm_put_raw intel_runtime_pm_put
+#define assert_rpm_wakelock_held(x) do { } while (0)
+#define assert_rpm_raw_wakeref_held(x) do { } while (0)
+
+#define intel_uncore_forcewake_get(x, y) do { } while (0)
+#define intel_uncore_forcewake_put(x, y) do { } while (0)
+
+#define intel_uncore_arm_unclaimed_mmio_detection(x) do { } while (0)
+
+#define I915_PRIORITY_DISPLAY 0
+struct i915_sched_attr {
+	int priority;
+};
+#define i915_gem_fence_wait_priority(fence, attr) do { (void) attr; } while (0)
+
+#define with_intel_runtime_pm(rpm, wf) \
+	for ((wf) = intel_runtime_pm_get(rpm); (wf); \
+	     intel_runtime_pm_put((rpm), (wf)), (wf) = 0)
+
+#define pdev_to_i915 pdev_to_xe_device
+#define RUNTIME_INFO(xe)		(&(xe)->info.i915_runtime)
+
+#define FORCEWAKE_ALL XE_FORCEWAKE_ALL
+#define HPD_STORM_DEFAULT_THRESHOLD 50
+
+#ifdef CONFIG_ARM64
+/*
+ * arm64 indirectly includes linux/rtc.h,
+ * which defines an irq_lock, so include it
+ * here before #define-ing it
+ */
+#include <linux/rtc.h>
+#endif
+
+#define irq_lock irq.lock
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_fixed.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_fixed.h
new file mode 100644
index 000000000000..12c671fd5235
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_fixed.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../i915/i915_fixed.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem.h
new file mode 100644
index 000000000000..06b723a479c5
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __I915_GEM_H__
+#define __I915_GEM_H__
+#define GEM_BUG_ON
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h
new file mode 100644
index 000000000000..888e7a87a925
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h
@@ -0,0 +1,79 @@
+#ifndef _I915_GEM_STOLEN_H_
+#define _I915_GEM_STOLEN_H_
+
+#include "xe_ttm_stolen_mgr.h"
+#include "xe_res_cursor.h"
+
+struct xe_bo;
+
+struct i915_stolen_fb {
+	struct xe_bo *bo;
+};
+
+/*
+ * Create and pin a BO in stolen memory within @start..@end and store it in @fb.
+ * @align is not forwarded. On pin failure @fb->bo is set to NULL.
+ */
+static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe,
+						       struct i915_stolen_fb *fb,
+						       u32 size, u32 align,
+						       u32 start, u32 end)
+{
+	struct xe_bo *bo;
+	int err;
+	u32 flags = XE_BO_CREATE_PINNED_BIT | XE_BO_CREATE_STOLEN_BIT;
+
+	bo = xe_bo_create_locked_range(xe, xe_device_get_root_tile(xe),
+				       NULL, size, start, end,
+				       ttm_bo_type_kernel, flags);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	err = xe_bo_pin(bo);
+	xe_bo_unlock_vm_held(bo);
+	if (err) {
+		/* Drop the reference on the BO created above, not on @fb's. */
+		xe_bo_put(bo);
+		bo = NULL;
+	}
+	fb->bo = bo;
+	return err;
+}
+
+static inline int i915_gem_stolen_insert_node(struct xe_device *xe,
+					      struct i915_stolen_fb *fb,
+					      u32 size, u32 align)
+{
+	/* Not used on xe */
+	BUG_ON(1);
+	return -ENODEV;
+}
+
+static inline void i915_gem_stolen_remove_node(struct xe_device *xe,
+					       struct i915_stolen_fb *fb)
+{
+	xe_bo_unpin_map_no_vm(fb->bo);
+	fb->bo = NULL;
+}
+
+#define i915_gem_stolen_initialized(xe) (!!ttm_manager_type(&(xe)->ttm, XE_PL_STOLEN))
+#define i915_gem_stolen_node_allocated(fb) (!!((fb)->bo))
+
+static inline u32 i915_gem_stolen_node_offset(struct i915_stolen_fb *fb)
+{
+	struct xe_res_cursor res;
+
+	xe_res_first(fb->bo->ttm.resource, 0, 4096, &res);
+	return res.start;
+}
+
+/* Used for < gen4. These are not supported by Xe */
+#define i915_gem_stolen_area_address(xe) (!WARN_ON(1))
+/* Used for gen9 specific WA. Gen9 is not supported by Xe */
+#define i915_gem_stolen_area_size(xe) (!WARN_ON(1))
+
+#define i915_gem_stolen_node_address(xe, fb) (xe_ttm_stolen_gpu_offset(xe) + \
+					 i915_gem_stolen_node_offset(fb))
+#define i915_gem_stolen_node_size(fb) ((u64)((fb)->bo->ttm.base.size))
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h
new file mode 100644
index 000000000000..98e9dd78f670
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _I915_GPU_ERROR_H_
+#define _I915_GPU_ERROR_H_
+
+struct drm_i915_error_state_buf;
+
+__printf(2, 3)
+static inline void
+i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
+{
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_irq.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_irq.h
new file mode 100644
index 000000000000..61707a07f91f
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_irq.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../i915/i915_irq.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h
new file mode 100644
index 000000000000..8619ec015ad4
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../i915/i915_reg.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_reg_defs.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_reg_defs.h
new file mode 100644
index 000000000000..723279c975b1
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_reg_defs.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../i915/i915_reg_defs.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_trace.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_trace.h
new file mode 100644
index 000000000000..d429d421ac70
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_trace.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#define trace_i915_reg_rw(a...) do { } while (0)
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h
new file mode 100644
index 000000000000..1d7c4360e5c0
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../i915/i915_utils.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h
new file mode 100644
index 000000000000..80b024d435dc
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _I915_VGPU_H_
+#define _I915_VGPU_H_
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+struct i915_ggtt;
+
+static inline void intel_vgpu_detect(struct drm_i915_private *i915)
+{
+}
+static inline bool intel_vgpu_active(struct drm_i915_private *i915)
+{
+	return false;
+}
+static inline void intel_vgpu_register(struct drm_i915_private *i915)
+{
+}
+static inline bool intel_vgpu_has_full_ppgtt(struct drm_i915_private *i915)
+{
+	return false;
+}
+static inline bool intel_vgpu_has_hwsp_emulation(struct drm_i915_private *i915)
+{
+	return false;
+}
+static inline bool intel_vgpu_has_huge_gtt(struct drm_i915_private *i915)
+{
+	return false;
+}
+static inline int intel_vgt_balloon(struct i915_ggtt *ggtt)
+{
+	return 0;
+}
+static inline void intel_vgt_deballoon(struct i915_ggtt *ggtt)
+{
+}
+
+#endif /* _I915_VGPU_H_ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h
new file mode 100644
index 000000000000..a20d2638ea7a
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef I915_VMA_H
+#define I915_VMA_H
+
+#include <uapi/drm/i915_drm.h>
+#include <drm/drm_mm.h>
+
+/* We don't want these from i915_drm.h in case of Xe */
+#undef I915_TILING_X
+#undef I915_TILING_Y
+#define I915_TILING_X 0
+#define I915_TILING_Y 0
+
+struct xe_bo;
+
+struct i915_vma {
+	struct xe_bo *bo, *dpt;
+	struct drm_mm_node node;
+};
+
+#define i915_ggtt_clear_scanout(bo) do { } while (0)
+
+#define i915_vma_fence_id(vma) (-1)
+
+static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
+{
+	return vma->node.start;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma_types.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma_types.h
new file mode 100644
index 000000000000..e7aaf50f5485
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma_types.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/types.h>
+#include <linux/build_bug.h>
+
+/* XXX: Figure out how to handle this vma mapping in xe */
+struct intel_remapped_plane_info {
+	/* in gtt pages */
+	u32 offset:31;
+	u32 linear:1;
+	union {
+		/* in gtt pages for !linear */
+		struct {
+			u16 width;
+			u16 height;
+			u16 src_stride;
+			u16 dst_stride;
+		};
+
+		/* in gtt pages for linear */
+		u32 size;
+	};
+} __packed;
+
+struct intel_remapped_info {
+	struct intel_remapped_plane_info plane[4];
+	/* in gtt pages */
+	u32 plane_alignment;
+} __packed;
+
+struct intel_rotation_info {
+	struct intel_remapped_plane_info plane[2];
+} __packed;
+
+enum i915_gtt_view_type {
+	I915_GTT_VIEW_NORMAL = 0,
+	I915_GTT_VIEW_ROTATED = sizeof(struct intel_rotation_info),
+	I915_GTT_VIEW_REMAPPED = sizeof(struct intel_remapped_info),
+};
+
+static inline void assert_i915_gem_gtt_types(void)
+{
+	BUILD_BUG_ON(sizeof(struct intel_rotation_info) != 2 * sizeof(u32) + 8 * sizeof(u16));
+	BUILD_BUG_ON(sizeof(struct intel_remapped_info) != 5 * sizeof(u32) + 16 * sizeof(u16));
+
+	/* Check that rotation/remapped shares offsets for simplicity */
+	BUILD_BUG_ON(offsetof(struct intel_remapped_info, plane[0]) !=
+		     offsetof(struct intel_rotation_info, plane[0]));
+	BUILD_BUG_ON(offsetofend(struct intel_remapped_info, plane[1]) !=
+		     offsetofend(struct intel_rotation_info, plane[1]));
+
+	/* As we encode the size of each branch inside the union into its type,
+	 * we have to be careful that each branch has a unique size.
+	 */
+	switch ((enum i915_gtt_view_type)0) {
+	case I915_GTT_VIEW_NORMAL:
+	case I915_GTT_VIEW_ROTATED:
+	case I915_GTT_VIEW_REMAPPED:
+		/* gcc complains if these are identical cases */
+		break;
+	}
+}
+
+struct i915_gtt_view {
+	enum i915_gtt_view_type type;
+	union {
+		/* Members need to contain no holes/padding */
+		struct intel_rotation_info rotated;
+		struct intel_remapped_info remapped;
+	};
+};
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_clock_gating.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_clock_gating.h
new file mode 100644
index 000000000000..ce986f0e8f38
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_clock_gating.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../i915/intel_clock_gating.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_gt_types.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_gt_types.h
new file mode 100644
index 000000000000..c15806d6c4f7
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_gt_types.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_GT_TYPES__
+#define __INTEL_GT_TYPES__
+
+#define intel_gt_support_legacy_fencing(gt) 0
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_mchbar_regs.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_mchbar_regs.h
new file mode 100644
index 000000000000..55b316985340
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_mchbar_regs.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../i915/intel_mchbar_regs.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_pci_config.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_pci_config.h
new file mode 100644
index 000000000000..8c15867fd613
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_pci_config.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../i915/intel_pci_config.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h
new file mode 100644
index 000000000000..0c47661bdc6a
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_PCODE_H__
+#define __INTEL_PCODE_H__
+
+#include "intel_uncore.h"
+#include "xe_pcode.h"
+
+static inline int
+snb_pcode_write_timeout(struct intel_uncore *uncore, u32 mbox, u32 val,
+			int fast_timeout_us, int slow_timeout_ms)
+{
+	return xe_pcode_write_timeout(__compat_uncore_to_gt(uncore), mbox, val,
+				      slow_timeout_ms ?: 1);
+}
+
+static inline int
+snb_pcode_write(struct intel_uncore *uncore, u32 mbox, u32 val)
+{
+
+	return xe_pcode_write(__compat_uncore_to_gt(uncore), mbox, val);
+}
+
+static inline int
+snb_pcode_read(struct intel_uncore *uncore, u32 mbox, u32 *val, u32 *val1)
+{
+	return xe_pcode_read(__compat_uncore_to_gt(uncore), mbox, val, val1);
+}
+
+static inline int
+skl_pcode_request(struct intel_uncore *uncore, u32 mbox,
+		  u32 request, u32 reply_mask, u32 reply,
+		  int timeout_base_ms)
+{
+	return xe_pcode_request(__compat_uncore_to_gt(uncore), mbox, request, reply_mask, reply,
+				timeout_base_ms);
+}
+
+#endif /* __INTEL_PCODE_H__ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h
new file mode 100644
index 000000000000..89da3cc62f39
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "intel_wakeref.h"
+
+#define intel_runtime_pm xe_runtime_pm
+
+static inline void disable_rpm_wakeref_asserts(void *rpm)
+{
+}
+
+static inline void enable_rpm_wakeref_asserts(void *rpm)
+{
+}
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h
new file mode 100644
index 000000000000..0006ef812346
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_STEP_H__
+#define __INTEL_STEP_H__
+
+#include "xe_device_types.h"
+#include "xe_step.h"
+
+#define intel_display_step_name xe_display_step_name
+
+static inline
+const char *xe_display_step_name(struct xe_device *xe)
+{
+	return xe_step_name(xe->info.step.display);
+}
+
+#endif /* __INTEL_STEP_H__ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uc_fw.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uc_fw.h
new file mode 100644
index 000000000000..009745328992
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uc_fw.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _INTEL_UC_FW_H_
+#define _INTEL_UC_FW_H_
+
+#define INTEL_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git"
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
new file mode 100644
index 000000000000..cd26ddc0f69e
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
@@ -0,0 +1,175 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_UNCORE_H__
+#define __INTEL_UNCORE_H__
+
+#include "xe_device.h"
+#include "xe_device_types.h"
+#include "xe_mmio.h"
+
+static inline struct xe_gt *__compat_uncore_to_gt(struct intel_uncore *uncore)
+{
+	struct xe_device *xe = container_of(uncore, struct xe_device, uncore);
+
+	return xe_root_mmio_gt(xe);
+}
+
+static inline u32 intel_uncore_read(struct intel_uncore *uncore,
+				    i915_reg_t i915_reg)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg);
+}
+
+static inline u32 intel_uncore_read8(struct intel_uncore *uncore,
+				     i915_reg_t i915_reg)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	return xe_mmio_read8(__compat_uncore_to_gt(uncore), reg);
+}
+
+static inline u32 intel_uncore_read16(struct intel_uncore *uncore,
+				      i915_reg_t i915_reg)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	return xe_mmio_read16(__compat_uncore_to_gt(uncore), reg);
+}
+
+static inline u64
+intel_uncore_read64_2x32(struct intel_uncore *uncore,
+			 i915_reg_t i915_lower_reg, i915_reg_t i915_upper_reg)
+{
+	struct xe_reg lower_reg = XE_REG(i915_mmio_reg_offset(i915_lower_reg));
+	struct xe_reg upper_reg = XE_REG(i915_mmio_reg_offset(i915_upper_reg));
+	u32 upper, lower, old_upper;
+	int loop = 0;
+
+	upper = xe_mmio_read32(__compat_uncore_to_gt(uncore), upper_reg);
+	do {
+		old_upper = upper;
+		lower = xe_mmio_read32(__compat_uncore_to_gt(uncore), lower_reg);
+		upper = xe_mmio_read32(__compat_uncore_to_gt(uncore), upper_reg);
+	} while (upper != old_upper && loop++ < 2);
+
+	return (u64)upper << 32 | lower;
+}
+
+static inline void intel_uncore_posting_read(struct intel_uncore *uncore,
+					     i915_reg_t i915_reg)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	xe_mmio_read32(__compat_uncore_to_gt(uncore), reg);
+}
+
+static inline void intel_uncore_write(struct intel_uncore *uncore,
+				      i915_reg_t i915_reg, u32 val)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	xe_mmio_write32(__compat_uncore_to_gt(uncore), reg, val);
+}
+
+static inline u32 intel_uncore_rmw(struct intel_uncore *uncore,
+				   i915_reg_t i915_reg, u32 clear, u32 set)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	return xe_mmio_rmw32(__compat_uncore_to_gt(uncore), reg, clear, set);
+}
+
+static inline int intel_wait_for_register(struct intel_uncore *uncore,
+					  i915_reg_t i915_reg, u32 mask,
+					  u32 value, unsigned int timeout)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	return xe_mmio_wait32(__compat_uncore_to_gt(uncore), reg, mask, value,
+			      timeout * USEC_PER_MSEC, NULL, false);
+}
+
+static inline int intel_wait_for_register_fw(struct intel_uncore *uncore,
+					     i915_reg_t i915_reg, u32 mask,
+					     u32 value, unsigned int timeout)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	return xe_mmio_wait32(__compat_uncore_to_gt(uncore), reg, mask, value,
+			      timeout * USEC_PER_MSEC, NULL, false);
+}
+
+static inline int
+__intel_wait_for_register(struct intel_uncore *uncore, i915_reg_t i915_reg,
+			  u32 mask, u32 value, unsigned int fast_timeout_us,
+			  unsigned int slow_timeout_ms, u32 *out_value)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	return xe_mmio_wait32(__compat_uncore_to_gt(uncore), reg, mask, value,
+			      fast_timeout_us + slow_timeout_ms * USEC_PER_MSEC,
+			      out_value, false);
+}
+
+static inline u32 intel_uncore_read_fw(struct intel_uncore *uncore,
+				       i915_reg_t i915_reg)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg);
+}
+
+static inline void intel_uncore_write_fw(struct intel_uncore *uncore,
+					 i915_reg_t i915_reg, u32 val)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	xe_mmio_write32(__compat_uncore_to_gt(uncore), reg, val);
+}
+
+static inline u32 intel_uncore_read_notrace(struct intel_uncore *uncore,
+					    i915_reg_t i915_reg)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg);
+}
+
+static inline void intel_uncore_write_notrace(struct intel_uncore *uncore,
+					      i915_reg_t i915_reg, u32 val)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	xe_mmio_write32(__compat_uncore_to_gt(uncore), reg, val);
+}
+
+static inline void __iomem *intel_uncore_regs(struct intel_uncore *uncore)
+{
+	struct xe_device *xe = container_of(uncore, struct xe_device, uncore);
+
+	return xe_device_get_root_tile(xe)->mmio.regs;
+}
+
+/*
+ * The raw_reg_{read,write} macros are intended as a micro-optimization for
+ * interrupt handlers so that the pointer indirection on uncore->regs can
+ * be computed once (and presumably cached in a register) instead of generating
+ * extra load instructions for each MMIO access.
+ *
+ * Given that these macros are only intended for non-GSI interrupt registers
+ * (and the goal is to avoid extra instructions generated by the compiler),
+ * these macros do not account for uncore->gsi_offset.  Any caller that needs
+ * to use these macros on a GSI register is responsible for adding the
+ * appropriate GSI offset to the 'base' parameter.
+ */
+#define raw_reg_read(base, reg) \
+	readl((base) + i915_mmio_reg_offset(reg))
+#define raw_reg_write(base, reg, value) \
+	writel((value), (base) + i915_mmio_reg_offset(reg))
+
+#endif /* __INTEL_UNCORE_H__ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h
new file mode 100644
index 000000000000..ecb1c0707706
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/types.h>
+
+typedef unsigned long intel_wakeref_t;
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h b/drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h
new file mode 100644
index 000000000000..c2c30ece8f77
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_PXP_H__
+#define __INTEL_PXP_H__
+
+#include <linux/errno.h>
+#include <linux/types.h>
+
+struct drm_i915_gem_object;
+struct intel_pxp;
+
+static inline int intel_pxp_key_check(struct intel_pxp *pxp,
+				      struct drm_i915_gem_object *obj,
+				      bool assign)
+{
+	return -ENODEV;
+}
+
+static inline bool
+i915_gem_object_is_protected(const struct drm_i915_gem_object *obj)
+{
+	return false;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_dram.h b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_dram.h
new file mode 100644
index 000000000000..65707e20c557
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_dram.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../../i915/soc/intel_dram.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_gmch.h b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_gmch.h
new file mode 100644
index 000000000000..33c5257b3a71
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_gmch.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../../i915/soc/intel_gmch.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h
new file mode 100644
index 000000000000..9c46556d33a4
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../../i915/soc/intel_pch.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h b/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h
new file mode 100644
index 000000000000..ec6f12de5727
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2013-2021 Intel Corporation
+ */
+
+#ifndef _VLV_SIDEBAND_H_
+#define _VLV_SIDEBAND_H_
+
+#include <linux/types.h>
+
+#include "vlv_sideband_reg.h"
+
+enum pipe;
+struct drm_i915_private;
+
+enum {
+	VLV_IOSF_SB_BUNIT,
+	VLV_IOSF_SB_CCK,
+	VLV_IOSF_SB_CCU,
+	VLV_IOSF_SB_DPIO,
+	VLV_IOSF_SB_FLISDSI,
+	VLV_IOSF_SB_GPIO,
+	VLV_IOSF_SB_NC,
+	VLV_IOSF_SB_PUNIT,
+};
+
+static inline void vlv_iosf_sb_get(struct drm_i915_private *i915, unsigned long ports)
+{
+}
+static inline u32 vlv_iosf_sb_read(struct drm_i915_private *i915, u8 port, u32 reg)
+{
+	return 0;
+}
+static inline void vlv_iosf_sb_write(struct drm_i915_private *i915,
+				     u8 port, u32 reg, u32 val)
+{
+}
+static inline void vlv_iosf_sb_put(struct drm_i915_private *i915, unsigned long ports)
+{
+}
+static inline void vlv_bunit_get(struct drm_i915_private *i915)
+{
+}
+static inline u32 vlv_bunit_read(struct drm_i915_private *i915, u32 reg)
+{
+	return 0;
+}
+static inline void vlv_bunit_write(struct drm_i915_private *i915, u32 reg, u32 val)
+{
+}
+static inline void vlv_bunit_put(struct drm_i915_private *i915)
+{
+}
+static inline void vlv_cck_get(struct drm_i915_private *i915)
+{
+}
+static inline u32 vlv_cck_read(struct drm_i915_private *i915, u32 reg)
+{
+	return 0;
+}
+static inline void vlv_cck_write(struct drm_i915_private *i915, u32 reg, u32 val)
+{
+}
+static inline void vlv_cck_put(struct drm_i915_private *i915)
+{
+}
+static inline void vlv_ccu_get(struct drm_i915_private *i915)
+{
+}
+static inline u32 vlv_ccu_read(struct drm_i915_private *i915, u32 reg)
+{
+	return 0;
+}
+static inline void vlv_ccu_write(struct drm_i915_private *i915, u32 reg, u32 val)
+{
+}
+static inline void vlv_ccu_put(struct drm_i915_private *i915)
+{
+}
+static inline void vlv_dpio_get(struct drm_i915_private *i915)
+{
+}
+static inline u32 vlv_dpio_read(struct drm_i915_private *i915, int pipe, int reg)
+{
+	return 0;
+}
+static inline void vlv_dpio_write(struct drm_i915_private *i915,
+				  int pipe, int reg, u32 val)
+{
+}
+static inline void vlv_dpio_put(struct drm_i915_private *i915)
+{
+}
+static inline void vlv_flisdsi_get(struct drm_i915_private *i915)
+{
+}
+static inline u32 vlv_flisdsi_read(struct drm_i915_private *i915, u32 reg)
+{
+	return 0;
+}
+static inline void vlv_flisdsi_write(struct drm_i915_private *i915, u32 reg, u32 val)
+{
+}
+static inline void vlv_flisdsi_put(struct drm_i915_private *i915)
+{
+}
+static inline void vlv_nc_get(struct drm_i915_private *i915)
+{
+}
+static inline u32 vlv_nc_read(struct drm_i915_private *i915, u8 addr)
+{
+	return 0;
+}
+static inline void vlv_nc_put(struct drm_i915_private *i915)
+{
+}
+static inline void vlv_punit_get(struct drm_i915_private *i915)
+{
+}
+static inline u32 vlv_punit_read(struct drm_i915_private *i915, u32 addr)
+{
+	return 0;
+}
+static inline int vlv_punit_write(struct drm_i915_private *i915, u32 addr, u32 val)
+{
+	return 0;
+}
+static inline void vlv_punit_put(struct drm_i915_private *i915)
+{
+}
+
+#endif /* _VLV_SIDEBAND_H_ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband_reg.h b/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband_reg.h
new file mode 100644
index 000000000000..949f134ce3cf
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband_reg.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../i915/vlv_sideband_reg.h"
diff --git a/drivers/gpu/drm/xe/display/ext/i915_irq.c b/drivers/gpu/drm/xe/display/ext/i915_irq.c
new file mode 100644
index 000000000000..bee191a4a97d
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/ext/i915_irq.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "i915_irq.h"
+#include "i915_reg.h"
+#include "intel_uncore.h"
+
+void gen3_irq_reset(struct intel_uncore *uncore, i915_reg_t imr,
+		    i915_reg_t iir, i915_reg_t ier)
+{
+	intel_uncore_write(uncore, imr, 0xffffffff);
+	intel_uncore_posting_read(uncore, imr);
+
+	intel_uncore_write(uncore, ier, 0);
+
+	/* IIR can theoretically queue up two events. Be paranoid. */
+	intel_uncore_write(uncore, iir, 0xffffffff);
+	intel_uncore_posting_read(uncore, iir);
+	intel_uncore_write(uncore, iir, 0xffffffff);
+	intel_uncore_posting_read(uncore, iir);
+}
+
+/*
+ * We should clear IMR at preinstall/uninstall, and just check at postinstall.
+ */
+void gen3_assert_iir_is_zero(struct intel_uncore *uncore, i915_reg_t reg)
+{
+	struct xe_device *xe = container_of(uncore, struct xe_device, uncore);
+	u32 val = intel_uncore_read(uncore, reg);
+
+	if (val == 0)
+		return;
+
+	drm_WARN(&xe->drm, 1,
+		 "Interrupt register 0x%x is not zero: 0x%08x\n",
+		 i915_mmio_reg_offset(reg), val);
+	intel_uncore_write(uncore, reg, 0xffffffff);
+	intel_uncore_posting_read(uncore, reg);
+	intel_uncore_write(uncore, reg, 0xffffffff);
+	intel_uncore_posting_read(uncore, reg);
+}
+
+void gen3_irq_init(struct intel_uncore *uncore,
+		   i915_reg_t imr, u32 imr_val,
+		   i915_reg_t ier, u32 ier_val,
+		   i915_reg_t iir)
+{
+	gen3_assert_iir_is_zero(uncore, iir);
+
+	intel_uncore_write(uncore, ier, ier_val);
+	intel_uncore_write(uncore, imr, imr_val);
+	intel_uncore_posting_read(uncore, imr);
+}
+
+bool intel_irqs_enabled(struct xe_device *xe)
+{
+	/*
+	 * XXX: i915 has a racy handling of the irq.enabled, since it doesn't
+	 * lock its transitions. Because of that, the irq.enabled sometimes
+	 * is not read with the irq.lock in place.
+	 * However, the most critical cases like vblank and page flips are
+	 * properly using the locks.
+	 * We cannot take the lock in here or run any kind of assert because
+	 * of i915 inconsistency.
+	 * But at this point the xe irq is better protected against races,
+	 * although the full solution would be protecting the i915 side.
+	 */
+	return xe->irq.enabled;
+}
+
+void intel_synchronize_irq(struct xe_device *xe)
+{
+	synchronize_irq(to_pci_dev(xe->drm.dev)->irq);
+}
diff --git a/drivers/gpu/drm/xe/display/ext/i915_utils.c b/drivers/gpu/drm/xe/display/ext/i915_utils.c
new file mode 100644
index 000000000000..43b10a2cc508
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/ext/i915_utils.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "i915_drv.h"
+
+bool i915_vtd_active(struct drm_i915_private *i915)
+{
+	if (device_iommu_mapped(i915->drm.dev))
+		return true;
+
+	/* Running as a guest, we assume the host is enforcing VT-d */
+	return i915_run_as_guest();
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
+
+/* i915 specific, just put here for shutting it up */
+int __i915_inject_probe_error(struct drm_i915_private *i915, int err,
+			      const char *func, int line)
+{
+	return 0;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.c b/drivers/gpu/drm/xe/display/intel_fb_bo.c
new file mode 100644
index 000000000000..b21da7b745a5
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/intel_fb_bo.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <drm/drm_modeset_helper.h>
+
+#include "i915_drv.h"
+#include "intel_display_types.h"
+#include "intel_fb_bo.h"
+
+void intel_fb_bo_framebuffer_fini(struct xe_bo *bo)
+{
+	if (bo->flags & XE_BO_CREATE_PINNED_BIT) {
+		/* Unpin our kernel fb first */
+		xe_bo_lock(bo, false);
+		xe_bo_unpin(bo);
+		xe_bo_unlock(bo);
+	}
+	xe_bo_put(bo);
+}
+
+int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
+				 struct xe_bo *bo,
+				 struct drm_mode_fb_cmd2 *mode_cmd)
+{
+	struct drm_i915_private *i915 = to_i915(bo->ttm.base.dev);
+	int ret;
+
+	ret = ttm_bo_reserve(&bo->ttm, true, false, NULL);
+	if (ret)
+		return ret;
+
+	if (!(bo->flags & XE_BO_SCANOUT_BIT)) {
+		/*
+		 * XE_BO_SCANOUT_BIT should ideally be set at creation, or is
+		 * automatically set when creating FB. We cannot change caching
+		 * mode when the object is VM_BINDed, so we can only set
+		 * coherency with display when unbound.
+		 */
+		if (XE_IOCTL_DBG(i915, !list_empty(&bo->ttm.base.gpuva.list))) {
+			ttm_bo_unreserve(&bo->ttm);
+			return -EINVAL;
+		}
+		bo->flags |= XE_BO_SCANOUT_BIT;
+	}
+	ttm_bo_unreserve(&bo->ttm);
+
+	/* Paired with the put in _fini(); taken last so error paths leak nothing */
+	xe_bo_get(bo);
+	return 0;
+}
+
+struct xe_bo *intel_fb_bo_lookup_valid_bo(struct drm_i915_private *i915,
+					  struct drm_file *filp,
+					  const struct drm_mode_fb_cmd2 *mode_cmd)
+{
+	struct drm_gem_object *gem = drm_gem_object_lookup(filp, mode_cmd->handles[0]);
+	struct xe_bo *bo;
+
+	if (!gem)
+		return ERR_PTR(-ENOENT);
+
+	bo = gem_to_xe_bo(gem);
+	/* On discrete: require a bo that can migrate to VRAM, or a dma-buf import */
+	if (IS_DGFX(i915) &&
+	    !xe_bo_can_migrate(bo, XE_PL_VRAM0) &&
+	    bo->ttm.type != ttm_bo_type_sg) {
+		drm_gem_object_put(gem);
+		return ERR_PTR(-EREMOTE);
+	}
+
+	return bo;
+}
diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.h b/drivers/gpu/drm/xe/display/intel_fb_bo.h
new file mode 100644
index 000000000000..5d365b925b7a
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/intel_fb_bo.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef __INTEL_FB_BO_H__
+#define __INTEL_FB_BO_H__
+
+struct drm_file;
+struct drm_mode_fb_cmd2;
+struct drm_i915_private;
+struct intel_framebuffer;
+struct xe_bo;
+
+void intel_fb_bo_framebuffer_fini(struct xe_bo *bo);
+int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
+				 struct xe_bo *bo,
+				 struct drm_mode_fb_cmd2 *mode_cmd);
+
+struct xe_bo *intel_fb_bo_lookup_valid_bo(struct drm_i915_private *i915,
+					  struct drm_file *filp,
+					  const struct drm_mode_fb_cmd2 *mode_cmd);
+
+#endif
diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
new file mode 100644
index 000000000000..51ae3561fd0d
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "intel_fbdev_fb.h"
+
+#include <drm/drm_fb_helper.h>
+
+#include "xe_gt.h"
+#include "xe_ttm_stolen_mgr.h"
+
+#include "i915_drv.h"
+#include "intel_display_types.h"
+
+struct drm_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
+			 struct drm_fb_helper_surface_size *sizes)
+{
+	struct drm_framebuffer *fb;
+	struct drm_device *dev = helper->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_mode_fb_cmd2 mode_cmd = {};
+	struct drm_i915_gem_object *obj;
+	int size;
+
+	/* we don't do packed 24bpp */
+	if (sizes->surface_bpp == 24)
+		sizes->surface_bpp = 32;
+
+	mode_cmd.width = sizes->surface_width;
+	mode_cmd.height = sizes->surface_height;
+
+	mode_cmd.pitches[0] = ALIGN(mode_cmd.width *
+				    DIV_ROUND_UP(sizes->surface_bpp, 8), XE_PAGE_SIZE);
+	mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
+							  sizes->surface_depth);
+
+	size = mode_cmd.pitches[0] * mode_cmd.height;
+	size = PAGE_ALIGN(size);
+	obj = ERR_PTR(-ENODEV);
+
+	if (!IS_DGFX(dev_priv)) {
+		obj = xe_bo_create_pin_map(dev_priv, xe_device_get_root_tile(dev_priv),
+					   NULL, size,
+					   ttm_bo_type_kernel, XE_BO_SCANOUT_BIT |
+					   XE_BO_CREATE_STOLEN_BIT |
+					   XE_BO_CREATE_PINNED_BIT);
+		if (!IS_ERR(obj))
+			drm_info(&dev_priv->drm, "Allocated fbdev into stolen\n");
+		else
+			drm_info(&dev_priv->drm, "Allocated fbdev into stolen failed: %li\n", PTR_ERR(obj));
+	}
+	if (IS_ERR(obj)) {
+		obj = xe_bo_create_pin_map(dev_priv, xe_device_get_root_tile(dev_priv), NULL, size,
+					  ttm_bo_type_kernel, XE_BO_SCANOUT_BIT |
+					  XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(dev_priv)) |
+					  XE_BO_CREATE_PINNED_BIT);
+	}
+
+	if (IS_ERR(obj)) {
+		drm_err(&dev_priv->drm, "failed to allocate framebuffer (%pe)\n", obj);
+		fb = ERR_PTR(-ENOMEM);
+		goto err;
+	}
+
+	fb = intel_framebuffer_create(obj, &mode_cmd);
+	if (IS_ERR(fb)) {
+		xe_bo_unpin_map_no_vm(obj);
+		goto err;
+	}
+
+	drm_gem_object_put(intel_bo_to_drm_bo(obj));
+	return fb;
+
+err:
+	return fb;
+}
+
+int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info,
+			      struct drm_i915_gem_object *obj, struct i915_vma *vma)
+{
+	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
+
+	if (!(obj->flags & XE_BO_CREATE_SYSTEM_BIT)) {
+		if (obj->flags & XE_BO_CREATE_STOLEN_BIT)
+			info->fix.smem_start = xe_ttm_stolen_io_offset(obj, 0);
+		else
+			info->fix.smem_start =
+				pci_resource_start(pdev, 2) +
+				xe_bo_addr(obj, 0, XE_PAGE_SIZE);
+
+		info->fix.smem_len = obj->ttm.base.size;
+	} else {
+		/* XXX: Pure fiction, as the BO may not be physically accessible.. */
+		info->fix.smem_start = 0;
+		info->fix.smem_len = obj->ttm.base.size;
+	}
+	XE_WARN_ON(iosys_map_is_null(&obj->vmap));
+
+	info->screen_base = obj->vmap.vaddr_iomem;
+	info->screen_size = intel_bo_to_drm_bo(obj)->size;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.h b/drivers/gpu/drm/xe/display/intel_fbdev_fb.h
new file mode 100644
index 000000000000..ea186772e0bb
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_FBDEV_FB_H__
+#define __INTEL_FBDEV_FB_H__
+
+struct drm_fb_helper;
+struct drm_fb_helper_surface_size;
+struct drm_i915_gem_object;
+struct drm_i915_private;
+struct fb_info;
+struct i915_vma;
+
+struct drm_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
+			 struct drm_fb_helper_surface_size *sizes);
+int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info,
+			      struct drm_i915_gem_object *obj, struct i915_vma *vma);
+
+#endif
diff --git a/drivers/gpu/drm/xe/display/xe_display_misc.c b/drivers/gpu/drm/xe/display/xe_display_misc.c
new file mode 100644
index 000000000000..242c2ef4ca93
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/xe_display_misc.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "intel_display_types.h"
+
+struct pci_dev;
+
+unsigned int intel_gmch_vga_set_decode(struct pci_dev *pdev, bool enable_decode);
+
+unsigned int intel_gmch_vga_set_decode(struct pci_dev *pdev, bool enable_decode)
+{
+	/* TODO: implement VGA decode handling; this stub only returns 0. */
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/display/xe_display_rps.c b/drivers/gpu/drm/xe/display/xe_display_rps.c
new file mode 100644
index 000000000000..ab21c581c192
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/xe_display_rps.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "intel_display_rps.h"
+
+void intel_display_rps_boost_after_vblank(struct drm_crtc *crtc,
+					  struct dma_fence *fence)
+{
+}
+
+void intel_display_rps_mark_interactive(struct drm_i915_private *i915,
+					struct intel_atomic_state *state,
+					bool interactive)
+{
+}
diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
new file mode 100644
index 000000000000..27c2fb1c002a
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023, Intel Corporation.
+ */
+
+#include "i915_drv.h"
+#include "i915_vma.h"
+#include "intel_display_types.h"
+#include "intel_dsb_buffer.h"
+#include "xe_bo.h"
+#include "xe_gt.h"
+
+u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf)
+{
+	return xe_bo_ggtt_addr(dsb_buf->vma->bo);
+}
+
+void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val)
+{
+	iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val);
+}
+
+u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
+{
+	return iosys_map_rd(&dsb_buf->vma->bo->vmap, idx * 4, u32);
+}
+
+void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size)
+{
+	WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf));
+
+	iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size);
+}
+
+bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size)
+{
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+
+	vma = kzalloc(sizeof(*vma), GFP_KERNEL);
+	if (!vma)
+		return false;
+
+	obj = xe_bo_create_pin_map(i915, xe_device_get_root_tile(i915),
+				   NULL, PAGE_ALIGN(size),
+				   ttm_bo_type_kernel,
+				   XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(i915)) |
+				   XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(obj)) {
+		kfree(vma);
+		return false;
+	}
+
+	vma->bo = obj;
+	dsb_buf->vma = vma;
+	dsb_buf->buf_size = size;
+
+	return true;
+}
+
+void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf)
+{
+	xe_bo_unpin_map_no_vm(dsb_buf->vma->bo);
+	kfree(dsb_buf->vma);
+}
+
+void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf)
+{
+	/* TODO: add xe specific flush_map() for dsb buffer object. */
+}
diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
new file mode 100644
index 000000000000..722c84a56607
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -0,0 +1,384 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_display_types.h"
+#include "intel_dpt.h"
+#include "intel_fb.h"
+#include "intel_fb_pin.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+
+#include <drm/ttm/ttm_bo.h>
+
+static void
+write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_ofs,
+		  u32 width, u32 height, u32 src_stride, u32 dst_stride)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt;
+	u32 column, row;
+
+	/* TODO: Maybe rewrite so we can traverse the bo addresses sequentially,
+	 * by writing dpt/ggtt in a different order?
+	 */
+
+	for (column = 0; column < width; column++) {
+		u32 src_idx = src_stride * (height - 1) + column + bo_ofs;
+
+		for (row = 0; row < height; row++) {
+			u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE,
+							      xe->pat.idx[XE_CACHE_WB]);
+
+			iosys_map_wr(map, *dpt_ofs, u64, pte);
+			*dpt_ofs += 8;
+			src_idx -= src_stride;
+		}
+
+		/* The DE ignores the PTEs for the padding tiles */
+		*dpt_ofs += (dst_stride - height) * 8;
+	}
+
+	/* Align to next page */
+	*dpt_ofs = ALIGN(*dpt_ofs, 4096);
+}
+
+static void
+write_dpt_remapped(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs,
+		   u32 bo_ofs, u32 width, u32 height, u32 src_stride,
+		   u32 dst_stride)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt;
+	u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index)
+		= ggtt->pt_ops->pte_encode_bo;
+	u32 column, row;
+
+	for (row = 0; row < height; row++) {
+		u32 src_idx = src_stride * row + bo_ofs;
+
+		for (column = 0; column < width; column++) {
+			iosys_map_wr(map, *dpt_ofs, u64,
+				     pte_encode_bo(bo, src_idx * XE_PAGE_SIZE,
+				     xe->pat.idx[XE_CACHE_WB]));
+
+			*dpt_ofs += 8;
+			src_idx++;
+		}
+
+		/* The DE ignores the PTEs for the padding tiles */
+		*dpt_ofs += (dst_stride - width) * 8;
+	}
+
+	/* Align to next page */
+	*dpt_ofs = ALIGN(*dpt_ofs, 4096);
+}
+
+static int __xe_pin_fb_vma_dpt(struct intel_framebuffer *fb,
+			       const struct i915_gtt_view *view,
+			       struct i915_vma *vma)
+{
+	struct xe_device *xe = to_xe_device(fb->base.dev);
+	struct xe_tile *tile0 = xe_device_get_root_tile(xe);
+	struct xe_ggtt *ggtt = tile0->mem.ggtt;
+	struct xe_bo *bo = intel_fb_obj(&fb->base), *dpt;
+	u32 dpt_size, size = bo->ttm.base.size;
+
+	if (view->type == I915_GTT_VIEW_NORMAL)
+		dpt_size = ALIGN(size / XE_PAGE_SIZE * 8, XE_PAGE_SIZE);
+	else if (view->type == I915_GTT_VIEW_REMAPPED)
+		dpt_size = ALIGN(intel_remapped_info_size(&fb->remapped_view.gtt.remapped) * 8,
+				 XE_PAGE_SIZE);
+	else
+		/* display uses 4K tiles instead of bytes here, convert to entries.. */
+		dpt_size = ALIGN(intel_rotation_info_size(&view->rotated) * 8,
+				 XE_PAGE_SIZE);
+
+	if (IS_DGFX(xe))
+		dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size,
+					   ttm_bo_type_kernel,
+					   XE_BO_CREATE_VRAM0_BIT |
+					   XE_BO_CREATE_GGTT_BIT);
+	else
+		dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size,
+					   ttm_bo_type_kernel,
+					   XE_BO_CREATE_STOLEN_BIT |
+					   XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(dpt))
+		dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size,
+					   ttm_bo_type_kernel,
+					   XE_BO_CREATE_SYSTEM_BIT |
+					   XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(dpt))
+		return PTR_ERR(dpt);
+
+	if (view->type == I915_GTT_VIEW_NORMAL) {
+		u32 x;
+
+		for (x = 0; x < size / XE_PAGE_SIZE; x++) {
+			u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x * XE_PAGE_SIZE,
+							      xe->pat.idx[XE_CACHE_WB]);
+
+			iosys_map_wr(&dpt->vmap, x * 8, u64, pte);
+		}
+	} else if (view->type == I915_GTT_VIEW_REMAPPED) {
+		const struct intel_remapped_info *remap_info = &view->remapped;
+		u32 i, dpt_ofs = 0;
+
+		for (i = 0; i < ARRAY_SIZE(remap_info->plane); i++)
+			write_dpt_remapped(bo, &dpt->vmap, &dpt_ofs,
+					   remap_info->plane[i].offset,
+					   remap_info->plane[i].width,
+					   remap_info->plane[i].height,
+					   remap_info->plane[i].src_stride,
+					   remap_info->plane[i].dst_stride);
+
+	} else {
+		const struct intel_rotation_info *rot_info = &view->rotated;
+		u32 i, dpt_ofs = 0;
+
+		for (i = 0; i < ARRAY_SIZE(rot_info->plane); i++)
+			write_dpt_rotated(bo, &dpt->vmap, &dpt_ofs,
+					  rot_info->plane[i].offset,
+					  rot_info->plane[i].width,
+					  rot_info->plane[i].height,
+					  rot_info->plane[i].src_stride,
+					  rot_info->plane[i].dst_stride);
+	}
+
+	vma->dpt = dpt;
+	vma->node = dpt->ggtt_node;
+	return 0;
+}
+
+static void
+write_ggtt_rotated(struct xe_bo *bo, struct xe_ggtt *ggtt, u32 *ggtt_ofs, u32 bo_ofs,
+		   u32 width, u32 height, u32 src_stride, u32 dst_stride)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	u32 column, row;
+
+	for (column = 0; column < width; column++) {
+		u32 src_idx = src_stride * (height - 1) + column + bo_ofs;
+
+		for (row = 0; row < height; row++) {
+			u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE,
+							      xe->pat.idx[XE_CACHE_WB]);
+
+			xe_ggtt_set_pte(ggtt, *ggtt_ofs, pte);
+			*ggtt_ofs += XE_PAGE_SIZE;
+			src_idx -= src_stride;
+		}
+
+		/* The DE ignores the PTEs for the padding tiles */
+		*ggtt_ofs += (dst_stride - height) * XE_PAGE_SIZE;
+	}
+}
+
+static int __xe_pin_fb_vma_ggtt(struct intel_framebuffer *fb,
+				const struct i915_gtt_view *view,
+				struct i915_vma *vma)
+{
+	struct xe_bo *bo = intel_fb_obj(&fb->base);
+	struct xe_device *xe = to_xe_device(fb->base.dev);
+	struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt;
+	u32 align;
+	int ret;
+
+	/* TODO: Consider sharing framebuffer mapping?
+	 * embed i915_vma inside intel_framebuffer
+	 */
+	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
+	ret = mutex_lock_interruptible(&ggtt->lock);
+	if (ret)
+		goto out;
+
+	align = XE_PAGE_SIZE;
+	if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
+		align = max_t(u32, align, SZ_64K);
+
+	if (bo->ggtt_node.size && view->type == I915_GTT_VIEW_NORMAL) {
+		vma->node = bo->ggtt_node;
+	} else if (view->type == I915_GTT_VIEW_NORMAL) {
+		u32 x, size = bo->ttm.base.size;
+
+		ret = xe_ggtt_insert_special_node_locked(ggtt, &vma->node, size,
+							 align, 0);
+		if (ret)
+			goto out_unlock;
+
+		for (x = 0; x < size; x += XE_PAGE_SIZE) {
+			u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x,
+							      xe->pat.idx[XE_CACHE_WB]);
+
+			xe_ggtt_set_pte(ggtt, vma->node.start + x, pte);
+		}
+	} else {
+		u32 i, ggtt_ofs;
+		const struct intel_rotation_info *rot_info = &view->rotated;
+
+		/* display seems to use tiles instead of bytes here, so convert it back.. */
+		u32 size = intel_rotation_info_size(rot_info) * XE_PAGE_SIZE;
+
+		ret = xe_ggtt_insert_special_node_locked(ggtt, &vma->node, size,
+							 align, 0);
+		if (ret)
+			goto out_unlock;
+
+		ggtt_ofs = vma->node.start;
+
+		for (i = 0; i < ARRAY_SIZE(rot_info->plane); i++)
+			write_ggtt_rotated(bo, ggtt, &ggtt_ofs,
+					   rot_info->plane[i].offset,
+					   rot_info->plane[i].width,
+					   rot_info->plane[i].height,
+					   rot_info->plane[i].src_stride,
+					   rot_info->plane[i].dst_stride);
+	}
+
+	xe_ggtt_invalidate(ggtt);
+out_unlock:
+	mutex_unlock(&ggtt->lock);
+out:
+	xe_device_mem_access_put(tile_to_xe(ggtt->tile));
+	return ret;
+}
+
+static struct i915_vma *__xe_pin_fb_vma(struct intel_framebuffer *fb,
+					const struct i915_gtt_view *view)
+{
+	struct drm_device *dev = fb->base.dev;
+	struct xe_device *xe = to_xe_device(dev);
+	struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
+	struct xe_bo *bo = intel_fb_obj(&fb->base);
+	int ret;
+
+	if (!vma)
+		return ERR_PTR(-ENOMEM);
+
+	if (IS_DGFX(to_xe_device(bo->ttm.base.dev)) &&
+	    intel_fb_rc_ccs_cc_plane(&fb->base) >= 0 &&
+	    !(bo->flags & XE_BO_NEEDS_CPU_ACCESS)) {
+		struct xe_tile *tile = xe_device_get_root_tile(xe);
+
+		/*
+		 * If we need to be able to access the clear-color value stored
+		 * in the buffer, then we require that such buffers are also CPU
+		 * accessible.  This is important on small-bar systems where
+		 * only some subset of VRAM is CPU accessible.
+		 */
+		if (tile->mem.vram.io_size < tile->mem.vram.usable_size) {
+			ret = -EINVAL;
+			goto err;
+		}
+	}
+
+	/*
+	 * Pin the framebuffer, we can't use xe_bo_(un)pin functions as the
+	 * assumptions are incorrect for framebuffers
+	 */
+	ret = ttm_bo_reserve(&bo->ttm, false, false, NULL);
+	if (ret)
+		goto err;
+
+	if (IS_DGFX(xe))
+		ret = xe_bo_migrate(bo, XE_PL_VRAM0);
+	else
+		ret = xe_bo_validate(bo, NULL, true);
+	if (!ret)
+		ttm_bo_pin(&bo->ttm);
+	ttm_bo_unreserve(&bo->ttm);
+	if (ret)
+		goto err;
+
+	vma->bo = bo;
+	if (intel_fb_uses_dpt(&fb->base))
+		ret = __xe_pin_fb_vma_dpt(fb, view, vma);
+	else
+		ret = __xe_pin_fb_vma_ggtt(fb, view, vma);
+	if (ret)
+		goto err_unpin;
+
+	return vma;
+
+err_unpin:
+	ttm_bo_reserve(&bo->ttm, false, false, NULL);
+	ttm_bo_unpin(&bo->ttm);
+	ttm_bo_unreserve(&bo->ttm);
+err:
+	kfree(vma);
+	return ERR_PTR(ret);
+}
+
+static void __xe_unpin_fb_vma(struct i915_vma *vma)
+{
+	struct xe_device *xe = to_xe_device(vma->bo->ttm.base.dev);
+	struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt;
+
+	if (vma->dpt)
+		xe_bo_unpin_map_no_vm(vma->dpt);
+	else if (!drm_mm_node_allocated(&vma->bo->ggtt_node) ||
+		 vma->bo->ggtt_node.start != vma->node.start)
+		xe_ggtt_remove_node(ggtt, &vma->node);
+
+	ttm_bo_reserve(&vma->bo->ttm, false, false, NULL);
+	ttm_bo_unpin(&vma->bo->ttm);
+	ttm_bo_unreserve(&vma->bo->ttm);
+	kfree(vma);
+}
+
+struct i915_vma *
+intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
+			   bool phys_cursor,
+			   const struct i915_gtt_view *view,
+			   bool uses_fence,
+			   unsigned long *out_flags)
+{
+	*out_flags = 0;
+
+	return __xe_pin_fb_vma(to_intel_framebuffer(fb), view);
+}
+
+void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags)
+{
+	__xe_unpin_fb_vma(vma);
+}
+
+int intel_plane_pin_fb(struct intel_plane_state *plane_state)
+{
+	struct drm_framebuffer *fb = plane_state->hw.fb;
+	struct xe_bo *bo = intel_fb_obj(fb);
+	struct i915_vma *vma;
+
+	/* We reject creating !SCANOUT fb's, so this is weird.. */
+	drm_WARN_ON(bo->ttm.base.dev, !(bo->flags & XE_BO_SCANOUT_BIT));
+
+	vma = __xe_pin_fb_vma(to_intel_framebuffer(fb), &plane_state->view.gtt);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	plane_state->ggtt_vma = vma;
+	return 0;
+}
+
+void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state)
+{
+	__xe_unpin_fb_vma(old_plane_state->ggtt_vma);
+	old_plane_state->ggtt_vma = NULL;
+}
+
+/*
+ * For Xe introduce dummy intel_dpt_create which just returns NULL and
+ * intel_dpt_destroy which does nothing.
+ */
+struct i915_address_space *intel_dpt_create(struct intel_framebuffer *fb)
+{
+	return NULL;
+}
+
+void intel_dpt_destroy(struct i915_address_space *vm)
+{
+	return;
+}
diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
new file mode 100644
index 000000000000..0f11a39333e2
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023, Intel Corporation.
+ */
+
+#include "i915_drv.h"
+#include "intel_hdcp_gsc.h"
+
+bool intel_hdcp_gsc_cs_required(struct drm_i915_private *i915)
+{
+	return true;
+}
+
+bool intel_hdcp_gsc_check_status(struct drm_i915_private *i915)
+{
+	return false;
+}
+
+int intel_hdcp_gsc_init(struct drm_i915_private *i915)
+{
+	drm_info(&i915->drm, "HDCP support not yet implemented\n");
+	return -ENODEV;
+}
+
+void intel_hdcp_gsc_fini(struct drm_i915_private *i915)
+{
+}
+
+ssize_t intel_hdcp_gsc_msg_send(struct drm_i915_private *i915, u8 *msg_in,
+				size_t msg_in_len, u8 *msg_out,
+				size_t msg_out_len)
+{
+	return -ENODEV;
+}
diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c
new file mode 100644
index 000000000000..ccf83c12b545
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c
@@ -0,0 +1,291 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+/* for ioread64 */
+#include <linux/io-64-nonatomic-lo-hi.h>
+
+#include "xe_ggtt.h"
+
+#include "i915_drv.h"
+#include "intel_atomic_plane.h"
+#include "intel_display.h"
+#include "intel_display_types.h"
+#include "intel_fb.h"
+#include "intel_fb_pin.h"
+#include "intel_frontbuffer.h"
+#include "intel_plane_initial.h"
+
+static bool
+intel_reuse_initial_plane_obj(struct drm_i915_private *i915,
+			      const struct intel_initial_plane_config *plane_config,
+			      struct drm_framebuffer **fb)
+{
+	struct intel_crtc *crtc;
+
+	for_each_intel_crtc(&i915->drm, crtc) {
+		struct intel_crtc_state *crtc_state =
+			to_intel_crtc_state(crtc->base.state);
+		struct intel_plane *plane =
+			to_intel_plane(crtc->base.primary);
+		struct intel_plane_state *plane_state =
+			to_intel_plane_state(plane->base.state);
+
+		if (!crtc_state->uapi.active)
+			continue;
+
+		if (!plane_state->ggtt_vma)
+			continue;
+
+		if (intel_plane_ggtt_offset(plane_state) == plane_config->base) {
+			*fb = plane_state->hw.fb;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static struct xe_bo *
+initial_plane_bo(struct xe_device *xe,
+		 struct intel_initial_plane_config *plane_config)
+{
+	struct xe_tile *tile0 = xe_device_get_root_tile(xe);
+	struct xe_bo *bo;
+	resource_size_t phys_base;
+	u32 base, size, flags;
+	u64 page_size = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
+
+	if (plane_config->size == 0)
+		return NULL;
+
+	flags = XE_BO_CREATE_PINNED_BIT | XE_BO_SCANOUT_BIT | XE_BO_CREATE_GGTT_BIT;
+
+	base = round_down(plane_config->base, page_size);
+	if (IS_DGFX(xe)) {
+		u64 __iomem *gte = tile0->mem.ggtt->gsm;
+		u64 pte;
+
+		gte += base / XE_PAGE_SIZE;
+
+		pte = ioread64(gte);
+		if (!(pte & XE_GGTT_PTE_DM)) {
+			drm_err(&xe->drm,
+				"Initial plane programming missing DM bit\n");
+			return NULL;
+		}
+
+		phys_base = pte & ~(page_size - 1);
+		flags |= XE_BO_CREATE_VRAM0_BIT;
+
+		/*
+		 * We don't currently expect this to ever be placed in the
+		 * stolen portion.
+		 */
+		if (phys_base >= tile0->mem.vram.usable_size) {
+			drm_err(&xe->drm,
+				"Initial plane programming using invalid range, phys_base=%pa\n",
+				&phys_base);
+			return NULL;
+		}
+
+		drm_dbg(&xe->drm,
+			"Using phys_base=%pa, based on initial plane programming\n",
+			&phys_base);
+	} else {
+		struct ttm_resource_manager *stolen = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
+
+		if (!stolen)
+			return NULL;
+		phys_base = base;
+		flags |= XE_BO_CREATE_STOLEN_BIT;
+
+		/*
+		 * If the FB is too big, just don't use it since fbdev is not very
+		 * important and we should probably use that space with FBC or other
+		 * features.
+		 */
+		if (IS_ENABLED(CONFIG_FRAMEBUFFER_CONSOLE) &&
+		    plane_config->size * 2 >> PAGE_SHIFT >= stolen->size)
+			return NULL;
+	}
+
+	size = round_up(plane_config->base + plane_config->size,
+			page_size);
+	size -= base;
+
+	bo = xe_bo_create_pin_map_at(xe, tile0, NULL, size, phys_base,
+				     ttm_bo_type_kernel, flags);
+	if (IS_ERR(bo)) {
+		drm_dbg(&xe->drm,
+			"Failed to create bo phys_base=%pa size %u with flags %x: %li\n",
+			&phys_base, size, flags, PTR_ERR(bo));
+		return NULL;
+	}
+
+	return bo;
+}
+
+static bool
+intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
+			      struct intel_initial_plane_config *plane_config)
+{
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_mode_fb_cmd2 mode_cmd = { 0 };
+	struct drm_framebuffer *fb = &plane_config->fb->base;
+	struct xe_bo *bo;
+
+	switch (fb->modifier) {
+	case DRM_FORMAT_MOD_LINEAR:
+	case I915_FORMAT_MOD_X_TILED:
+	case I915_FORMAT_MOD_Y_TILED:
+	case I915_FORMAT_MOD_4_TILED:
+		break;
+	default:
+		drm_dbg(&dev_priv->drm,
+			"Unsupported modifier for initial FB: 0x%llx\n",
+			fb->modifier);
+		return false;
+	}
+
+	mode_cmd.pixel_format = fb->format->format;
+	mode_cmd.width = fb->width;
+	mode_cmd.height = fb->height;
+	mode_cmd.pitches[0] = fb->pitches[0];
+	mode_cmd.modifier[0] = fb->modifier;
+	mode_cmd.flags = DRM_MODE_FB_MODIFIERS;
+
+	bo = initial_plane_bo(dev_priv, plane_config);
+	if (!bo)
+		return false;
+
+	if (intel_framebuffer_init(to_intel_framebuffer(fb),
+				   bo, &mode_cmd)) {
+		drm_dbg_kms(&dev_priv->drm, "intel fb init failed\n");
+		goto err_bo;
+	}
+	/* Reference handed over to fb */
+	xe_bo_put(bo);
+
+	return true;
+
+err_bo:
+	xe_bo_unpin_map_no_vm(bo);
+	return false;
+}
+
+static void
+intel_find_initial_plane_obj(struct intel_crtc *crtc,
+			     struct intel_initial_plane_config *plane_config)
+{
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_plane *plane =
+		to_intel_plane(crtc->base.primary);
+	struct intel_plane_state *plane_state =
+		to_intel_plane_state(plane->base.state);
+	struct intel_crtc_state *crtc_state =
+		to_intel_crtc_state(crtc->base.state);
+	struct drm_framebuffer *fb;
+	struct i915_vma *vma;
+
+	/*
+	 * TODO:
+	 *   Disable planes if get_initial_plane_config() failed.
+	 *   Make sure things work if the surface base is not page aligned.
+	 */
+	if (!plane_config->fb)
+		return;
+
+	if (intel_alloc_initial_plane_obj(crtc, plane_config))
+		fb = &plane_config->fb->base;
+	else if (!intel_reuse_initial_plane_obj(dev_priv, plane_config, &fb))
+		goto nofb;
+
+	plane_state->uapi.rotation = plane_config->rotation;
+	intel_fb_fill_view(to_intel_framebuffer(fb),
+			   plane_state->uapi.rotation, &plane_state->view);
+
+	vma = intel_pin_and_fence_fb_obj(fb, false, &plane_state->view.gtt,
+					 false, &plane_state->flags);
+	if (IS_ERR(vma))
+		goto nofb;
+
+	plane_state->ggtt_vma = vma;
+	plane_state->uapi.src_x = 0;
+	plane_state->uapi.src_y = 0;
+	plane_state->uapi.src_w = fb->width << 16;
+	plane_state->uapi.src_h = fb->height << 16;
+
+	plane_state->uapi.crtc_x = 0;
+	plane_state->uapi.crtc_y = 0;
+	plane_state->uapi.crtc_w = fb->width;
+	plane_state->uapi.crtc_h = fb->height;
+
+	plane_state->uapi.fb = fb;
+	drm_framebuffer_get(fb);
+
+	plane_state->uapi.crtc = &crtc->base;
+	intel_plane_copy_uapi_to_hw_state(plane_state, plane_state, crtc);
+
+	atomic_or(plane->frontbuffer_bit, &to_intel_frontbuffer(fb)->bits);
+
+	plane_config->vma = vma;
+
+	/*
+	 * Flip to the newly created mapping ASAP, so we can re-use the
+	 * first part of GGTT for WOPCM, prevent flickering, and prevent
+	 * the lookup of sysmem scratch pages.
+	 */
+	plane->check_plane(crtc_state, plane_state);
+	plane->async_flip(plane, crtc_state, plane_state, true);
+	return;
+
+nofb:
+	/*
+	 * We've failed to reconstruct the BIOS FB.  Current display state
+	 * indicates that the primary plane is visible, but has a NULL FB,
+	 * which will lead to problems later if we don't fix it up.  The
+	 * simplest solution is to just disable the primary plane now and
+	 * pretend the BIOS never had it enabled.
+	 */
+	intel_plane_disable_noatomic(crtc, plane);
+}
+
+static void plane_config_fini(struct intel_initial_plane_config *plane_config)
+{
+	if (plane_config->fb) {
+		struct drm_framebuffer *fb = &plane_config->fb->base;
+
+		/* We may only have the stub and not a full framebuffer */
+		if (drm_framebuffer_read_refcount(fb))
+			drm_framebuffer_put(fb);
+		else
+			kfree(fb);
+	}
+}
+
+void intel_crtc_initial_plane_config(struct intel_crtc *crtc)
+{
+	struct xe_device *xe = to_xe_device(crtc->base.dev);
+	struct intel_initial_plane_config plane_config = {};
+
+	/*
+	 * Note that reserving the BIOS fb up front prevents us
+	 * from stuffing other stolen allocations like the ring
+	 * on top.  This prevents some ugliness at boot time, and
+	 * can even allow for smooth boot transitions if the BIOS
+	 * fb is large enough for the active pipe configuration.
+	 */
+	xe->display.funcs.display->get_initial_plane_config(crtc, &plane_config);
+
+	/*
+	 * If the fb is shared between multiple heads, we'll
+	 * just get the first one.
+	 */
+	intel_find_initial_plane_obj(crtc, &plane_config);
+
+	plane_config_fini(&plane_config);
+}
diff --git a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h
new file mode 100644
index 000000000000..8e6dd061f2ae
--- /dev/null
+++ b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h
@@ -0,0 +1,160 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GFXPIPE_COMMANDS_H_
+#define _XE_GFXPIPE_COMMANDS_H_
+
+#include "instructions/xe_instr_defs.h"
+
+#define GFXPIPE_PIPELINE		REG_GENMASK(28, 27)
+#define   PIPELINE_COMMON		REG_FIELD_PREP(GFXPIPE_PIPELINE, 0x0)
+#define   PIPELINE_SINGLE_DW		REG_FIELD_PREP(GFXPIPE_PIPELINE, 0x1)
+#define   PIPELINE_COMPUTE		REG_FIELD_PREP(GFXPIPE_PIPELINE, 0x2)
+#define   PIPELINE_3D			REG_FIELD_PREP(GFXPIPE_PIPELINE, 0x3)
+
+#define GFXPIPE_OPCODE			REG_GENMASK(26, 24)
+#define GFXPIPE_SUBOPCODE		REG_GENMASK(23, 16)
+
+#define GFXPIPE_MATCH_MASK		(XE_INSTR_CMD_TYPE | \
+					 GFXPIPE_PIPELINE | \
+					 GFXPIPE_OPCODE | \
+					 GFXPIPE_SUBOPCODE)
+
+#define GFXPIPE_COMMON_CMD(opcode, subopcode) \
+	(XE_INSTR_GFXPIPE | PIPELINE_COMMON | \
+	 REG_FIELD_PREP(GFXPIPE_OPCODE, opcode) | \
+	 REG_FIELD_PREP(GFXPIPE_SUBOPCODE, subopcode))
+
+#define GFXPIPE_SINGLE_DW_CMD(opcode, subopcode) \
+	(XE_INSTR_GFXPIPE | PIPELINE_SINGLE_DW | \
+	 REG_FIELD_PREP(GFXPIPE_OPCODE, opcode) | \
+	 REG_FIELD_PREP(GFXPIPE_SUBOPCODE, subopcode))
+
+#define GFXPIPE_3D_CMD(opcode, subopcode) \
+	(XE_INSTR_GFXPIPE | PIPELINE_3D | \
+	 REG_FIELD_PREP(GFXPIPE_OPCODE, opcode) | \
+	 REG_FIELD_PREP(GFXPIPE_SUBOPCODE, subopcode))
+
+#define GFXPIPE_COMPUTE_CMD(opcode, subopcode) \
+	(XE_INSTR_GFXPIPE | PIPELINE_COMPUTE | \
+	 REG_FIELD_PREP(GFXPIPE_OPCODE, opcode) | \
+	 REG_FIELD_PREP(GFXPIPE_SUBOPCODE, subopcode))
+
+#define STATE_BASE_ADDRESS			GFXPIPE_COMMON_CMD(0x1, 0x1)
+#define STATE_SIP				GFXPIPE_COMMON_CMD(0x1, 0x2)
+#define GPGPU_CSR_BASE_ADDRESS			GFXPIPE_COMMON_CMD(0x1, 0x4)
+#define STATE_COMPUTE_MODE			GFXPIPE_COMMON_CMD(0x1, 0x5)
+#define CMD_3DSTATE_BTD				GFXPIPE_COMMON_CMD(0x1, 0x6)
+
+#define CMD_3DSTATE_VF_STATISTICS		GFXPIPE_SINGLE_DW_CMD(0x0, 0xB)
+
+#define PIPELINE_SELECT				GFXPIPE_SINGLE_DW_CMD(0x1, 0x4)
+
+#define CMD_3DSTATE_DRAWING_RECTANGLE_FAST	GFXPIPE_3D_CMD(0x0, 0x0)
+#define CMD_3DSTATE_CLEAR_PARAMS		GFXPIPE_3D_CMD(0x0, 0x4)
+#define CMD_3DSTATE_DEPTH_BUFFER		GFXPIPE_3D_CMD(0x0, 0x5)
+#define CMD_3DSTATE_STENCIL_BUFFER		GFXPIPE_3D_CMD(0x0, 0x6)
+#define CMD_3DSTATE_HIER_DEPTH_BUFFER		GFXPIPE_3D_CMD(0x0, 0x7)
+#define CMD_3DSTATE_VERTEX_BUFFERS		GFXPIPE_3D_CMD(0x0, 0x8)
+#define CMD_3DSTATE_VERTEX_ELEMENTS		GFXPIPE_3D_CMD(0x0, 0x9)
+#define CMD_3DSTATE_INDEX_BUFFER		GFXPIPE_3D_CMD(0x0, 0xA)
+#define CMD_3DSTATE_VF				GFXPIPE_3D_CMD(0x0, 0xC)
+#define CMD_3DSTATE_MULTISAMPLE			GFXPIPE_3D_CMD(0x0, 0xD)
+#define CMD_3DSTATE_CC_STATE_POINTERS		GFXPIPE_3D_CMD(0x0, 0xE)
+#define CMD_3DSTATE_SCISSOR_STATE_POINTERS	GFXPIPE_3D_CMD(0x0, 0xF)
+#define CMD_3DSTATE_VS				GFXPIPE_3D_CMD(0x0, 0x10)
+#define CMD_3DSTATE_GS				GFXPIPE_3D_CMD(0x0, 0x11)
+#define CMD_3DSTATE_CLIP			GFXPIPE_3D_CMD(0x0, 0x12)
+#define CMD_3DSTATE_SF				GFXPIPE_3D_CMD(0x0, 0x13)
+#define CMD_3DSTATE_WM				GFXPIPE_3D_CMD(0x0, 0x14)
+#define CMD_3DSTATE_CONSTANT_VS			GFXPIPE_3D_CMD(0x0, 0x15)
+#define CMD_3DSTATE_CONSTANT_GS			GFXPIPE_3D_CMD(0x0, 0x16)
+#define CMD_3DSTATE_SAMPLE_MASK			GFXPIPE_3D_CMD(0x0, 0x18)
+#define CMD_3DSTATE_CONSTANT_HS			GFXPIPE_3D_CMD(0x0, 0x19)
+#define CMD_3DSTATE_CONSTANT_DS			GFXPIPE_3D_CMD(0x0, 0x1A)
+#define CMD_3DSTATE_HS				GFXPIPE_3D_CMD(0x0, 0x1B)
+#define CMD_3DSTATE_TE				GFXPIPE_3D_CMD(0x0, 0x1C)
+#define CMD_3DSTATE_DS				GFXPIPE_3D_CMD(0x0, 0x1D)
+#define CMD_3DSTATE_STREAMOUT			GFXPIPE_3D_CMD(0x0, 0x1E)
+#define CMD_3DSTATE_SBE				GFXPIPE_3D_CMD(0x0, 0x1F)
+#define CMD_3DSTATE_PS				GFXPIPE_3D_CMD(0x0, 0x20)
+#define CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP	GFXPIPE_3D_CMD(0x0, 0x21)
+#define CMD_3DSTATE_CPS_POINTERS		GFXPIPE_3D_CMD(0x0, 0x22)
+#define CMD_3DSTATE_VIEWPORT_STATE_POINTERS_CC	GFXPIPE_3D_CMD(0x0, 0x23)
+#define CMD_3DSTATE_BLEND_STATE_POINTERS	GFXPIPE_3D_CMD(0x0, 0x24)
+#define CMD_3DSTATE_BINDING_TABLE_POINTERS_VS	GFXPIPE_3D_CMD(0x0, 0x26)
+#define CMD_3DSTATE_BINDING_TABLE_POINTERS_HS	GFXPIPE_3D_CMD(0x0, 0x27)
+#define CMD_3DSTATE_BINDING_TABLE_POINTERS_DS	GFXPIPE_3D_CMD(0x0, 0x28)
+#define CMD_3DSTATE_BINDING_TABLE_POINTERS_GS	GFXPIPE_3D_CMD(0x0, 0x29)
+#define CMD_3DSTATE_BINDING_TABLE_POINTERS_PS	GFXPIPE_3D_CMD(0x0, 0x2A)
+#define CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS	GFXPIPE_3D_CMD(0x0, 0x2B)
+#define CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS	GFXPIPE_3D_CMD(0x0, 0x2C)
+#define CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS	GFXPIPE_3D_CMD(0x0, 0x2D)
+#define CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS	GFXPIPE_3D_CMD(0x0, 0x2E)
+#define CMD_3DSTATE_SAMPLER_STATE_POINTERS_PS	GFXPIPE_3D_CMD(0x0, 0x2F)
+#define CMD_3DSTATE_VF_INSTANCING		GFXPIPE_3D_CMD(0x0, 0x49)
+#define CMD_3DSTATE_VF_SGVS			GFXPIPE_3D_CMD(0x0, 0x4A)
+#define CMD_3DSTATE_VF_TOPOLOGY			GFXPIPE_3D_CMD(0x0, 0x4B)
+#define CMD_3DSTATE_WM_CHROMAKEY		GFXPIPE_3D_CMD(0x0, 0x4C)
+#define CMD_3DSTATE_PS_BLEND			GFXPIPE_3D_CMD(0x0, 0x4D)
+#define CMD_3DSTATE_WM_DEPTH_STENCIL		GFXPIPE_3D_CMD(0x0, 0x4E)
+#define CMD_3DSTATE_PS_EXTRA			GFXPIPE_3D_CMD(0x0, 0x4F)
+#define CMD_3DSTATE_RASTER			GFXPIPE_3D_CMD(0x0, 0x50)
+#define CMD_3DSTATE_SBE_SWIZ			GFXPIPE_3D_CMD(0x0, 0x51)
+#define CMD_3DSTATE_WM_HZ_OP			GFXPIPE_3D_CMD(0x0, 0x52)
+#define CMD_3DSTATE_VF_COMPONENT_PACKING	GFXPIPE_3D_CMD(0x0, 0x55)
+#define CMD_3DSTATE_VF_SGVS_2			GFXPIPE_3D_CMD(0x0, 0x56)
+#define CMD_3DSTATE_VFG				GFXPIPE_3D_CMD(0x0, 0x57)
+#define CMD_3DSTATE_URB_ALLOC_VS		GFXPIPE_3D_CMD(0x0, 0x58)
+#define CMD_3DSTATE_URB_ALLOC_HS		GFXPIPE_3D_CMD(0x0, 0x59)
+#define CMD_3DSTATE_URB_ALLOC_DS		GFXPIPE_3D_CMD(0x0, 0x5A)
+#define CMD_3DSTATE_URB_ALLOC_GS		GFXPIPE_3D_CMD(0x0, 0x5B)
+#define CMD_3DSTATE_SO_BUFFER_INDEX_0		GFXPIPE_3D_CMD(0x0, 0x60)
+#define CMD_3DSTATE_SO_BUFFER_INDEX_1		GFXPIPE_3D_CMD(0x0, 0x61)
+#define CMD_3DSTATE_SO_BUFFER_INDEX_2		GFXPIPE_3D_CMD(0x0, 0x62)
+#define CMD_3DSTATE_SO_BUFFER_INDEX_3		GFXPIPE_3D_CMD(0x0, 0x63)
+#define CMD_3DSTATE_PRIMITIVE_REPLICATION	GFXPIPE_3D_CMD(0x0, 0x6C)
+#define CMD_3DSTATE_TBIMR_TILE_PASS_INFO	GFXPIPE_3D_CMD(0x0, 0x6E)
+#define CMD_3DSTATE_AMFS			GFXPIPE_3D_CMD(0x0, 0x6F)
+#define CMD_3DSTATE_DEPTH_BOUNDS		GFXPIPE_3D_CMD(0x0, 0x71)
+#define CMD_3DSTATE_AMFS_TEXTURE_POINTERS	GFXPIPE_3D_CMD(0x0, 0x72)
+#define CMD_3DSTATE_CONSTANT_TS_POINTER		GFXPIPE_3D_CMD(0x0, 0x73)
+#define CMD_3DSTATE_MESH_CONTROL		GFXPIPE_3D_CMD(0x0, 0x77)
+#define CMD_3DSTATE_MESH_DISTRIB		GFXPIPE_3D_CMD(0x0, 0x78)
+#define CMD_3DSTATE_TASK_REDISTRIB		GFXPIPE_3D_CMD(0x0, 0x79)
+#define CMD_3DSTATE_MESH_SHADER			GFXPIPE_3D_CMD(0x0, 0x7A)
+#define CMD_3DSTATE_MESH_SHADER_DATA		GFXPIPE_3D_CMD(0x0, 0x7B)
+#define CMD_3DSTATE_TASK_CONTROL		GFXPIPE_3D_CMD(0x0, 0x7C)
+#define CMD_3DSTATE_TASK_SHADER			GFXPIPE_3D_CMD(0x0, 0x7D)
+#define CMD_3DSTATE_TASK_SHADER_DATA		GFXPIPE_3D_CMD(0x0, 0x7E)
+#define CMD_3DSTATE_URB_ALLOC_MESH		GFXPIPE_3D_CMD(0x0, 0x7F)
+#define CMD_3DSTATE_URB_ALLOC_TASK		GFXPIPE_3D_CMD(0x0, 0x80)
+#define CMD_3DSTATE_CLIP_MESH			GFXPIPE_3D_CMD(0x0, 0x81)
+#define CMD_3DSTATE_SBE_MESH			GFXPIPE_3D_CMD(0x0, 0x82)
+#define CMD_3DSTATE_CPSIZE_CONTROL_BUFFER	GFXPIPE_3D_CMD(0x0, 0x83)
+
+#define CMD_3DSTATE_DRAWING_RECTANGLE		GFXPIPE_3D_CMD(0x1, 0x0)
+#define CMD_3DSTATE_CHROMA_KEY			GFXPIPE_3D_CMD(0x1, 0x4)
+#define CMD_3DSTATE_POLY_STIPPLE_OFFSET		GFXPIPE_3D_CMD(0x1, 0x6)
+#define CMD_3DSTATE_POLY_STIPPLE_PATTERN	GFXPIPE_3D_CMD(0x1, 0x7)
+#define CMD_3DSTATE_LINE_STIPPLE		GFXPIPE_3D_CMD(0x1, 0x8)
+#define CMD_3DSTATE_AA_LINE_PARAMETERS		GFXPIPE_3D_CMD(0x1, 0xA)
+#define CMD_3DSTATE_MONOFILTER_SIZE		GFXPIPE_3D_CMD(0x1, 0x11)
+#define CMD_3DSTATE_PUSH_CONSTANT_ALLOC_VS	GFXPIPE_3D_CMD(0x1, 0x12)
+#define CMD_3DSTATE_PUSH_CONSTANT_ALLOC_HS	GFXPIPE_3D_CMD(0x1, 0x13)
+#define CMD_3DSTATE_PUSH_CONSTANT_ALLOC_DS	GFXPIPE_3D_CMD(0x1, 0x14)
+#define CMD_3DSTATE_PUSH_CONSTANT_ALLOC_GS	GFXPIPE_3D_CMD(0x1, 0x15)
+#define CMD_3DSTATE_PUSH_CONSTANT_ALLOC_PS	GFXPIPE_3D_CMD(0x1, 0x16)
+#define CMD_3DSTATE_SO_DECL_LIST		GFXPIPE_3D_CMD(0x1, 0x17)
+#define   CMD_3DSTATE_SO_DECL_LIST_DW_LEN	REG_GENMASK(8, 0)
+#define CMD_3DSTATE_SO_BUFFER			GFXPIPE_3D_CMD(0x1, 0x18)
+#define CMD_3DSTATE_BINDING_TABLE_POOL_ALLOC	GFXPIPE_3D_CMD(0x1, 0x19)
+#define CMD_3DSTATE_SAMPLE_PATTERN		GFXPIPE_3D_CMD(0x1, 0x1C)
+#define CMD_3DSTATE_3D_MODE			GFXPIPE_3D_CMD(0x1, 0x1E)
+#define CMD_3DSTATE_SUBSLICE_HASH_TABLE		GFXPIPE_3D_CMD(0x1, 0x1F)
+#define CMD_3DSTATE_SLICE_TABLE_STATE_POINTERS	GFXPIPE_3D_CMD(0x1, 0x20)
+#define CMD_3DSTATE_PTBR_TILE_PASS_INFO		GFXPIPE_3D_CMD(0x1, 0x22)
+
+#endif
diff --git a/drivers/gpu/drm/xe/instructions/xe_gsc_commands.h b/drivers/gpu/drm/xe/instructions/xe_gsc_commands.h
new file mode 100644
index 000000000000..f8949cad9d0f
--- /dev/null
+++ b/drivers/gpu/drm/xe/instructions/xe_gsc_commands.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GSC_COMMANDS_H_
+#define _XE_GSC_COMMANDS_H_
+
+#include "instructions/xe_instr_defs.h"
+
+/*
+ * All GSCCS-specific commands have fixed length, so we can include it in the
+ * defines. Note that the generic GSC command header structure includes an
+ * optional data field in bits 9-21, but there are no commands that actually use
+ * it; some of the commands are instead defined as having an extended length
+ * field spanning bits 0-15, even if the extra bits are not required because the
+ * longest GSCCS command is only 8 dwords. To handle this, the defines below use
+ * a single field for both data and len. If we ever get a command that does
+ * actually have data and this approach doesn't work for it we can re-work it
+ * at that point.
+ */
+
+#define GSC_OPCODE		REG_GENMASK(28, 22)
+#define GSC_CMD_DATA_AND_LEN	REG_GENMASK(21, 0)
+
+#define __GSC_INSTR(op, dl) \
+	(XE_INSTR_GSC | \
+	REG_FIELD_PREP(GSC_OPCODE, op) | \
+	REG_FIELD_PREP(GSC_CMD_DATA_AND_LEN, dl))
+
+#define GSC_HECI_CMD_PKT __GSC_INSTR(0, 6)
+
+#define GSC_FW_LOAD __GSC_INSTR(1, 2)
+#define   GSC_FW_LOAD_LIMIT_VALID REG_BIT(31)
+
+#endif
diff --git a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
new file mode 100644
index 000000000000..04179b2a48e1
--- /dev/null
+++ b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_INSTR_DEFS_H_
+#define _XE_INSTR_DEFS_H_
+
+#include "regs/xe_reg_defs.h"
+
+/*
+ * The first dword of any GPU instruction is the "instruction header."  Bits
+ * 31:29 identify the general type of the command and determine how exact
+ * opcodes and sub-opcodes will be encoded in the remaining bits.
+ */
+#define XE_INSTR_CMD_TYPE		GENMASK(31, 29)
+#define   XE_INSTR_MI			REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x0)
+#define   XE_INSTR_GSC			REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x2)
+#define   XE_INSTR_GFXPIPE		REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x3)
+
+/*
+ * Most (but not all) instructions have a "length" field in the instruction
+ * header.  The value expected is the total number of dwords for the
+ * instruction, minus two.
+ *
+ * Some instructions have length fields longer or shorter than 8 bits, but
+ * those are rare.  This definition can be used for the common case where
+ * the length field is from 7:0.
+ */
+#define XE_INSTR_LEN_MASK		GENMASK(7, 0)
+#define XE_INSTR_NUM_DW(x)		REG_FIELD_PREP(XE_INSTR_LEN_MASK, (x) - 2)
+
+#endif
diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
new file mode 100644
index 000000000000..1cfa96167fde
--- /dev/null
+++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_MI_COMMANDS_H_
+#define _XE_MI_COMMANDS_H_
+
+#include "instructions/xe_instr_defs.h"
+
+/*
+ * MI (Memory Interface) commands are supported by all GT engines.  They
+ * provide general memory operations and command streamer control.  MI commands
+ * have a command type of 0x0 (MI_COMMAND) in bits 31:29 of the instruction
+ * header dword and a specific MI opcode in bits 28:23.
+ */
+
+#define MI_OPCODE			REG_GENMASK(28, 23)
+#define MI_SUBOPCODE			REG_GENMASK(22, 17)  /* used with MI_EXPANSION */
+
+#define __MI_INSTR(opcode) \
+	(XE_INSTR_MI | REG_FIELD_PREP(MI_OPCODE, opcode))
+
+#define MI_NOOP				__MI_INSTR(0x0)
+#define MI_USER_INTERRUPT		__MI_INSTR(0x2)
+#define MI_ARB_CHECK			__MI_INSTR(0x5)
+
+#define MI_ARB_ON_OFF			__MI_INSTR(0x8)
+#define   MI_ARB_ENABLE			REG_BIT(0)
+#define   MI_ARB_DISABLE		0x0
+
+#define MI_BATCH_BUFFER_END		__MI_INSTR(0xA)
+#define MI_TOPOLOGY_FILTER		__MI_INSTR(0xD)
+#define MI_FORCE_WAKEUP			__MI_INSTR(0x1D)
+
+#define MI_STORE_DATA_IMM		__MI_INSTR(0x20)
+#define   MI_SDI_GGTT			REG_BIT(22)
+#define   MI_SDI_LEN_DW			GENMASK(9, 0)
+#define   MI_SDI_NUM_DW(x)		REG_FIELD_PREP(MI_SDI_LEN_DW, (x) + 3 - 2)
+#define   MI_SDI_NUM_QW(x)		(REG_FIELD_PREP(MI_SDI_LEN_DW, 2 * (x) + 3 - 2) | \
+					 REG_BIT(21))
+
+#define MI_LOAD_REGISTER_IMM		__MI_INSTR(0x22)
+#define   MI_LRI_LRM_CS_MMIO		REG_BIT(19)
+#define   MI_LRI_MMIO_REMAP_EN		REG_BIT(17)
+#define   MI_LRI_NUM_REGS(x)		XE_INSTR_NUM_DW(2 * (x) + 1)
+#define   MI_LRI_FORCE_POSTED		REG_BIT(12)
+
+#define MI_FLUSH_DW			__MI_INSTR(0x26)
+#define   MI_FLUSH_DW_STORE_INDEX	REG_BIT(21)
+#define   MI_INVALIDATE_TLB		REG_BIT(18)
+#define   MI_FLUSH_DW_CCS		REG_BIT(16)
+#define   MI_FLUSH_DW_OP_STOREDW	REG_BIT(14)
+#define   MI_FLUSH_DW_LEN_DW		REG_GENMASK(5, 0)
+#define   MI_FLUSH_IMM_DW		REG_FIELD_PREP(MI_FLUSH_DW_LEN_DW, 4 - 2)
+#define   MI_FLUSH_IMM_QW		REG_FIELD_PREP(MI_FLUSH_DW_LEN_DW, 5 - 2)
+#define   MI_FLUSH_DW_USE_GTT		REG_BIT(2)
+
+#define MI_BATCH_BUFFER_START		__MI_INSTR(0x31)
+
+#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
new file mode 100644
index 000000000000..5592774fc690
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -0,0 +1,184 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_ENGINE_REGS_H_
+#define _XE_ENGINE_REGS_H_
+
+#include <asm/page.h>
+
+#include "regs/xe_reg_defs.h"
+
+/*
+ * These *_BASE values represent the MMIO offset where each hardware engine's
+ * registers start.  The other definitions in this header are parameterized
+ * macros that will take one of these values as a parameter.
+ */
+#define RENDER_RING_BASE			0x02000
+#define BSD_RING_BASE				0x1c0000
+#define BSD2_RING_BASE				0x1c4000
+#define BSD3_RING_BASE				0x1d0000
+#define BSD4_RING_BASE				0x1d4000
+#define XEHP_BSD5_RING_BASE			0x1e0000
+#define XEHP_BSD6_RING_BASE			0x1e4000
+#define XEHP_BSD7_RING_BASE			0x1f0000
+#define XEHP_BSD8_RING_BASE			0x1f4000
+#define VEBOX_RING_BASE				0x1c8000
+#define VEBOX2_RING_BASE			0x1d8000
+#define XEHP_VEBOX3_RING_BASE			0x1e8000
+#define XEHP_VEBOX4_RING_BASE			0x1f8000
+#define COMPUTE0_RING_BASE			0x1a000
+#define COMPUTE1_RING_BASE			0x1c000
+#define COMPUTE2_RING_BASE			0x1e000
+#define COMPUTE3_RING_BASE			0x26000
+#define BLT_RING_BASE				0x22000
+#define XEHPC_BCS1_RING_BASE			0x3e0000
+#define XEHPC_BCS2_RING_BASE			0x3e2000
+#define XEHPC_BCS3_RING_BASE			0x3e4000
+#define XEHPC_BCS4_RING_BASE			0x3e6000
+#define XEHPC_BCS5_RING_BASE			0x3e8000
+#define XEHPC_BCS6_RING_BASE			0x3ea000
+#define XEHPC_BCS7_RING_BASE			0x3ec000
+#define XEHPC_BCS8_RING_BASE			0x3ee000
+#define GSCCS_RING_BASE				0x11a000
+
+#define RING_TAIL(base)				XE_REG((base) + 0x30)
+
+#define RING_HEAD(base)				XE_REG((base) + 0x34)
+#define   HEAD_ADDR				0x001FFFFC
+
+#define RING_START(base)			XE_REG((base) + 0x38)
+
+/* RING_CTL_SIZE() takes the ring size in bytes and subtracts one page (PAGE_SIZE). */
+#define RING_CTL(base)				XE_REG((base) + 0x3c)
+#define   RING_CTL_SIZE(size)			((size) - PAGE_SIZE) /* in bytes -> pages */
+
+#define RING_PSMI_CTL(base)			XE_REG((base) + 0x50, XE_REG_OPTION_MASKED)
+#define   RC_SEMA_IDLE_MSG_DISABLE		REG_BIT(12)
+#define   WAIT_FOR_EVENT_POWER_DOWN_DISABLE	REG_BIT(7)
+#define   IDLE_MSG_DISABLE			REG_BIT(0)
+
+#define RING_PWRCTX_MAXCNT(base)		XE_REG((base) + 0x54)
+#define   IDLE_WAIT_TIME			REG_GENMASK(19, 0)
+
+#define RING_ACTHD_UDW(base)			XE_REG((base) + 0x5c)
+#define RING_DMA_FADD_UDW(base)			XE_REG((base) + 0x60)
+#define RING_IPEHR(base)			XE_REG((base) + 0x68)
+#define RING_ACTHD(base)			XE_REG((base) + 0x74)
+#define RING_DMA_FADD(base)			XE_REG((base) + 0x78)
+#define RING_HWS_PGA(base)			XE_REG((base) + 0x80)
+#define RING_HWSTAM(base)			XE_REG((base) + 0x98)
+#define RING_MI_MODE(base)			XE_REG((base) + 0x9c)
+#define RING_NOPID(base)			XE_REG((base) + 0x94)
+
+#define FF_THREAD_MODE(base)			XE_REG((base) + 0xa0)
+#define   FF_TESSELATION_DOP_GATE_DISABLE	BIT(19)
+
+#define RING_IMR(base)				XE_REG((base) + 0xa8)
+
+#define RING_EIR(base)				XE_REG((base) + 0xb0)
+#define RING_EMR(base)				XE_REG((base) + 0xb4)
+#define RING_ESR(base)				XE_REG((base) + 0xb8)
+
+#define RING_CMD_CCTL(base)			XE_REG((base) + 0xc4, XE_REG_OPTION_MASKED)
+/*
+ * CMD_CCTL read/write fields take a MOCS value and _not_ a table index.
+ * The lsb of each can be considered a separate enabling bit for encryption.
+ * 6:0 == default MOCS value for reads  =>  6:1 == table index for reads.
+ * 13:7 == default MOCS value for writes => 13:8 == table index for writes.
+ * 15:14 == Reserved => 31:30 are set to 0.
+ */
+#define   CMD_CCTL_WRITE_OVERRIDE_MASK		REG_GENMASK(13, 8)
+#define   CMD_CCTL_READ_OVERRIDE_MASK		REG_GENMASK(6, 1)
+
+#define CSFE_CHICKEN1(base)			XE_REG((base) + 0xd4, XE_REG_OPTION_MASKED)
+#define   GHWSP_CSB_REPORT_DIS			REG_BIT(15)
+#define   PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS	REG_BIT(14)
+
+#define FF_SLICE_CS_CHICKEN1(base)		XE_REG((base) + 0xe0, XE_REG_OPTION_MASKED)
+#define   FFSC_PERCTX_PREEMPT_CTRL		REG_BIT(14)
+
+#define FF_SLICE_CS_CHICKEN2(base)		XE_REG((base) + 0xe4, XE_REG_OPTION_MASKED)
+#define   PERF_FIX_BALANCING_CFE_DISABLE	REG_BIT(15)
+
+#define CS_DEBUG_MODE1(base)			XE_REG((base) + 0xec, XE_REG_OPTION_MASKED)
+#define   FF_DOP_CLOCK_GATE_DISABLE		REG_BIT(1)
+#define   REPLAY_MODE_GRANULARITY		REG_BIT(0)
+
+#define RING_BBADDR(base)			XE_REG((base) + 0x140)
+#define RING_BBADDR_UDW(base)			XE_REG((base) + 0x168)
+
+#define BCS_SWCTRL(base)			XE_REG((base) + 0x200, XE_REG_OPTION_MASKED)
+#define   BCS_SWCTRL_DISABLE_256B		REG_BIT(2)
+
+/* Handle the MOCS value in BLIT_CCTL the same way as is done for CMD_CCTL */
+#define BLIT_CCTL(base)				XE_REG((base) + 0x204)
+#define   BLIT_CCTL_DST_MOCS_MASK		REG_GENMASK(14, 9)
+#define   BLIT_CCTL_SRC_MOCS_MASK		REG_GENMASK(6, 1)
+
+#define RING_EXECLIST_STATUS_LO(base)		XE_REG((base) + 0x234)
+#define RING_EXECLIST_STATUS_HI(base)		XE_REG((base) + 0x234 + 4)
+
+#define RING_CONTEXT_CONTROL(base)		XE_REG((base) + 0x244)
+#define	  CTX_CTRL_INHIBIT_SYN_CTX_SWITCH	REG_BIT(3)
+#define	  CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT	REG_BIT(0)
+
+#define RING_MODE(base)				XE_REG((base) + 0x29c)
+#define   GFX_DISABLE_LEGACY_MODE		REG_BIT(3)
+
+#define RING_TIMESTAMP(base)			XE_REG((base) + 0x358)
+/* NOTE(review): RING_VALID*, STOP_RING and TAIL_ADDR below are not named like timestamp fields; confirm their owning register */
+#define RING_TIMESTAMP_UDW(base)		XE_REG((base) + 0x358 + 4)
+#define   RING_VALID_MASK			0x00000001
+#define   RING_VALID				0x00000001
+#define   STOP_RING				REG_BIT(8)
+#define   TAIL_ADDR				0x001FFFF8
+
+#define RING_CTX_TIMESTAMP(base)		XE_REG((base) + 0x3a8)
+
+#define RING_FORCE_TO_NONPRIV(base, i)		XE_REG(((base) + 0x4d0) + (i) * 4)
+#define   RING_FORCE_TO_NONPRIV_DENY		REG_BIT(30)
+#define   RING_FORCE_TO_NONPRIV_ACCESS_MASK	REG_GENMASK(29, 28)
+#define   RING_FORCE_TO_NONPRIV_ACCESS_RW	REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_ACCESS_MASK, 0)
+#define   RING_FORCE_TO_NONPRIV_ACCESS_RD	REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_ACCESS_MASK, 1)
+#define   RING_FORCE_TO_NONPRIV_ACCESS_WR	REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_ACCESS_MASK, 2)
+#define   RING_FORCE_TO_NONPRIV_ACCESS_INVALID	REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_ACCESS_MASK, 3)
+#define   RING_FORCE_TO_NONPRIV_ADDRESS_MASK	REG_GENMASK(25, 2)
+#define   RING_FORCE_TO_NONPRIV_RANGE_MASK	REG_GENMASK(1, 0)
+#define   RING_FORCE_TO_NONPRIV_RANGE_1		REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_RANGE_MASK, 0)
+#define   RING_FORCE_TO_NONPRIV_RANGE_4		REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_RANGE_MASK, 1)
+#define   RING_FORCE_TO_NONPRIV_RANGE_16	REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_RANGE_MASK, 2)
+#define   RING_FORCE_TO_NONPRIV_RANGE_64	REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_RANGE_MASK, 3)
+#define   RING_FORCE_TO_NONPRIV_MASK_VALID	(RING_FORCE_TO_NONPRIV_RANGE_MASK | \
+						 RING_FORCE_TO_NONPRIV_ACCESS_MASK | \
+						 RING_FORCE_TO_NONPRIV_DENY)
+#define   RING_MAX_NONPRIV_SLOTS  12
+
+#define RING_EXECLIST_SQ_CONTENTS_LO(base)	XE_REG((base) + 0x510)
+#define RING_EXECLIST_SQ_CONTENTS_HI(base)	XE_REG((base) + 0x510 + 4)
+
+#define RING_EXECLIST_CONTROL(base)		XE_REG((base) + 0x550)
+#define	  EL_CTRL_LOAD				REG_BIT(0)
+
+#define CS_CHICKEN1(base)			XE_REG((base) + 0x580, XE_REG_OPTION_MASKED)
+#define   PREEMPT_GPGPU_LEVEL(hi, lo)		(((hi) << 2) | ((lo) << 1))
+#define   PREEMPT_GPGPU_MID_THREAD_LEVEL	PREEMPT_GPGPU_LEVEL(0, 0)
+#define   PREEMPT_GPGPU_THREAD_GROUP_LEVEL	PREEMPT_GPGPU_LEVEL(0, 1)
+#define   PREEMPT_GPGPU_COMMAND_LEVEL		PREEMPT_GPGPU_LEVEL(1, 0)
+#define   PREEMPT_GPGPU_LEVEL_MASK		PREEMPT_GPGPU_LEVEL(1, 1)
+#define   PREEMPT_3D_OBJECT_LEVEL		REG_BIT(0)
+
+#define VDBOX_CGCTL3F08(base)			XE_REG((base) + 0x3f08)
+#define   CG3DDISHRS_CLKGATE_DIS		REG_BIT(5)
+
+#define VDBOX_CGCTL3F10(base)			XE_REG((base) + 0x3f10)
+#define   IECPUNIT_CLKGATE_DIS			REG_BIT(22)
+
+#define VDBOX_CGCTL3F18(base)			XE_REG((base) + 0x3f18)
+#define   ALNUNIT_CLKGATE_DIS			REG_BIT(13)
+
+#define VDBOX_CGCTL3F1C(base)			XE_REG((base) + 0x3f1c)
+#define   MFXPIPE_CLKGATE_DIS			REG_BIT(3)
+
+#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
new file mode 100644
index 000000000000..a255946b6f77
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GPU_COMMANDS_H_
+#define _XE_GPU_COMMANDS_H_
+
+#include "regs/xe_reg_defs.h"
+
+#define XY_CTRL_SURF_COPY_BLT		((2 << 29) | (0x48 << 22) | 3)
+#define   SRC_ACCESS_TYPE_SHIFT		21
+#define   DST_ACCESS_TYPE_SHIFT		20
+#define   CCS_SIZE_MASK			GENMASK(17, 8)
+#define   XE2_CCS_SIZE_MASK		GENMASK(18, 9)
+#define   XY_CTRL_SURF_MOCS_MASK	GENMASK(31, 26)
+#define   XE2_XY_CTRL_SURF_MOCS_INDEX_MASK	GENMASK(31, 28)
+#define   NUM_CCS_BYTES_PER_BLOCK	256
+#define   NUM_BYTES_PER_CCS_BYTE(_xe)	(GRAPHICS_VER(_xe) >= 20 ? 512 : 256)
+
+#define XY_FAST_COLOR_BLT_CMD		(2 << 29 | 0x44 << 22)
+#define   XY_FAST_COLOR_BLT_DEPTH_32	(2 << 19)
+#define   XY_FAST_COLOR_BLT_DW		16
+#define   XY_FAST_COLOR_BLT_MOCS_MASK	GENMASK(27, 22)
+#define   XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK	GENMASK(27, 24)
+#define   XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31
+
+#define XY_FAST_COPY_BLT_CMD		(2 << 29 | 0x42 << 22)
+#define   XY_FAST_COPY_BLT_DEPTH_32	(3<<24)
+#define   XY_FAST_COPY_BLT_D1_SRC_TILE4	REG_BIT(31)
+#define   XY_FAST_COPY_BLT_D1_DST_TILE4	REG_BIT(30)
+#define   XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK	GENMASK(23, 20)
+
+#define	PVC_MEM_SET_CMD		(2 << 29 | 0x5b << 22)
+#define   PVC_MEM_SET_CMD_LEN_DW	7
+#define   PVC_MEM_SET_MATRIX		REG_BIT(17)
+#define   PVC_MEM_SET_DATA_FIELD	GENMASK(31, 24)
+/* Bspec lists field as [6:0], but index alone is from [6:1] */
+#define   PVC_MEM_SET_MOCS_INDEX_MASK	GENMASK(6, 1)
+#define   XE2_MEM_SET_MOCS_INDEX_MASK	GENMASK(6, 3)
+
+#define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
+
+#define	  PIPE_CONTROL0_HDC_PIPELINE_FLUSH		BIT(9)	/* gen12 */
+
+#define   PIPE_CONTROL_COMMAND_CACHE_INVALIDATE		(1<<29)
+#define   PIPE_CONTROL_TILE_CACHE_FLUSH			(1<<28)
+#define   PIPE_CONTROL_AMFS_FLUSH			(1<<25)
+#define   PIPE_CONTROL_GLOBAL_GTT_IVB			(1<<24)
+#define   PIPE_CONTROL_LRI_POST_SYNC			BIT(23)
+#define   PIPE_CONTROL_STORE_DATA_INDEX			(1<<21)
+#define   PIPE_CONTROL_CS_STALL				(1<<20)
+#define   PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET		(1<<19)
+#define	  PIPE_CONTROL_TLB_INVALIDATE			BIT(18)
+#define   PIPE_CONTROL_PSD_SYNC				(1<<17)
+#define   PIPE_CONTROL_QW_WRITE				(1<<14)
+#define   PIPE_CONTROL_DEPTH_STALL			(1<<13)
+#define   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH	(1<<12)
+#define   PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE	(1<<11)
+#define   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE		(1<<10)
+#define   PIPE_CONTROL_INDIRECT_STATE_DISABLE		(1<<9)
+#define   PIPE_CONTROL_FLUSH_ENABLE			(1<<7)
+#define   PIPE_CONTROL_DC_FLUSH_ENABLE			(1<<5)
+#define   PIPE_CONTROL_VF_CACHE_INVALIDATE		(1<<4)
+#define   PIPE_CONTROL_CONST_CACHE_INVALIDATE		(1<<3)
+#define   PIPE_CONTROL_STATE_CACHE_INVALIDATE		(1<<2)
+#define   PIPE_CONTROL_STALL_AT_SCOREBOARD		(1<<1)
+#define   PIPE_CONTROL_DEPTH_CACHE_FLUSH		(1<<0)
+
+#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h
new file mode 100644
index 000000000000..9886ec9cb08e
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GSC_REGS_H_
+#define _XE_GSC_REGS_H_
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#include "regs/xe_reg_defs.h"
+
+/* Definitions of GSC H/W registers, bits, etc */
+
+#define MTL_GSC_HECI1_BASE	0x00116000
+#define MTL_GSC_HECI2_BASE	0x00117000
+
+#define HECI_H_CSR(base)	XE_REG((base) + 0x4)
+#define   HECI_H_CSR_IE		REG_BIT(0)
+#define   HECI_H_CSR_IS		REG_BIT(1)
+#define   HECI_H_CSR_IG		REG_BIT(2)
+#define   HECI_H_CSR_RDY	REG_BIT(3)
+#define   HECI_H_CSR_RST	REG_BIT(4)
+
+/*
+ * The FWSTS register values are FW defined and can be different between
+ * HECI1 and HECI2
+ */
+#define HECI_FWSTS1(base)				XE_REG((base) + 0xc40)
+#define   HECI1_FWSTS1_CURRENT_STATE			REG_GENMASK(3, 0)
+#define   HECI1_FWSTS1_CURRENT_STATE_RESET		0
+#define   HECI1_FWSTS1_PROXY_STATE_NORMAL		5
+#define   HECI1_FWSTS1_INIT_COMPLETE			REG_BIT(9)
+#define HECI_FWSTS5(base)				XE_REG((base) + 0xc68)
+#define   HECI1_FWSTS5_HUC_AUTH_DONE			REG_BIT(19)
+
+#define HECI_H_GS1(base)	XE_REG((base) + 0xc4c)
+#define   HECI_H_GS1_ER_PREP	REG_BIT(0)
+
+#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
new file mode 100644
index 000000000000..1dd361046b5d
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -0,0 +1,478 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_REGS_H_
+#define _XE_GT_REGS_H_
+
+#include "regs/xe_reg_defs.h"
+
+/*
+ * The GSI register range [0x0 - 0x40000) is replicated at a higher offset
+ * for the media GT.  xe_mmio and xe_gt_mcr functions will automatically
+ * translate offsets by MEDIA_GT_GSI_OFFSET when operating on the media GT.
+ */
+#define MEDIA_GT_GSI_OFFSET				0x380000
+#define MEDIA_GT_GSI_LENGTH				0x40000
+
+/* MTL workpoint reg to get core C state and actual freq of 3D, SAMedia */
+#define MTL_MIRROR_TARGET_WP1				XE_REG(0xc60)
+#define   MTL_CAGF_MASK					REG_GENMASK(8, 0)
+#define   MTL_CC_MASK					REG_GENMASK(12, 9)
+
+/* RPM unit config (Gen8+) */
+#define RPM_CONFIG0					XE_REG(0xd00)
+#define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK		REG_GENMASK(5, 3)
+#define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ		0
+#define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ	1
+#define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ	2
+#define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ		3
+#define   RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK		REG_GENMASK(2, 1)
+
+#define FORCEWAKE_ACK_MEDIA_VDBOX(n)		XE_REG(0xd50 + (n) * 4)
+#define FORCEWAKE_ACK_MEDIA_VEBOX(n)		XE_REG(0xd70 + (n) * 4)
+#define FORCEWAKE_ACK_RENDER			XE_REG(0xd84)
+
+#define GMD_ID					XE_REG(0xd8c)
+#define   GMD_ID_ARCH_MASK			REG_GENMASK(31, 22)
+#define   GMD_ID_RELEASE_MASK			REG_GENMASK(21, 14)
+#define   GMD_ID_REVID				REG_GENMASK(5, 0)
+
+#define FORCEWAKE_ACK_GSC			XE_REG(0xdf8)
+#define FORCEWAKE_ACK_GT_MTL			XE_REG(0xdfc)
+
+#define MCFG_MCR_SELECTOR			XE_REG(0xfd0)
+#define MTL_MCR_SELECTOR			XE_REG(0xfd4)
+#define SF_MCR_SELECTOR				XE_REG(0xfd8)
+#define MCR_SELECTOR				XE_REG(0xfdc)
+#define GAM_MCR_SELECTOR			XE_REG(0xfe0)
+#define   MCR_MULTICAST				REG_BIT(31)
+#define   MCR_SLICE_MASK			REG_GENMASK(30, 27)
+#define   MCR_SLICE(slice)			REG_FIELD_PREP(MCR_SLICE_MASK, slice)
+#define   MCR_SUBSLICE_MASK			REG_GENMASK(26, 24)
+#define   MCR_SUBSLICE(subslice)		REG_FIELD_PREP(MCR_SUBSLICE_MASK, subslice)
+#define   MTL_MCR_GROUPID			REG_GENMASK(11, 8)
+#define   MTL_MCR_INSTANCEID			REG_GENMASK(3, 0)
+
+#define PS_INVOCATION_COUNT			XE_REG(0x2348)
+
+#define XELP_GLOBAL_MOCS(i)			XE_REG(0x4000 + (i) * 4)
+#define XEHP_GLOBAL_MOCS(i)			XE_REG_MCR(0x4000 + (i) * 4)
+#define CCS_AUX_INV				XE_REG(0x4208)
+
+#define VD0_AUX_INV				XE_REG(0x4218)
+#define VE0_AUX_INV				XE_REG(0x4238)
+
+#define VE1_AUX_INV				XE_REG(0x42b8)
+#define   AUX_INV				REG_BIT(0)
+
+#define XEHP_TILE_ADDR_RANGE(_idx)		XE_REG_MCR(0x4900 + (_idx) * 4)
+#define XEHP_FLAT_CCS_BASE_ADDR			XE_REG_MCR(0x4910)
+
+#define WM_CHICKEN3				XE_REG_MCR(0x5588, XE_REG_OPTION_MASKED)
+#define   HIZ_PLANE_COMPRESSION_DIS		REG_BIT(10)
+
+#define CHICKEN_RASTER_2			XE_REG_MCR(0x6208, XE_REG_OPTION_MASKED)
+#define   TBIMR_FAST_CLIP			REG_BIT(5)
+
+#define FF_MODE					XE_REG_MCR(0x6210)
+#define   DIS_TE_AUTOSTRIP			REG_BIT(31)
+#define   DIS_MESH_PARTIAL_AUTOSTRIP		REG_BIT(16)
+#define   DIS_MESH_AUTOSTRIP			REG_BIT(15)
+
+#define VFLSKPD					XE_REG_MCR(0x62a8, XE_REG_OPTION_MASKED)
+#define   DIS_PARTIAL_AUTOSTRIP			REG_BIT(9)
+#define   DIS_AUTOSTRIP				REG_BIT(6)
+#define   DIS_OVER_FETCH_CACHE			REG_BIT(1)
+#define   DIS_MULT_MISS_RD_SQUASH		REG_BIT(0)
+
+#define FF_MODE2				XE_REG(0x6604)
+#define XEHP_FF_MODE2				XE_REG_MCR(0x6604)
+#define   FF_MODE2_GS_TIMER_MASK		REG_GENMASK(31, 24)
+#define   FF_MODE2_GS_TIMER_224			REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224)
+#define   FF_MODE2_TDS_TIMER_MASK		REG_GENMASK(23, 16)
+#define   FF_MODE2_TDS_TIMER_128		REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4)
+
+#define CACHE_MODE_1				XE_REG(0x7004, XE_REG_OPTION_MASKED)
+#define   MSAA_OPTIMIZATION_REDUC_DISABLE	REG_BIT(11)
+
+#define COMMON_SLICE_CHICKEN1			XE_REG(0x7010)
+
+#define HIZ_CHICKEN					XE_REG(0x7018, XE_REG_OPTION_MASKED)
+#define   DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE	REG_BIT(14)
+#define   HZ_DEPTH_TEST_LE_GE_OPT_DISABLE		REG_BIT(13)
+
+#define XEHP_PSS_MODE2				XE_REG_MCR(0x703c, XE_REG_OPTION_MASKED)
+#define   SCOREBOARD_STALL_FLUSH_CONTROL	REG_BIT(5)
+
+#define XEHP_PSS_CHICKEN			XE_REG_MCR(0x7044, XE_REG_OPTION_MASKED)
+#define   FLSH_IGNORES_PSD			REG_BIT(10)
+#define   FD_END_COLLECT			REG_BIT(5)
+
+#define COMMON_SLICE_CHICKEN4			XE_REG(0x7300, XE_REG_OPTION_MASKED)
+#define   DISABLE_TDC_LOAD_BALANCING_CALC	REG_BIT(6)
+
+#define COMMON_SLICE_CHICKEN3				XE_REG(0x7304, XE_REG_OPTION_MASKED)
+#define XEHP_COMMON_SLICE_CHICKEN3			XE_REG_MCR(0x7304, XE_REG_OPTION_MASKED)
+#define   DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN	REG_BIT(12)
+#define   XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE		REG_BIT(12)
+#define   BLEND_EMB_FIX_DISABLE_IN_RCC			REG_BIT(11)
+#define   DISABLE_CPS_AWARE_COLOR_PIPE			REG_BIT(9)
+
+#define XEHP_SLICE_COMMON_ECO_CHICKEN1		XE_REG_MCR(0x731c, XE_REG_OPTION_MASKED)
+#define   MSC_MSAA_REODER_BUF_BYPASS_DISABLE	REG_BIT(14)
+
+#define VF_PREEMPTION				XE_REG(0x83a4, XE_REG_OPTION_MASKED)
+#define   PREEMPTION_VERTEX_COUNT		REG_GENMASK(15, 0)
+
+#define VF_SCRATCHPAD				XE_REG(0x83a8, XE_REG_OPTION_MASKED)
+#define   XE2_VFG_TED_CREDIT_INTERFACE_DISABLE	REG_BIT(13)
+
+#define VFG_PREEMPTION_CHICKEN			XE_REG(0x83b4, XE_REG_OPTION_MASKED)
+#define   POLYGON_TRIFAN_LINELOOP_DISABLE	REG_BIT(4)
+
+#define SQCNT1					XE_REG_MCR(0x8718)
+#define XELPMP_SQCNT1				XE_REG(0x8718)
+#define   ENFORCE_RAR				REG_BIT(23)
+
+#define XEHP_SQCM				XE_REG_MCR(0x8724)
+#define   EN_32B_ACCESS				REG_BIT(30)
+
+#define XE2_FLAT_CCS_BASE_RANGE_LOWER		XE_REG_MCR(0x8800)
+#define   XE2_FLAT_CCS_ENABLE			REG_BIT(0)
+
+#define GSCPSMI_BASE				XE_REG(0x880c)
+
+/* Fuse readout registers for GT */
+#define XEHP_FUSE4				XE_REG(0x9114)
+#define   CCS_EN_MASK				REG_GENMASK(19, 16)
+#define   GT_L3_EXC_MASK			REG_GENMASK(6, 4)
+
+#define	MIRROR_FUSE3				XE_REG(0x9118)
+#define   XE2_NODE_ENABLE_MASK			REG_GENMASK(31, 16)
+#define   L3BANK_PAIR_COUNT			4
+#define   L3BANK_MASK				REG_GENMASK(3, 0)
+/* on Xe_HP the same fuses indicate mslices instead of L3 banks */
+#define   MAX_MSLICES				4
+#define   MEML3_EN_MASK				REG_GENMASK(3, 0)
+
+#define XELP_EU_ENABLE				XE_REG(0x9134)	/* "_DISABLE" on Xe_LP */
+#define   XELP_EU_MASK				REG_GENMASK(7, 0)
+#define XELP_GT_GEOMETRY_DSS_ENABLE		XE_REG(0x913c)
+
+#define GT_VEBOX_VDBOX_DISABLE			XE_REG(0x9140)
+#define   GT_VEBOX_DISABLE_MASK			REG_GENMASK(19, 16)
+#define   GT_VDBOX_DISABLE_MASK			REG_GENMASK(7, 0)
+
+#define XEHP_GT_COMPUTE_DSS_ENABLE		XE_REG(0x9144)
+#define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT		XE_REG(0x9148)
+#define XE2_GT_COMPUTE_DSS_2			XE_REG(0x914c)
+#define XE2_GT_GEOMETRY_DSS_1			XE_REG(0x9150)
+#define XE2_GT_GEOMETRY_DSS_2			XE_REG(0x9154)
+
+#define GDRST					XE_REG(0x941c)
+#define   GRDOM_GUC				REG_BIT(3)
+#define   GRDOM_FULL				REG_BIT(0)
+
+#define MISCCPCTL				XE_REG(0x9424)
+#define   DOP_CLOCK_GATE_RENDER_ENABLE		REG_BIT(1)
+
+#define UNSLCGCTL9430				XE_REG(0x9430)
+#define   MSQDUNIT_CLKGATE_DIS			REG_BIT(3)
+
+#define UNSLICE_UNIT_LEVEL_CLKGATE		XE_REG(0x9434)
+#define   VFUNIT_CLKGATE_DIS			REG_BIT(20)
+#define   TSGUNIT_CLKGATE_DIS			REG_BIT(17) /* XEHPSDV */
+#define   CG3DDISCFEG_CLKGATE_DIS		REG_BIT(17) /* DG2 */
+#define   GAMEDIA_CLKGATE_DIS			REG_BIT(11)
+#define   HSUNIT_CLKGATE_DIS			REG_BIT(8)
+#define   VSUNIT_CLKGATE_DIS			REG_BIT(3)
+
+#define UNSLCGCTL9440				XE_REG(0x9440)
+#define   GAMTLBOACS_CLKGATE_DIS		REG_BIT(28)
+#define   GAMTLBVDBOX5_CLKGATE_DIS		REG_BIT(27)
+#define   GAMTLBVDBOX6_CLKGATE_DIS		REG_BIT(26)
+#define   GAMTLBVDBOX3_CLKGATE_DIS		REG_BIT(24)
+#define   GAMTLBVDBOX4_CLKGATE_DIS		REG_BIT(23)
+#define   GAMTLBVDBOX7_CLKGATE_DIS		REG_BIT(22)
+#define   GAMTLBVDBOX2_CLKGATE_DIS		REG_BIT(21)
+#define   GAMTLBVDBOX0_CLKGATE_DIS		REG_BIT(17)
+#define   GAMTLBKCR_CLKGATE_DIS			REG_BIT(16)
+#define   GAMTLBGUC_CLKGATE_DIS			REG_BIT(15)
+#define   GAMTLBBLT_CLKGATE_DIS			REG_BIT(14)
+#define   GAMTLBVDBOX1_CLKGATE_DIS		REG_BIT(6)
+
+#define UNSLCGCTL9444				XE_REG(0x9444)
+#define   GAMTLBGFXA0_CLKGATE_DIS		REG_BIT(30)
+#define   GAMTLBGFXA1_CLKGATE_DIS		REG_BIT(29)
+#define   GAMTLBCOMPA0_CLKGATE_DIS		REG_BIT(28)
+#define   GAMTLBCOMPA1_CLKGATE_DIS		REG_BIT(27)
+#define   GAMTLBCOMPB0_CLKGATE_DIS		REG_BIT(26)
+#define   GAMTLBCOMPB1_CLKGATE_DIS		REG_BIT(25)
+#define   GAMTLBCOMPC0_CLKGATE_DIS		REG_BIT(24)
+#define   GAMTLBCOMPC1_CLKGATE_DIS		REG_BIT(23)
+#define   GAMTLBCOMPD0_CLKGATE_DIS		REG_BIT(22)
+#define   GAMTLBCOMPD1_CLKGATE_DIS		REG_BIT(21)
+#define   GAMTLBMERT_CLKGATE_DIS		REG_BIT(20)
+#define   GAMTLBVEBOX3_CLKGATE_DIS		REG_BIT(19)
+#define   GAMTLBVEBOX2_CLKGATE_DIS		REG_BIT(18)
+#define   GAMTLBVEBOX1_CLKGATE_DIS		REG_BIT(17)
+#define   GAMTLBVEBOX0_CLKGATE_DIS		REG_BIT(16)
+#define   LTCDD_CLKGATE_DIS			REG_BIT(10)
+
+#define XEHP_SLICE_UNIT_LEVEL_CLKGATE		XE_REG_MCR(0x94d4)
+#define   L3_CR2X_CLKGATE_DIS			REG_BIT(17)
+#define   L3_CLKGATE_DIS			REG_BIT(16)
+#define   NODEDSS_CLKGATE_DIS			REG_BIT(12)
+#define   MSCUNIT_CLKGATE_DIS			REG_BIT(10)
+#define   RCCUNIT_CLKGATE_DIS			REG_BIT(7)
+#define   SARBUNIT_CLKGATE_DIS			REG_BIT(5)
+#define   SBEUNIT_CLKGATE_DIS			REG_BIT(4)
+
+#define UNSLICE_UNIT_LEVEL_CLKGATE2		XE_REG(0x94e4)
+#define   VSUNIT_CLKGATE2_DIS			REG_BIT(19)
+
+#define SUBSLICE_UNIT_LEVEL_CLKGATE		XE_REG_MCR(0x9524)
+#define   DSS_ROUTER_CLKGATE_DIS		REG_BIT(28)
+#define   GWUNIT_CLKGATE_DIS			REG_BIT(16)
+
+#define SUBSLICE_UNIT_LEVEL_CLKGATE2		XE_REG_MCR(0x9528)
+#define   CPSSUNIT_CLKGATE_DIS			REG_BIT(9)
+
+#define SSMCGCTL9530				XE_REG_MCR(0x9530)
+#define   RTFUNIT_CLKGATE_DIS			REG_BIT(18)
+
+#define DFR_RATIO_EN_AND_CHICKEN		XE_REG_MCR(0x9550)
+#define   DFR_DISABLE				REG_BIT(9)
+
+#define RPNSWREQ				XE_REG(0xa008)
+#define   REQ_RATIO_MASK			REG_GENMASK(31, 23)
+
+#define RP_CONTROL				XE_REG(0xa024)
+#define   RPSWCTL_MASK				REG_GENMASK(10, 9)
+#define   RPSWCTL_ENABLE			REG_FIELD_PREP(RPSWCTL_MASK, 2)
+#define   RPSWCTL_DISABLE			REG_FIELD_PREP(RPSWCTL_MASK, 0)
+#define RC_CONTROL				XE_REG(0xa090)
+#define   RC_CTL_HW_ENABLE			REG_BIT(31)
+#define   RC_CTL_TO_MODE			REG_BIT(28)
+#define   RC_CTL_RC6_ENABLE			REG_BIT(18)
+#define RC_STATE				XE_REG(0xa094)
+#define RC_IDLE_HYSTERSIS			XE_REG(0xa0ac)
+
+#define PMINTRMSK				XE_REG(0xa168)
+#define   PMINTR_DISABLE_REDIRECT_TO_GUC	REG_BIT(31)
+#define   ARAT_EXPIRED_INTRMSK			REG_BIT(9)
+
+#define FORCEWAKE_GT				XE_REG(0xa188)
+
+#define PG_ENABLE				XE_REG(0xa210)
+
+#define CTC_MODE				XE_REG(0xa26c)
+#define   CTC_SHIFT_PARAMETER_MASK		REG_GENMASK(2, 1)
+#define   CTC_SOURCE_DIVIDE_LOGIC		REG_BIT(0)
+
+#define FORCEWAKE_RENDER			XE_REG(0xa278)
+#define FORCEWAKE_MEDIA_VDBOX(n)		XE_REG(0xa540 + (n) * 4)
+#define FORCEWAKE_MEDIA_VEBOX(n)		XE_REG(0xa560 + (n) * 4)
+#define FORCEWAKE_GSC				XE_REG(0xa618)
+
+#define XEHPC_LNCFMISCCFGREG0			XE_REG_MCR(0xb01c, XE_REG_OPTION_MASKED)
+#define   XEHPC_OVRLSCCC			REG_BIT(0)
+
+/* L3 Cache Control */
+#define XELP_LNCFCMOCS(i)			XE_REG(0xb020 + (i) * 4)
+#define XEHP_LNCFCMOCS(i)			XE_REG_MCR(0xb020 + (i) * 4)
+#define LNCFCMOCS_REG_COUNT			32
+
+#define XEHP_L3NODEARBCFG			XE_REG_MCR(0xb0b4)
+#define   XEHP_LNESPARE				REG_BIT(19)
+
+#define XEHP_L3SQCREG5				XE_REG_MCR(0xb158)
+#define   L3_PWM_TIMER_INIT_VAL_MASK		REG_GENMASK(9, 0)
+
+#define XEHP_L3SCQREG7				XE_REG_MCR(0xb188)
+#define   BLEND_FILL_CACHING_OPT_DIS		REG_BIT(3)
+
+#define XEHPC_L3CLOS_MASK(i)			XE_REG_MCR(0xb194 + (i) * 8)
+
+#define XE2LPM_L3SQCREG5			XE_REG_MCR(0xb658)
+
+#define XEHP_MERT_MOD_CTRL			XE_REG_MCR(0xcf28)
+#define RENDER_MOD_CTRL				XE_REG_MCR(0xcf2c)
+#define COMP_MOD_CTRL				XE_REG_MCR(0xcf30)
+#define XEHP_VDBX_MOD_CTRL			XE_REG_MCR(0xcf34)
+#define XELPMP_VDBX_MOD_CTRL			XE_REG(0xcf34)
+#define XEHP_VEBX_MOD_CTRL			XE_REG_MCR(0xcf38)
+#define XELPMP_VEBX_MOD_CTRL			XE_REG(0xcf38)
+#define   FORCE_MISS_FTLB			REG_BIT(3)
+
+#define XEHP_GAMSTLB_CTRL			XE_REG_MCR(0xcf4c)
+#define   CONTROL_BLOCK_CLKGATE_DIS		REG_BIT(12)
+#define   EGRESS_BLOCK_CLKGATE_DIS		REG_BIT(11)
+#define   TAG_BLOCK_CLKGATE_DIS			REG_BIT(7)
+
+#define XEHP_GAMCNTRL_CTRL			XE_REG_MCR(0xcf54)
+#define   INVALIDATION_BROADCAST_MODE_DIS	REG_BIT(12)
+#define   GLOBAL_INVALIDATION_MODE		REG_BIT(2)
+
+#define HALF_SLICE_CHICKEN5			XE_REG_MCR(0xe188, XE_REG_OPTION_MASKED)
+#define   DISABLE_SAMPLE_G_PERFORMANCE		REG_BIT(0)
+
+#define SAMPLER_MODE				XE_REG_MCR(0xe18c, XE_REG_OPTION_MASKED)
+#define   ENABLE_SMALLPL			REG_BIT(15)
+#define   SC_DISABLE_POWER_OPTIMIZATION_EBB	REG_BIT(9)
+#define   SAMPLER_ENABLE_HEADLESS_MSG		REG_BIT(5)
+#define   INDIRECT_STATE_BASE_ADDR_OVERRIDE	REG_BIT(0)
+
+#define HALF_SLICE_CHICKEN7				XE_REG_MCR(0xe194, XE_REG_OPTION_MASKED)
+#define   DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA	REG_BIT(15)
+
+#define CACHE_MODE_SS				XE_REG_MCR(0xe420, XE_REG_OPTION_MASKED)
+#define   DISABLE_ECC				REG_BIT(5)
+#define   ENABLE_PREFETCH_INTO_IC		REG_BIT(3)
+
+#define ROW_CHICKEN4				XE_REG_MCR(0xe48c, XE_REG_OPTION_MASKED)
+#define   DISABLE_GRF_CLEAR			REG_BIT(13)
+#define   XEHP_DIS_BBL_SYSPIPE			REG_BIT(11)
+#define   DISABLE_TDL_PUSH			REG_BIT(9)
+#define   DIS_PICK_2ND_EU			REG_BIT(7)
+#define   DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX	REG_BIT(4)
+#define   THREAD_EX_ARB_MODE			REG_GENMASK(3, 2)
+#define   THREAD_EX_ARB_MODE_RR_AFTER_DEP	REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2)
+
+#define ROW_CHICKEN3				XE_REG_MCR(0xe49c, XE_REG_OPTION_MASKED)
+#define   DIS_FIX_EOT1_FLUSH			REG_BIT(9)
+
+#define ROW_CHICKEN				XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED)
+#define   UGM_BACKUP_MODE			REG_BIT(13)
+#define   MDQ_ARBITRATION_MODE			REG_BIT(12)
+#define   EARLY_EOT_DIS				REG_BIT(1)
+
+#define ROW_CHICKEN2				XE_REG_MCR(0xe4f4, XE_REG_OPTION_MASKED)
+#define   DISABLE_READ_SUPPRESSION		REG_BIT(15)
+#define   DISABLE_EARLY_READ			REG_BIT(14)
+#define   ENABLE_LARGE_GRF_MODE			REG_BIT(12)
+#define   PUSH_CONST_DEREF_HOLD_DIS		REG_BIT(8)
+#define   DISABLE_DOP_GATING			REG_BIT(0)
+
+#define RT_CTRL					XE_REG_MCR(0xe530)
+#define   DIS_NULL_QUERY			REG_BIT(10)
+
+#define XEHP_HDC_CHICKEN0					XE_REG_MCR(0xe5f0, XE_REG_OPTION_MASKED)
+#define   LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK	REG_GENMASK(13, 11)
+#define   DIS_ATOMIC_CHAINING_TYPED_WRITES	REG_BIT(3)
+
+#define LSC_CHICKEN_BIT_0			XE_REG_MCR(0xe7c8)
+#define   DISABLE_D8_D16_COASLESCE		REG_BIT(30)
+#define   TGM_WRITE_EOM_FORCE			REG_BIT(17)
+#define   FORCE_1_SUB_MESSAGE_PER_FRAGMENT	REG_BIT(15)
+#define   SEQUENTIAL_ACCESS_UPGRADE_DISABLE	REG_BIT(13)
+
+#define LSC_CHICKEN_BIT_0_UDW			XE_REG_MCR(0xe7c8 + 4)
+#define   UGM_FRAGMENT_THRESHOLD_TO_3		REG_BIT(58 - 32)
+#define   DIS_CHAIN_2XSIMD8			REG_BIT(55 - 32)
+#define   XE2_ALLOC_DPA_STARVE_FIX_DIS		REG_BIT(47 - 32)
+#define   ENABLE_SMP_LD_RENDER_SURFACE_CONTROL	REG_BIT(44 - 32)
+#define   FORCE_SLM_FENCE_SCOPE_TO_TILE		REG_BIT(42 - 32)
+#define   FORCE_UGM_FENCE_SCOPE_TO_TILE		REG_BIT(41 - 32)
+#define   MAXREQS_PER_BANK			REG_GENMASK(39 - 32, 37 - 32)
+#define   DISABLE_128B_EVICTION_COMMAND_UDW	REG_BIT(36 - 32)
+
+#define SARB_CHICKEN1				XE_REG_MCR(0xe90c)
+#define   COMP_CKN_IN				REG_GENMASK(30, 29)
+
+#define RCU_MODE				XE_REG(0x14800, XE_REG_OPTION_MASKED)
+#define   RCU_MODE_FIXED_SLICE_CCS_MODE		REG_BIT(1)
+#define   RCU_MODE_CCS_ENABLE			REG_BIT(0)
+
+/*
+ * Total of 4 cslices, each a 3-bit field (see the masks below). The ranges
+ * listed here cannot be bit positions within such a field; presumably they
+ * are the values it may hold - TODO confirm against bspec:
+ *   [0-3] CCS ID, [4-6] RSVD, [7] Disabled
+ */
+#define CCS_MODE				XE_REG(0x14804)
+#define   CCS_MODE_CSLICE_0_3_MASK		REG_GENMASK(11, 0) /* 3 bits per cslice */
+#define   CCS_MODE_CSLICE_MASK			0x7 /* CCS0-3 + rsvd */
+#define   CCS_MODE_CSLICE_WIDTH			ilog2(CCS_MODE_CSLICE_MASK + 1)
+#define   CCS_MODE_CSLICE(cslice, ccs) \
+	((ccs) << ((cslice) * CCS_MODE_CSLICE_WIDTH))
+
+#define FORCEWAKE_ACK_GT			XE_REG(0x130044)
+#define   FORCEWAKE_KERNEL			BIT(0)
+#define   FORCEWAKE_USER			BIT(1)
+#define   FORCEWAKE_KERNEL_FALLBACK		BIT(15)
+
+#define MTL_MEDIA_PERF_LIMIT_REASONS		XE_REG(0x138030)
+#define MTL_MEDIA_MC6				XE_REG(0x138048)
+
+#define GT_CORE_STATUS				XE_REG(0x138060)
+#define   RCN_MASK				REG_GENMASK(2, 0)
+#define   GT_C0					0
+#define   GT_C6					3
+
+#define GT_GFX_RC6_LOCKED			XE_REG(0x138104)
+#define GT_GFX_RC6				XE_REG(0x138108)
+
+#define GT0_PERF_LIMIT_REASONS			XE_REG(0x1381a8)
+#define   GT0_PERF_LIMIT_REASONS_MASK		0xde3
+#define   PROCHOT_MASK				REG_BIT(0)
+#define   THERMAL_LIMIT_MASK			REG_BIT(1)
+#define   RATL_MASK				REG_BIT(5)
+#define   VR_THERMALERT_MASK			REG_BIT(6)
+#define   VR_TDC_MASK				REG_BIT(7)
+#define   POWER_LIMIT_4_MASK			REG_BIT(8)
+#define   POWER_LIMIT_1_MASK			REG_BIT(10)
+#define   POWER_LIMIT_2_MASK			REG_BIT(11)
+
+#define GT_PERF_STATUS				XE_REG(0x1381b4)
+#define   VOLTAGE_MASK				REG_GENMASK(10, 0)
+
+#define GT_INTR_DW(x)				XE_REG(0x190018 + ((x) * 4))
+
+#define RENDER_COPY_INTR_ENABLE			XE_REG(0x190030)
+#define VCS_VECS_INTR_ENABLE			XE_REG(0x190034)
+#define GUC_SG_INTR_ENABLE			XE_REG(0x190038)
+#define   ENGINE1_MASK				REG_GENMASK(31, 16)
+#define   ENGINE0_MASK				REG_GENMASK(15, 0)
+#define GPM_WGBOXPERF_INTR_ENABLE		XE_REG(0x19003c)
+#define GUNIT_GSC_INTR_ENABLE			XE_REG(0x190044)
+#define CCS_RSVD_INTR_ENABLE			XE_REG(0x190048)
+
+#define INTR_IDENTITY_REG(x)			XE_REG(0x190060 + ((x) * 4))
+#define   INTR_DATA_VALID			REG_BIT(31)
+#define   INTR_ENGINE_INSTANCE(x)		REG_FIELD_GET(GENMASK(25, 20), x)
+#define   INTR_ENGINE_CLASS(x)			REG_FIELD_GET(GENMASK(18, 16), x)
+#define   INTR_ENGINE_INTR(x)			REG_FIELD_GET(GENMASK(15, 0), x)
+#define   OTHER_GUC_INSTANCE			0
+#define   OTHER_GSC_INSTANCE			6
+
+#define IIR_REG_SELECTOR(x)			XE_REG(0x190070 + ((x) * 4))
+#define RCS0_RSVD_INTR_MASK			XE_REG(0x190090)
+#define BCS_RSVD_INTR_MASK			XE_REG(0x1900a0)
+#define VCS0_VCS1_INTR_MASK			XE_REG(0x1900a8)
+#define VCS2_VCS3_INTR_MASK			XE_REG(0x1900ac)
+#define VECS0_VECS1_INTR_MASK			XE_REG(0x1900d0)
+#define GUC_SG_INTR_MASK			XE_REG(0x1900e8)
+#define GPM_WGBOXPERF_INTR_MASK			XE_REG(0x1900ec)
+#define GUNIT_GSC_INTR_MASK			XE_REG(0x1900f4)
+#define CCS0_CCS1_INTR_MASK			XE_REG(0x190100)
+#define CCS2_CCS3_INTR_MASK			XE_REG(0x190104)
+#define XEHPC_BCS1_BCS2_INTR_MASK		XE_REG(0x190110)
+#define XEHPC_BCS3_BCS4_INTR_MASK		XE_REG(0x190114)
+#define XEHPC_BCS5_BCS6_INTR_MASK		XE_REG(0x190118)
+#define XEHPC_BCS7_BCS8_INTR_MASK		XE_REG(0x19011c)
+#define   GT_WAIT_SEMAPHORE_INTERRUPT		REG_BIT(11)
+#define   GT_CONTEXT_SWITCH_INTERRUPT		REG_BIT(8)
+#define   GT_RENDER_PIPECTL_NOTIFY_INTERRUPT	REG_BIT(4)
+#define   GT_CS_MASTER_ERROR_INTERRUPT		REG_BIT(3)
+#define   GT_RENDER_USER_INTERRUPT		REG_BIT(0)
+
+#define PVC_GT0_PACKAGE_ENERGY_STATUS		XE_REG(0x281004)
+#define PVC_GT0_PACKAGE_RAPL_LIMIT		XE_REG(0x281008)
+#define PVC_GT0_PACKAGE_POWER_SKU_UNIT		XE_REG(0x281068)
+#define PVC_GT0_PLATFORM_ENERGY_STATUS		XE_REG(0x28106c)
+#define PVC_GT0_PACKAGE_POWER_SKU		XE_REG(0x281080)
+
+#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
new file mode 100644
index 000000000000..92320bbc9d3d
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
@@ -0,0 +1,143 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_REGS_H_
+#define _XE_GUC_REGS_H_
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#include "regs/xe_reg_defs.h"
+
+/* Definitions of GuC H/W registers, bits, etc */
+
+#define DIST_DBS_POPULATED			XE_REG(0xd08)
+#define   DOORBELLS_PER_SQIDI_MASK		REG_GENMASK(23, 16)
+#define   SQIDIS_DOORBELL_EXIST_MASK		REG_GENMASK(15, 0)
+
+#define DRBREGL(x)				XE_REG(0x1000 + (x) * 8)
+#define   DRB_VALID				REG_BIT(0)
+#define DRBREGU(x)				XE_REG(0x1000 + (x) * 8 + 4)
+
+#define GTCR					XE_REG(0x4274)
+#define   GTCR_INVALIDATE			REG_BIT(0)
+
+#define GUC_ARAT_C6DIS				XE_REG(0xa178)
+
+#define GUC_STATUS				XE_REG(0xc000)
+#define   GS_AUTH_STATUS_MASK			REG_GENMASK(31, 30)
+#define   GS_AUTH_STATUS_BAD			REG_FIELD_PREP(GS_AUTH_STATUS_MASK, 0x1)
+#define   GS_AUTH_STATUS_GOOD			REG_FIELD_PREP(GS_AUTH_STATUS_MASK, 0x2)
+#define   GS_MIA_MASK				REG_GENMASK(18, 16)
+#define   GS_MIA_CORE_STATE			REG_FIELD_PREP(GS_MIA_MASK, 0x1)
+#define   GS_MIA_HALT_REQUESTED			REG_FIELD_PREP(GS_MIA_MASK, 0x2)
+#define   GS_MIA_ISR_ENTRY			REG_FIELD_PREP(GS_MIA_MASK, 0x4)
+#define   GS_UKERNEL_MASK			REG_GENMASK(15, 8)
+#define   GS_BOOTROM_MASK			REG_GENMASK(7, 1)
+#define   GS_BOOTROM_RSA_FAILED			REG_FIELD_PREP(GS_BOOTROM_MASK, 0x50)
+#define   GS_BOOTROM_JUMP_PASSED		REG_FIELD_PREP(GS_BOOTROM_MASK, 0x76)
+#define   GS_MIA_IN_RESET			REG_BIT(0)
+
+#define GUC_WOPCM_SIZE				XE_REG(0xc050)
+#define   GUC_WOPCM_SIZE_MASK			REG_GENMASK(31, 12)
+#define   GUC_WOPCM_SIZE_LOCKED			REG_BIT(0)
+
+#define GUC_SHIM_CONTROL			XE_REG(0xc064)
+#define   GUC_MOCS_INDEX_MASK			REG_GENMASK(27, 24)
+#define   GUC_SHIM_WC_ENABLE			REG_BIT(21)
+#define   GUC_ENABLE_MIA_CLOCK_GATING		REG_BIT(15)
+#define   GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA	REG_BIT(10)
+#define   GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA	REG_BIT(9)
+#define   GUC_MSGCH_ENABLE			REG_BIT(4)
+#define   GUC_ENABLE_MIA_CACHING		REG_BIT(2)
+#define   GUC_ENABLE_READ_CACHE_LOGIC		REG_BIT(1)
+#define   GUC_DISABLE_SRAM_INIT_TO_ZEROES	REG_BIT(0)
+
+#define SOFT_SCRATCH(n)				XE_REG(0xc180 + (n) * 4)
+#define SOFT_SCRATCH_COUNT			16
+
+#define HUC_KERNEL_LOAD_INFO			XE_REG(0xc1dc)
+#define   HUC_LOAD_SUCCESSFUL			REG_BIT(0)
+
+#define UOS_RSA_SCRATCH(i)			XE_REG(0xc200 + (i) * 4)
+#define UOS_RSA_SCRATCH_COUNT			64
+
+#define DMA_ADDR_0_LOW				XE_REG(0xc300)
+#define DMA_ADDR_0_HIGH				XE_REG(0xc304)
+#define DMA_ADDR_1_LOW				XE_REG(0xc308)
+#define DMA_ADDR_1_HIGH				XE_REG(0xc30c)
+#define   DMA_ADDR_SPACE_MASK			REG_GENMASK(20, 16)
+#define   DMA_ADDRESS_SPACE_WOPCM		REG_FIELD_PREP(DMA_ADDR_SPACE_MASK, 7)
+#define   DMA_ADDRESS_SPACE_GGTT		REG_FIELD_PREP(DMA_ADDR_SPACE_MASK, 8)
+#define DMA_COPY_SIZE				XE_REG(0xc310)
+#define DMA_CTRL				XE_REG(0xc314)
+#define   HUC_UKERNEL				REG_BIT(9)
+#define   UOS_MOVE				REG_BIT(4)
+#define   START_DMA				REG_BIT(0)
+#define DMA_GUC_WOPCM_OFFSET			XE_REG(0xc340)
+#define   GUC_WOPCM_OFFSET_SHIFT		14
+#define   GUC_WOPCM_OFFSET_MASK			REG_GENMASK(31, GUC_WOPCM_OFFSET_SHIFT)
+#define   HUC_LOADING_AGENT_GUC			REG_BIT(1)
+#define   GUC_WOPCM_OFFSET_VALID		REG_BIT(0)
+#define GUC_MAX_IDLE_COUNT			XE_REG(0xc3e4)
+
+#define GUC_SEND_INTERRUPT			XE_REG(0xc4c8)
+#define   GUC_SEND_TRIGGER			REG_BIT(0)
+
+#define GUC_BCS_RCS_IER				XE_REG(0xc550)
+#define GUC_VCS2_VCS1_IER			XE_REG(0xc554)
+#define GUC_WD_VECS_IER				XE_REG(0xc558)
+#define GUC_PM_P24C_IER				XE_REG(0xc55c)
+
+#define GUC_TLB_INV_CR				XE_REG(0xcee8)
+#define   GUC_TLB_INV_CR_INVALIDATE		REG_BIT(0)
+
+#define HUC_STATUS2				XE_REG(0xd3b0)
+#define   HUC_FW_VERIFIED			REG_BIT(7)
+
+#define GT_PM_CONFIG				XE_REG(0x13816c)
+#define   GT_DOORBELL_ENABLE			REG_BIT(0)
+
+#define GUC_HOST_INTERRUPT			XE_REG(0x1901f0)
+
+#define VF_SW_FLAG(n)				XE_REG(0x190240 + (n) * 4)
+#define VF_SW_FLAG_COUNT			4
+
+#define MED_GUC_HOST_INTERRUPT			XE_REG(0x190304)
+
+#define MED_VF_SW_FLAG(n)			XE_REG(0x190310 + (n) * 4)
+#define MED_VF_SW_FLAG_COUNT			4
+
+/* GuC Interrupt Vector */
+#define GUC_INTR_GUC2HOST			REG_BIT(15)
+#define GUC_INTR_EXEC_ERROR			REG_BIT(14)
+#define GUC_INTR_DISPLAY_EVENT			REG_BIT(13)
+#define GUC_INTR_SEM_SIG			REG_BIT(12)
+#define GUC_INTR_IOMMU2GUC			REG_BIT(11)
+#define GUC_INTR_DOORBELL_RANG			REG_BIT(10)
+#define GUC_INTR_DMA_DONE			REG_BIT(9)
+#define GUC_INTR_FATAL_ERROR			REG_BIT(8)
+#define GUC_INTR_NOTIF_ERROR			REG_BIT(7)
+#define GUC_INTR_SW_INT_6			REG_BIT(6)
+#define GUC_INTR_SW_INT_5			REG_BIT(5)
+#define GUC_INTR_SW_INT_4			REG_BIT(4)
+#define GUC_INTR_SW_INT_3			REG_BIT(3)
+#define GUC_INTR_SW_INT_2			REG_BIT(2)
+#define GUC_INTR_SW_INT_1			REG_BIT(1)
+#define GUC_INTR_SW_INT_0			REG_BIT(0)
+
+#define GUC_NUM_DOORBELLS			256
+
+/* format of the HW-monitored doorbell cacheline */
+struct guc_doorbell_info {
+	u32 db_status;
+#define GUC_DOORBELL_DISABLED			0
+#define GUC_DOORBELL_ENABLED			1
+
+	u32 cookie;
+	u32 reserved[14];
+} __packed;
+
+#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
new file mode 100644
index 000000000000..4be81abc86ad
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_LRC_LAYOUT_H_
+#define _XE_LRC_LAYOUT_H_
+
+#define CTX_CONTEXT_CONTROL		(0x02 + 1)
+#define CTX_RING_HEAD			(0x04 + 1)
+#define CTX_RING_TAIL			(0x06 + 1)
+#define CTX_RING_START			(0x08 + 1)
+#define CTX_RING_CTL			(0x0a + 1)
+#define CTX_PDP0_UDW			(0x30 + 1)
+#define CTX_PDP0_LDW			(0x32 + 1)
+
+#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h
new file mode 100644
index 000000000000..519dd1067a19
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_MCHBAR_REGS_H_
+#define _XE_MCHBAR_REGS_H_
+
+#include "regs/xe_reg_defs.h"
+
+/*
+ * MCHBAR mirror.
+ *
+ * This mirrors the MCHBAR MMIO space whose location is determined by
+ * device 0 function 0's pci config register 0x44 or 0x48 and matches it in
+ * every way.
+ */
+
+#define MCHBAR_MIRROR_BASE_SNB			0x140000
+
+#define PCU_CR_PACKAGE_POWER_SKU		XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5930)
+#define   PKG_TDP				GENMASK_ULL(14, 0)
+#define   PKG_MIN_PWR				GENMASK_ULL(30, 16)
+#define   PKG_MAX_PWR				GENMASK_ULL(46, 32)
+#define   PKG_MAX_WIN				GENMASK_ULL(54, 48)
+#define     PKG_MAX_WIN_X			GENMASK_ULL(54, 53)
+#define     PKG_MAX_WIN_Y			GENMASK_ULL(52, 48)
+
+
+#define PCU_CR_PACKAGE_POWER_SKU_UNIT		XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5938)
+#define   PKG_PWR_UNIT				REG_GENMASK(3, 0)
+#define   PKG_ENERGY_UNIT			REG_GENMASK(12, 8)
+#define   PKG_TIME_UNIT				REG_GENMASK(19, 16)
+
+#define PCU_CR_PACKAGE_ENERGY_STATUS		XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x593c)
+
+#define PCU_CR_PACKAGE_RAPL_LIMIT		XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x59a0)
+#define   PKG_PWR_LIM_1				REG_GENMASK(14, 0)
+#define   PKG_PWR_LIM_1_EN			REG_BIT(15)
+#define   PKG_PWR_LIM_1_TIME			REG_GENMASK(23, 17)
+#define   PKG_PWR_LIM_1_TIME_X			REG_GENMASK(23, 22)
+#define   PKG_PWR_LIM_1_TIME_Y			REG_GENMASK(21, 17)
+
+#endif /* _XE_MCHBAR_REGS_H_ */
diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
new file mode 100644
index 000000000000..c50e7650c09a
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_REG_DEFS_H_
+#define _XE_REG_DEFS_H_
+
+#include "compat-i915-headers/i915_reg_defs.h"
+
+/**
+ * struct xe_reg - Register definition
+ *
+ * Register definition to be used by the individual register. Although the same
+ * definition is used for xe_reg and xe_reg_mcr, they use different internal
+ * APIs for accesses.
+ */
+struct xe_reg {
+	union {
+		struct {
+			/** @addr: address */
+			u32 addr:28;
+			/**
+			 * @masked: register is "masked", with upper 16bits used
+			 * to identify the bits that are updated on the lower
+			 * bits
+			 */
+			u32 masked:1;
+			/**
+			 * @mcr: register is multicast/replicated in the
+			 * hardware and needs special handling. Any register
+			 * with this set should also use struct xe_reg_mcr as
+			 * its type. It's only here so the few places that deal
+			 * with MCR registers specially (xe_sr.c) and tests using
+			 * the raw value can inspect it.
+			 */
+			u32 mcr:1;
+			/**
+			 * @ext: access MMIO extension space for current register.
+			 */
+			u32 ext:1;
+		};
+		/** @raw: Raw value with both address and options */
+		u32 raw;
+	};
+};
+
+/**
+ * struct xe_reg_mcr - MCR register definition
+ *
+ * MCR register is the same as a regular register, but uses another type since
+ * the internal API used for accessing them is different: it's never correct to
+ * use regular MMIO access.
+ */
+struct xe_reg_mcr {
+	/** @__reg: The register */
+	struct xe_reg __reg;
+};
+
+
+/**
+ * XE_REG_OPTION_MASKED - Register is "masked", with upper 16 bits marking the
+ * written bits on the lower 16 bits.
+ *
+ * It only applies to registers explicitly marked in bspec with
+ * "Access: Masked". Registers with this option can have write operations to
+ * specific lower bits by setting the corresponding upper bits. Other bits will
+ * not be affected. This allows register writes without needing a RMW cycle and
+ * without caching in software the register value.
+ *
+ * Example: a write with value 0x00010001 will set bit 0 and all other bits
+ * retain their previous values.
+ *
+ * To be used with XE_REG(), XE_REG_MCR() and XE_REG_INITIALIZER()
+ */
+#define XE_REG_OPTION_MASKED		.masked = 1
+
+/**
+ * XE_REG_INITIALIZER - Initializer for struct xe_reg.
+ * @r_: Register offset
+ * @...: Additional options like access mode. See struct xe_reg for available
+ *       options.
+ *
+ * Register field is mandatory, and additional options may be passed as
+ * arguments. Usually ``XE_REG()`` should be preferred since it creates an
+ * object of the right type. However when initializing static const storage,
+ * where a compound statement is not allowed, this can be used instead.
+ */
+#define XE_REG_INITIALIZER(r_, ...)    { .addr = r_, __VA_ARGS__ }
+
+
+/**
+ * XE_REG - Create a struct xe_reg from offset and additional flags
+ * @r_: Register offset
+ * @...: Additional options like access mode. See struct xe_reg for available
+ *       options.
+ */
+#define XE_REG(r_, ...)		((const struct xe_reg)XE_REG_INITIALIZER(r_, ##__VA_ARGS__))
+
+/**
+ * XE_REG_EXT - Create a struct xe_reg from extension offset and additional
+ * flags
+ * @r_: Register extension offset
+ * @...: Additional options like access mode. See struct xe_reg for available
+ *       options.
+ */
+#define XE_REG_EXT(r_, ...)	\
+	((const struct xe_reg)XE_REG_INITIALIZER(r_, ##__VA_ARGS__, .ext = 1))
+
+/**
+ * XE_REG_MCR - Create a struct xe_reg_mcr from offset and additional flags
+ * @r_: Register offset
+ * @...: Additional options like access mode. See struct xe_reg for available
+ *       options.
+ */
+#define XE_REG_MCR(r_, ...)	((const struct xe_reg_mcr){					\
+				 .__reg = XE_REG_INITIALIZER(r_,  ##__VA_ARGS__, .mcr = 1)	\
+				 })
+
+#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
new file mode 100644
index 000000000000..2c214bb9b671
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+#ifndef _XE_REGS_H_
+#define _XE_REGS_H_
+
+#include "regs/xe_reg_defs.h"
+
+#define TIMESTAMP_OVERRIDE					XE_REG(0x44074)
+#define   TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK	REG_GENMASK(15, 12)
+#define   TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK		REG_GENMASK(9, 0)
+
+#define PCU_IRQ_OFFSET				0x444e0
+#define GU_MISC_IRQ_OFFSET			0x444f0
+#define   GU_MISC_GSE				REG_BIT(27)
+
+#define SOFTWARE_FLAGS_SPR33			XE_REG(0x4f084)
+
+#define GU_CNTL_PROTECTED			XE_REG(0x10100C)
+#define   DRIVERINT_FLR_DIS			REG_BIT(31)
+
+#define GU_CNTL					XE_REG(0x101010)
+#define   LMEM_INIT				REG_BIT(7)
+#define   DRIVERFLR				REG_BIT(31)
+
+#define GU_DEBUG				XE_REG(0x101018)
+#define   DRIVERFLR_STATUS			REG_BIT(31)
+
+#define XEHP_CLOCK_GATE_DIS			XE_REG(0x101014)
+#define   SGSI_SIDECLK_DIS			REG_BIT(17)
+
+#define GGC					XE_REG(0x108040)
+#define   GMS_MASK				REG_GENMASK(15, 8)
+#define   GGMS_MASK				REG_GENMASK(7, 6)
+
+#define DSMBASE					XE_REG(0x1080C0)
+#define   BDSM_MASK				REG_GENMASK64(63, 20)
+
+#define GSMBASE					XE_REG(0x108100)
+
+#define STOLEN_RESERVED				XE_REG(0x1082c0)
+#define   WOPCM_SIZE_MASK			REG_GENMASK64(9, 7)
+
+#define MTL_RP_STATE_CAP			XE_REG(0x138000)
+
+#define MTL_GT_RPE_FREQUENCY			XE_REG(0x13800c)
+
+#define MTL_MEDIAP_STATE_CAP			XE_REG(0x138020)
+#define   MTL_RPN_CAP_MASK			REG_GENMASK(24, 16)
+#define   MTL_RP0_CAP_MASK			REG_GENMASK(8, 0)
+
+#define MTL_MPE_FREQUENCY			XE_REG(0x13802c)
+#define   MTL_RPE_MASK				REG_GENMASK(8, 0)
+
+#define DG1_MSTR_TILE_INTR			XE_REG(0x190008)
+#define   DG1_MSTR_IRQ				REG_BIT(31)
+#define   DG1_MSTR_TILE(t)			REG_BIT(t)
+
+#define GFX_MSTR_IRQ				XE_REG(0x190010)
+#define   MASTER_IRQ				REG_BIT(31)
+#define   GU_MISC_IRQ				REG_BIT(29)
+#define   DISPLAY_IRQ				REG_BIT(16)
+#define   GT_DW_IRQ(x)				REG_BIT(x)
+
+#define PVC_RP_STATE_CAP			XE_REG(0x281014)
+
+#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h
new file mode 100644
index 000000000000..58a4e0fad1e1
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _REGS_XE_SRIOV_REGS_H_
+#define _REGS_XE_SRIOV_REGS_H_
+
+#include "regs/xe_reg_defs.h"
+
+#define XE2_LMEM_CFG			XE_REG(0x48b0)
+
+#define LMEM_CFG			XE_REG(0xcf58)
+#define   LMEM_EN			REG_BIT(31)
+#define   LMTT_DIR_PTR			REG_GENMASK(30, 0) /* in multiples of 64KB */
+
+#endif
diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile
new file mode 100644
index 000000000000..39d8a0892274
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_DRM_XE_KUNIT_TEST) += \
+	xe_bo_test.o \
+	xe_dma_buf_test.o \
+	xe_migrate_test.o \
+	xe_mocs_test.o \
+	xe_pci_test.o \
+	xe_rtp_test.o \
+	xe_wa_test.o
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
new file mode 100644
index 000000000000..3436fd9cf2b2
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <kunit/test.h>
+#include <kunit/visibility.h>
+
+#include "tests/xe_bo_test.h"
+#include "tests/xe_pci_test.h"
+#include "tests/xe_test.h"
+
+#include "xe_bo_evict.h"
+#include "xe_pci.h"
+#include "xe_pm.h"
+
+static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo,
+			    bool clear, u64 get_val, u64 assign_val,
+			    struct kunit *test)
+{
+	struct dma_fence *fence;
+	struct ttm_tt *ttm;
+	struct page *page;
+	pgoff_t ccs_page;
+	long timeout;
+	u64 *cpu_map;
+	int ret;
+	u32 offset;
+
+	/* Move bo to VRAM if not already there. */
+	ret = xe_bo_validate(bo, NULL, false);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to validate bo.\n");
+		return ret;
+	}
+
+	/* Optionally clear bo *and* CCS data in VRAM. */
+	if (clear) {
+		fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource);
+		if (IS_ERR(fence)) {
+			KUNIT_FAIL(test, "Failed to submit bo clear.\n");
+			return PTR_ERR(fence);
+		}
+		dma_fence_put(fence);
+	}
+
+	/* Evict to system. CCS data should be copied. */
+	ret = xe_bo_evict(bo, true);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to evict bo.\n");
+		return ret;
+	}
+
+	/* Sync all migration blits */
+	timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
+					DMA_RESV_USAGE_KERNEL,
+					true,
+					5 * HZ);
+	if (timeout <= 0) {
+		KUNIT_FAIL(test, "Failed to sync bo eviction.\n");
+		return -ETIME;
+	}
+
+	/*
+	 * Bo with CCS data is now in system memory. Verify backing store
+	 * and data integrity. Then assign for the next testing round while
+	 * we still have a CPU map.
+	 */
+	ttm = bo->ttm.ttm;
+	if (!ttm || !ttm_tt_is_populated(ttm)) {
+		KUNIT_FAIL(test, "Bo was not in expected placement.\n");
+		return -EINVAL;
+	}
+
+	ccs_page = xe_bo_ccs_pages_start(bo) >> PAGE_SHIFT;
+	if (ccs_page >= ttm->num_pages) {
+		KUNIT_FAIL(test, "No TTM CCS pages present.\n");
+		return -EINVAL;
+	}
+
+	page = ttm->pages[ccs_page];
+	cpu_map = kmap_local_page(page);
+
+	/* Check first CCS value */
+	if (cpu_map[0] != get_val) {
+		KUNIT_FAIL(test,
+			   "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
+			   (unsigned long long)get_val,
+			   (unsigned long long)cpu_map[0]);
+		ret = -EINVAL;
+	}
+
+	/* Check last CCS value, or at least last value in page. */
+	offset = xe_device_ccs_bytes(tile_to_xe(tile), bo->size);
+	offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1;
+	if (cpu_map[offset] != get_val) {
+		KUNIT_FAIL(test,
+			   "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
+			   (unsigned long long)get_val,
+			   (unsigned long long)cpu_map[offset]);
+		ret = -EINVAL;
+	}
+
+	cpu_map[0] = assign_val;
+	cpu_map[offset] = assign_val;
+	kunmap_local(cpu_map);
+
+	return ret;
+}
+
+static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
+			      struct kunit *test)
+{
+	struct xe_bo *bo;
+
+	int ret;
+
+	/* TODO: Sanity check */
+	unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile);
+
+	if (IS_DGFX(xe))
+		kunit_info(test, "Testing vram id %u\n", tile->id);
+	else
+		kunit_info(test, "Testing system memory\n");
+
+	bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC,
+			       ttm_bo_type_device, bo_flags);
+	if (IS_ERR(bo)) {
+		KUNIT_FAIL(test, "Failed to create bo.\n");
+		return;
+	}
+
+	xe_bo_lock(bo, false);
+
+	kunit_info(test, "Verifying that CCS data is cleared on creation.\n");
+	ret = ccs_test_migrate(tile, bo, false, 0ULL, 0xdeadbeefdeadbeefULL,
+			       test);
+	if (ret)
+		goto out_unlock;
+
+	kunit_info(test, "Verifying that CCS data survives migration.\n");
+	ret = ccs_test_migrate(tile, bo, false, 0xdeadbeefdeadbeefULL,
+			       0xdeadbeefdeadbeefULL, test);
+	if (ret)
+		goto out_unlock;
+
+	kunit_info(test, "Verifying that CCS data can be properly cleared.\n");
+	ret = ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test);
+
+out_unlock:
+	xe_bo_unlock(bo);
+	xe_bo_put(bo);
+}
+
+static int ccs_test_run_device(struct xe_device *xe)
+{
+	struct kunit *test = xe_cur_kunit();
+	struct xe_tile *tile;
+	int id;
+
+	if (!xe_device_has_flat_ccs(xe)) {
+		kunit_info(test, "Skipping non-flat-ccs device.\n");
+		return 0;
+	}
+
+	xe_device_mem_access_get(xe);
+
+	for_each_tile(tile, xe, id) {
+		/* For igfx run only for primary tile */
+		if (!IS_DGFX(xe) && id > 0)
+			continue;
+		ccs_test_run_tile(xe, tile, test);
+	}
+
+	xe_device_mem_access_put(xe);
+
+	return 0;
+}
+
+void xe_ccs_migrate_kunit(struct kunit *test)
+{
+	xe_call_for_each_device(ccs_test_run_device);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_ccs_migrate_kunit);
+
+static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test)
+{
+	struct xe_bo *bo, *external;
+	unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile);
+	struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate);
+	struct xe_gt *__gt;
+	int err, i, id;
+
+	kunit_info(test, "Testing device %s vram id %u\n",
+		   dev_name(xe->drm.dev), tile->id);
+
+	for (i = 0; i < 2; ++i) {
+		xe_vm_lock(vm, false);
+		bo = xe_bo_create_user(xe, NULL, vm, 0x10000,
+				       DRM_XE_GEM_CPU_CACHING_WC,
+				       ttm_bo_type_device,
+				       bo_flags);
+		xe_vm_unlock(vm);
+		if (IS_ERR(bo)) {
+			KUNIT_FAIL(test, "bo create err=%pe\n", bo);
+			break;
+		}
+
+		external = xe_bo_create_user(xe, NULL, NULL, 0x10000,
+					     DRM_XE_GEM_CPU_CACHING_WC,
+					     ttm_bo_type_device, bo_flags);
+		if (IS_ERR(external)) {
+			KUNIT_FAIL(test, "external bo create err=%pe\n", external);
+			goto cleanup_bo;
+		}
+
+		xe_bo_lock(external, false);
+		err = xe_bo_pin_external(external);
+		xe_bo_unlock(external);
+		if (err) {
+			KUNIT_FAIL(test, "external bo pin err=%pe\n",
+				   ERR_PTR(err));
+			goto cleanup_external;
+		}
+
+		err = xe_bo_evict_all(xe);
+		if (err) {
+			KUNIT_FAIL(test, "evict err=%pe\n", ERR_PTR(err));
+			goto cleanup_all;
+		}
+
+		for_each_gt(__gt, xe, id)
+			xe_gt_sanitize(__gt);
+		err = xe_bo_restore_kernel(xe);
+		/*
+		 * Snapshotting the CTB and copying back a potentially old
+		 * version seems risky, depending on what might have been
+		 * inflight. Also it seems snapshotting the ADS object and
+		 * copying back results in serious breakage. Normally when
+		 * calling xe_bo_restore_kernel() we always fully restart the
+		 * GT, which re-initializes such things. We could potentially
+		 * skip saving and restoring such objects in xe_bo_evict_all(),
+		 * however it seems quite fragile not to also restart the GT.
+		 * Try to do that here by triggering a GT reset.
+		 */
+		for_each_gt(__gt, xe, id) {
+			xe_gt_reset_async(__gt);
+			flush_work(&__gt->reset.worker);
+		}
+		if (err) {
+			KUNIT_FAIL(test, "restore kernel err=%pe\n",
+				   ERR_PTR(err));
+			goto cleanup_all;
+		}
+
+		err = xe_bo_restore_user(xe);
+		if (err) {
+			KUNIT_FAIL(test, "restore user err=%pe\n", ERR_PTR(err));
+			goto cleanup_all;
+		}
+
+		if (!xe_bo_is_vram(external)) {
+			KUNIT_FAIL(test, "external bo is not vram\n");
+			err = -EPROTO;
+			goto cleanup_all;
+		}
+
+		if (xe_bo_is_vram(bo)) {
+			KUNIT_FAIL(test, "bo is vram\n");
+			err = -EPROTO;
+			goto cleanup_all;
+		}
+
+		if (i) {
+			down_read(&vm->lock);
+			xe_vm_lock(vm, false);
+			err = xe_bo_validate(bo, bo->vm, false);
+			xe_vm_unlock(vm);
+			up_read(&vm->lock);
+			if (err) {
+				KUNIT_FAIL(test, "bo valid err=%pe\n",
+					   ERR_PTR(err));
+				goto cleanup_all;
+			}
+			xe_bo_lock(external, false);
+			err = xe_bo_validate(external, NULL, false);
+			xe_bo_unlock(external);
+			if (err) {
+				KUNIT_FAIL(test, "external bo valid err=%pe\n",
+					   ERR_PTR(err));
+				goto cleanup_all;
+			}
+		}
+
+		xe_bo_lock(external, false);
+		xe_bo_unpin_external(external);
+		xe_bo_unlock(external);
+
+		xe_bo_put(external);
+
+		xe_bo_lock(bo, false);
+		__xe_bo_unset_bulk_move(bo);
+		xe_bo_unlock(bo);
+		xe_bo_put(bo);
+		continue;
+
+cleanup_all:
+		xe_bo_lock(external, false);
+		xe_bo_unpin_external(external);
+		xe_bo_unlock(external);
+cleanup_external:
+		xe_bo_put(external);
+cleanup_bo:
+		xe_bo_lock(bo, false);
+		__xe_bo_unset_bulk_move(bo);
+		xe_bo_unlock(bo);
+		xe_bo_put(bo);
+		break;
+	}
+
+	xe_vm_put(vm);
+
+	return 0;
+}
+
+static int evict_test_run_device(struct xe_device *xe)
+{
+	struct kunit *test = xe_cur_kunit();
+	struct xe_tile *tile;
+	int id;
+
+	if (!IS_DGFX(xe)) {
+		kunit_info(test, "Skipping non-discrete device %s.\n",
+			   dev_name(xe->drm.dev));
+		return 0;
+	}
+
+	xe_device_mem_access_get(xe);
+
+	for_each_tile(tile, xe, id)
+		evict_test_run_tile(xe, tile, test);
+
+	xe_device_mem_access_put(xe);
+
+	return 0;
+}
+
+void xe_bo_evict_kunit(struct kunit *test)
+{
+	xe_call_for_each_device(evict_test_run_device);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_bo_evict_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.c b/drivers/gpu/drm/xe/tests/xe_bo_test.c
new file mode 100644
index 000000000000..f408f17f2164
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_bo_test.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_bo_test.h"
+
+#include <kunit/test.h>
+
+static struct kunit_case xe_bo_tests[] = {
+	KUNIT_CASE(xe_ccs_migrate_kunit),
+	KUNIT_CASE(xe_bo_evict_kunit),
+	{}
+};
+
+static struct kunit_suite xe_bo_test_suite = {
+	.name = "xe_bo",
+	.test_cases = xe_bo_tests,
+};
+
+kunit_test_suite(xe_bo_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xe_bo kunit test");
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.h b/drivers/gpu/drm/xe/tests/xe_bo_test.h
new file mode 100644
index 000000000000..0113ab45066a
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_bo_test.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 AND MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_BO_TEST_H_
+#define _XE_BO_TEST_H_
+
+struct kunit;
+
+void xe_ccs_migrate_kunit(struct kunit *test);
+void xe_bo_evict_kunit(struct kunit *test);
+
+#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
new file mode 100644
index 000000000000..9f6d571d7fa9
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
@@ -0,0 +1,278 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/xe_drm.h>
+
+#include <kunit/test.h>
+#include <kunit/visibility.h>
+
+#include "tests/xe_dma_buf_test.h"
+#include "tests/xe_pci_test.h"
+
+#include "xe_pci.h"
+
+static bool p2p_enabled(struct dma_buf_test_params *params)
+{
+	if (!IS_ENABLED(CONFIG_PCI_P2PDMA) || !params->attach_ops)
+		return false;	/* P2PDMA not built in, or no importer attach ops */
+	return params->attach_ops->allow_peer2peer;
+}
+
+static bool is_dynamic(struct dma_buf_test_params *params)
+{
+	if (!IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY) || !params->attach_ops)
+		return false;	/* move-notify not built in, or no importer attach ops */
+	return params->attach_ops->move_notify != NULL;
+}
+
+/*
+ * Check that @exported is in the memory type the test parameters predict, then,
+ * unless it is pinned, evict it and re-validate @imported.
+ */
+static void check_residency(struct kunit *test, struct xe_bo *exported,
+			    struct xe_bo *imported, struct dma_buf *dmabuf)
+{
+	struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv);
+	u32 mem_type;
+	int ret;
+
+	xe_bo_assert_held(exported);
+	xe_bo_assert_held(imported);
+
+	/* Work out which memory type the exporter is expected to be in. */
+	mem_type = XE_PL_VRAM0;
+	if (!(params->mem_mask & XE_BO_CREATE_VRAM0_BIT))
+		mem_type = XE_PL_TT;	/* No VRAM allowed */
+	else if (params->force_different_devices && !p2p_enabled(params))
+		mem_type = XE_PL_TT;	/* No P2P */
+	else if (params->force_different_devices && !is_dynamic(params) &&
+		 (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT))
+		mem_type = XE_PL_TT;	/* Pin migrated to TT */
+
+	if (!xe_bo_is_mem_type(exported, mem_type)) {
+		KUNIT_FAIL(test, "Exported bo was not in expected memory type.\n");
+		return;
+	}
+
+	/* The eviction round trip below is skipped for a pinned exporter. */
+	if (xe_bo_is_pinned(exported))
+		return;
+
+	/*
+	 * Evict exporter. xe_gem_prime_export() doesn't set the gem object's
+	 * dma_buf member, which the move_notify() functionality needs, so hack
+	 * that up here. If the importer is on a different device it is evicted
+	 * too, through move_notify(); on the same device both are the same bo.
+	 */
+	swap(exported->ttm.base.dma_buf, dmabuf);
+	ret = xe_bo_evict(exported, true);
+	swap(exported->ttm.base.dma_buf, dmabuf);
+	if (ret) {
+		if (ret != -EINTR && ret != -ERESTARTSYS)
+			KUNIT_FAIL(test, "Evicting exporter failed with err=%d.\n",
+				   ret);
+		return;
+	}
+
+	/* Verify that also importer has been evicted to SYSTEM */
+	if (exported != imported && !xe_bo_is_mem_type(imported, XE_PL_SYSTEM)) {
+		KUNIT_FAIL(test, "Importer wasn't properly evicted.\n");
+		return;
+	}
+
+	/* Re-validate the importer. This should move also exporter in. */
+	ret = xe_bo_validate(imported, NULL, false);
+	if (ret) {
+		if (ret != -EINTR && ret != -ERESTARTSYS)
+			KUNIT_FAIL(test, "Validating importer failed with err=%d.\n",
+				   ret);
+		return;
+	}
+
+	/*
+	 * If on different devices, the exporter is kept in system if possible,
+	 * as the transfer is likely just as fast from system memory.
+	 */
+	if (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)
+		KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, XE_PL_TT));
+	else
+		KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type));
+
+	if (params->force_different_devices)
+		KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(imported, XE_PL_TT));
+	else
+		KUNIT_EXPECT_TRUE(test, exported == imported);
+}
+
+/*
+ * Create a bo according to the current test parameters, export it as a dma-buf,
+ * import that dma-buf through the same xe device and check the outcome.
+ */
+static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
+{
+	struct kunit *test = xe_cur_kunit();
+	struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv);
+	struct drm_gem_object *import;
+	struct dma_buf *dmabuf;
+	struct xe_bo *bo;
+	size_t size;
+
+	/* No VRAM on this device? */
+	if (!ttm_manager_type(&xe->ttm, XE_PL_VRAM0) &&
+	    (params->mem_mask & XE_BO_CREATE_VRAM0_BIT))
+		return;
+
+	size = PAGE_SIZE;
+	if ((params->mem_mask & XE_BO_CREATE_VRAM0_BIT) &&
+	    xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
+		size = SZ_64K;
+
+	kunit_info(test, "running %s\n", __func__);
+	bo = xe_bo_create_user(xe, NULL, NULL, size, DRM_XE_GEM_CPU_CACHING_WC,
+			       ttm_bo_type_device, XE_BO_CREATE_USER_BIT | params->mem_mask);
+	if (IS_ERR(bo)) {
+		KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", PTR_ERR(bo));
+		return;
+	}
+
+	dmabuf = xe_gem_prime_export(&bo->ttm.base, 0);
+	if (IS_ERR(dmabuf)) {
+		KUNIT_FAIL(test, "xe_gem_prime_export() failed with err=%ld\n",
+			   PTR_ERR(dmabuf));
+		goto out;
+	}
+
+	import = xe_gem_prime_import(&xe->drm, dmabuf);
+	if (!IS_ERR(import)) {
+		struct xe_bo *import_bo = gem_to_xe_bo(import);
+
+		/* Did import succeed when it shouldn't due to lack of p2p support? */
+		if (params->force_different_devices &&
+		    !p2p_enabled(params) &&
+		    !(params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) {
+			KUNIT_FAIL(test,
+				   "xe_gem_prime_import() succeeded when it shouldn't have\n");
+		} else {
+			int err;
+
+			/* Is everything where we expect it to be? */
+			xe_bo_lock(import_bo, false);
+			err = xe_bo_validate(import_bo, NULL, false);
+
+			/* Pinning in VRAM is not allowed. */
+			if (!is_dynamic(params) &&
+			    params->force_different_devices &&
+			    !(params->mem_mask & XE_BO_CREATE_SYSTEM_BIT))
+				KUNIT_EXPECT_EQ(test, err, -EINVAL);
+			/* Otherwise only expect interrupts or success. */
+			else if (err && err != -EINTR && err != -ERESTARTSYS)
+				KUNIT_EXPECT_TRUE(test, !err || err == -EINTR ||
+						  err == -ERESTARTSYS);
+
+			if (!err)
+				check_residency(test, bo, import_bo, dmabuf);
+			xe_bo_unlock(import_bo);
+		}
+		drm_gem_object_put(import);
+	} else if (PTR_ERR(import) != -EOPNOTSUPP) {
+		/* Unexpected error code. */
+		KUNIT_FAIL(test, "xe_gem_prime_import failed with the wrong err=%ld\n",
+			   PTR_ERR(import));
+	} else if (!params->force_different_devices ||
+		   p2p_enabled(params) ||
+		   (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) {
+		/* Shouldn't fail if we can reuse same bo, use p2p or use system */
+		KUNIT_FAIL(test, "dynamic p2p attachment failed with err=%ld\n",
+			   PTR_ERR(import));
+	}
+	dma_buf_put(dmabuf);
+out:
+	drm_gem_object_put(&bo->ttm.base);
+}
+
+static const struct dma_buf_attach_ops nop2p_attach_ops = {
+	.allow_peer2peer = false,	/* importer ops variant without p2p */
+	.move_notify = xe_dma_buf_move_notify
+};
+
+/*
+ * We test the implementation with bos of different residency and with
+ * importers with different capabilities; some lacking p2p support and some
+ * lacking dynamic capabilities (attach_ops == NULL). We also fake
+ * different devices (.force_different_devices), avoiding the import
+ * shortcut that just reuses the same gem object.
+ */
+static const struct dma_buf_test_params test_params[] = {
+	{.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &xe_dma_buf_attach_ops},
+	{.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &xe_dma_buf_attach_ops,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &nop2p_attach_ops},
+	{.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &nop2p_attach_ops,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_VRAM0_BIT},
+	{.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+	 .attach_ops = &xe_dma_buf_attach_ops},
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+	 .attach_ops = &xe_dma_buf_attach_ops,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+	 .attach_ops = &nop2p_attach_ops},
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+	 .attach_ops = &nop2p_attach_ops,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT},
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &xe_dma_buf_attach_ops},
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &xe_dma_buf_attach_ops,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &nop2p_attach_ops},
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &nop2p_attach_ops,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT},
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+	 .force_different_devices = true},
+
+	{}	/* all-zero entry: a zero mem_mask marks the end of the table */
+};
+
+/* Per-device callback: run the import test once for each entry of test_params. */
+static int dma_buf_run_device(struct xe_device *xe)
+{
+	struct kunit *test = xe_cur_kunit();
+	const struct dma_buf_test_params *cur = test_params;
+
+	while (cur->mem_mask) {
+		struct dma_buf_test_params p = *cur++;
+
+		p.base.id = XE_TEST_LIVE_DMA_BUF;
+		test->priv = &p;
+		xe_test_dmabuf_import_same_driver(xe);
+	}
+	/* A non-zero return would halt iteration over driver devices */
+	return 0;
+}
+
+void xe_dma_buf_kunit(struct kunit *test)
+{
+	xe_call_for_each_device(dma_buf_run_device);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_dma_buf_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
new file mode 100644
index 000000000000..9f5a9cda8c0f
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_dma_buf_test.h"
+
+#include <kunit/test.h>
+
+static struct kunit_case xe_dma_buf_tests[] = {
+	KUNIT_CASE(xe_dma_buf_kunit),
+	{}	/* empty entry ends the case list */
+};
+
+static struct kunit_suite xe_dma_buf_test_suite = {
+	.name = "xe_dma_buf",
+	.test_cases = xe_dma_buf_tests,	/* the single case listed above */
+};
+
+kunit_test_suite(xe_dma_buf_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xe_dma_buf kunit test");
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h
new file mode 100644
index 000000000000..e6b464ddd526
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 AND MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_DMA_BUF_TEST_H_
+#define _XE_DMA_BUF_TEST_H_
+
+struct kunit;
+
+void xe_dma_buf_kunit(struct kunit *test);
+
+#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_lmtt_test.c b/drivers/gpu/drm/xe/tests/xe_lmtt_test.c
new file mode 100644
index 000000000000..1f1557c45ae1
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_lmtt_test.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+static const struct lmtt_ops_param {
+	const char *desc;	/* copied into the KUnit parameter description */
+	const struct xe_lmtt_ops *ops;	/* ops table checked by test_ops() */
+} lmtt_ops_params[] = {
+	{ "2-level", &lmtt_2l_ops, },
+	{ "multi-level", &lmtt_ml_ops, },
+};
+
+static void lmtt_ops_param_get_desc(const struct lmtt_ops_param *p, char *desc)
+{
+	snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%s", p->desc);	/* bounded copy */
+}
+
+KUNIT_ARRAY_PARAM(lmtt_ops, lmtt_ops_params, lmtt_ops_param_get_desc);
+
+/* Parameterized case: sanity-check the callbacks of one xe_lmtt_ops table. */
+static void test_ops(struct kunit *test)
+{
+	const struct lmtt_ops_param *p = test->param_value;
+	const struct xe_lmtt_ops *ops = p->ops;
+	unsigned int n;
+
+	KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_root_pd_level);
+	KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_pte_num);
+	KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_pte_size);
+	KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_pte_shift);
+	KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_pte_index);
+	KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_pte_encode);
+
+	KUNIT_EXPECT_NE(test, ops->lmtt_root_pd_level(), 0);
+
+	/* Levels 0 up to and including the root: PTE count, size and encoding of 0. */
+	for (n = 0; n <= ops->lmtt_root_pd_level(); n++) {
+		KUNIT_EXPECT_NE_MSG(test, ops->lmtt_pte_num(n), 0, "level=%u", n);
+		KUNIT_EXPECT_NE_MSG(test, ops->lmtt_pte_size(n), 0, "level=%u", n);
+		KUNIT_EXPECT_NE_MSG(test, ops->lmtt_pte_encode(0, n), LMTT_PTE_INVALID,
+				    "level=%u", n);
+	}
+
+	/* Levels below the root: PTE index on either side of 1 << shift and 2 << shift. */
+	for (n = 0; n < ops->lmtt_root_pd_level(); n++) {
+		u64 addr = BIT_ULL(ops->lmtt_pte_shift(n));
+
+		KUNIT_EXPECT_NE_MSG(test, ops->lmtt_pte_shift(n), 0, "level=%u", n);
+		KUNIT_EXPECT_EQ_MSG(test, ops->lmtt_pte_index(addr - 1, n), 0,
+				    "addr=%#llx level=%u", addr, n);
+		KUNIT_EXPECT_EQ_MSG(test, ops->lmtt_pte_index(addr + 1, n), 1,
+				    "addr=%#llx level=%u", addr, n);
+		KUNIT_EXPECT_EQ_MSG(test, ops->lmtt_pte_index(addr * 2 - 1, n), 1,
+				    "addr=%#llx level=%u", addr, n);
+		KUNIT_EXPECT_EQ_MSG(test, ops->lmtt_pte_index(addr * 2, n), 2,
+				    "addr=%#llx level=%u", addr, n);
+	}
+}
+
+static struct kunit_case lmtt_test_cases[] = {
+	KUNIT_CASE_PARAM(test_ops, lmtt_ops_gen_params),
+	{}	/* empty entry ends the case list */
+};
+
+static struct kunit_suite lmtt_suite = {
+	.name = "lmtt",
+	.test_cases = lmtt_test_cases,	/* the parameterized case above */
+};
+
+kunit_test_suites(&lmtt_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
new file mode 100644
index 000000000000..a6523df0f1d3
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -0,0 +1,444 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020-2022 Intel Corporation
+ */
+
+#include <kunit/test.h>
+#include <kunit/visibility.h>
+
+#include "tests/xe_migrate_test.h"
+#include "tests/xe_pci_test.h"
+
+#include "xe_pci.h"
+
+/* True if @fence is NULL, an ERR_PTR or its wait of up to 5 * HZ fails; no reference is dropped. */
+static bool sanity_fence_failed(struct xe_device *xe, struct dma_fence *fence,
+				const char *str, struct kunit *test)
+{
+	long remaining;
+
+	if (!fence)	/* counted as failed, but no KUnit failure is recorded */
+		return true;
+	if (IS_ERR(fence)) {
+		KUNIT_FAIL(test, "Failed to create fence for %s: %li\n", str,
+			   PTR_ERR(fence));
+		return true;
+	}
+
+	remaining = dma_fence_wait_timeout(fence, false, 5 * HZ);
+	if (remaining > 0)
+		return false;
+
+	KUNIT_FAIL(test, "Fence timed out for %s: %li\n", str, remaining);
+	return true;
+}
+
+/* Submit @bb on the migrate queue and wait for it; returns 0 or a negative errno. */
+static int run_sanity_job(struct xe_migrate *m, struct xe_device *xe,
+			  struct xe_bb *bb, u32 second_idx, const char *str,
+			  struct kunit *test)
+{
+	u64 batch_base = xe_migrate_batch_base(m, xe->info.has_usm);
+	struct xe_sched_job *job = xe_bb_create_migration_job(m->q, bb, batch_base,
+							      second_idx);
+	struct dma_fence *fence;
+	int err;
+
+	if (IS_ERR(job)) {
+		KUNIT_FAIL(test, "Failed to create migration job: %li\n", PTR_ERR(job));
+		return PTR_ERR(job);
+	}
+
+	xe_sched_job_arm(job);
+	fence = dma_fence_get(&job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+
+	/* Drop the reference taken above on the failure path too, not only on success. */
+	err = sanity_fence_failed(xe, fence, str, test) ? -ETIMEDOUT : 0;
+	dma_fence_put(fence);
+	if (!err)
+		kunit_info(test, "%s: Job completed\n", str);
+	return err;
+}
+
+/* .populate callback: qword N of the update gets (N - update->ofs) * 0x1111111111111111. */
+static void
+sanity_populate_cb(struct xe_migrate_pt_update *pt_update,
+		   struct xe_tile *tile, struct iosys_map *map, void *dst,
+		   u32 qword_ofs, u32 num_qwords,
+		   const struct xe_vm_pgtable_update *update)
+{
+	struct migrate_test_params *p =
+		to_migrate_test_params(xe_cur_kunit_priv(XE_TEST_LIVE_MIGRATE));
+	u64 *ptr = dst;
+	int i;
+
+	for (i = 0; i < num_qwords; i++) {
+		u64 value = (qword_ofs + i - update->ofs) * 0x1111111111111111ULL;
+
+		if (!map)
+			ptr[i] = value;	/* no map given: store straight into @dst */
+		else
+			xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * sizeof(u64), u64, value);
+	}
+
+	kunit_info(xe_cur_kunit(), "Used %s.\n", map ? "CPU" : "GPU");
+	if (p->force_gpu && map)
+		KUNIT_FAIL(xe_cur_kunit(), "GPU pagetable update used CPU.\n");
+}
+
+static const struct xe_migrate_pt_update_ops sanity_ops = {
+	.populate = sanity_populate_cb,	/* the only callback this test sets */
+};
+
+/* Fail @_test, printing both as hex u64, if @_retval != @_expected (args may be evaluated twice). */
+#define check(_retval, _expected, str, _test)				\
+	do { if ((_retval) != (_expected)) {				\
+			KUNIT_FAIL(_test, "Sanity check failed: " str	\
+				   " expected %llx, got %llx\n",	\
+				   (u64)(_expected), (u64)(_retval)); } } while (0)
+
+/*
+ * Clear a freshly created bo in @region, then copy between it and @bo in both
+ * directions, checking the first and last qword after each operation.
+ */
+static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
+		      struct kunit *test, u32 region)
+{
+	struct xe_device *xe = tile_to_xe(m->tile);
+	u64 retval, expected = 0;
+	bool big = bo->size >= SZ_2M;
+	struct dma_fence *fence;
+	const char *str = big ? "Copying big bo" : "Copying small bo";
+	int err;
+
+	struct xe_bo *remote = xe_bo_create_locked(xe, m->tile, NULL, bo->size,
+						   ttm_bo_type_kernel, region | XE_BO_NEEDS_CPU_ACCESS);
+	if (IS_ERR(remote)) {
+		KUNIT_FAIL(test, "Failed to allocate remote bo for %s: %li\n",
+			   str, PTR_ERR(remote));
+		return;
+	}
+
+	err = xe_bo_validate(remote, NULL, false);
+	if (err) {
+		KUNIT_FAIL(test, "Failed to validate system bo for %s: %d\n", str, err);
+		goto out_unlock;
+	}
+
+	err = xe_bo_vmap(remote);
+	if (err) {
+		KUNIT_FAIL(test, "Failed to vmap system bo for %s: %d\n", str, err);
+		goto out_unlock;
+	}
+
+	xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size);
+	fence = xe_migrate_clear(m, remote, remote->ttm.resource);
+	if (!sanity_fence_failed(xe, fence, big ? "Clearing remote big bo" :
+				 "Clearing remote small bo", test)) {
+		retval = xe_map_rd(xe, &remote->vmap, 0, u64);
+		check(retval, expected, "remote first offset should be cleared", test);
+		retval = xe_map_rd(xe, &remote->vmap, remote->size - 8, u64);
+		check(retval, expected, "remote last offset should be cleared", test);
+	}
+	/* dma_fence_put() accepts NULL but must not be handed an ERR_PTR. */
+	if (!IS_ERR(fence))
+		dma_fence_put(fence);
+
+	/* Try to copy 0xc0 from remote to vram with 2MB or 64KiB/4KiB pages */
+	xe_map_memset(xe, &remote->vmap, 0, 0xc0, remote->size);
+	xe_map_memset(xe, &bo->vmap, 0, 0xd0, bo->size);
+
+	expected = 0xc0c0c0c0c0c0c0c0;
+	fence = xe_migrate_copy(m, remote, bo, remote->ttm.resource,
+				bo->ttm.resource, false);
+	if (!sanity_fence_failed(xe, fence, big ? "Copying big bo remote -> vram" :
+				 "Copying small bo remote -> vram", test)) {
+		retval = xe_map_rd(xe, &bo->vmap, 0, u64);
+		check(retval, expected, "remote -> vram bo first offset should be copied", test);
+		retval = xe_map_rd(xe, &bo->vmap, bo->size - 8, u64);
+		check(retval, expected,
+		      "remote -> vram bo offset should be copied", test);
+	}
+	if (!IS_ERR(fence))
+		dma_fence_put(fence);
+
+	/* And other way around.. slightly hacky.. */
+	xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size);
+	xe_map_memset(xe, &bo->vmap, 0, 0xc0, bo->size);
+
+	fence = xe_migrate_copy(m, bo, remote, bo->ttm.resource,
+				remote->ttm.resource, false);
+	if (!sanity_fence_failed(xe, fence, big ? "Copying big bo vram -> remote" :
+				 "Copying small bo vram -> remote", test)) {
+		retval = xe_map_rd(xe, &remote->vmap, 0, u64);
+		check(retval, expected,
+		      "vram -> remote bo first offset should be copied", test);
+		retval = xe_map_rd(xe, &remote->vmap, bo->size - 8, u64);
+		check(retval, expected,
+		      "vram -> remote bo last offset should be copied", test);
+	}
+	if (!IS_ERR(fence))
+		dma_fence_put(fence);
+
+	xe_bo_vunmap(remote);
+out_unlock:
+	xe_bo_unlock(remote);
+	xe_bo_put(remote);
+}
+
+static void test_copy_sysmem(struct xe_migrate *m, struct xe_bo *bo,
+			     struct kunit *test)
+{
+	test_copy(m, bo, test, XE_BO_CREATE_SYSTEM_BIT);	/* system-memory region */
+}
+
+static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo,
+			   struct kunit *test)
+{
+	switch (bo->ttm.resource->mem_type) {
+	case XE_PL_SYSTEM:
+		return;		/* nothing is copied for a bo in system memory */
+	case XE_PL_VRAM0:
+		test_copy(m, bo, test, XE_BO_CREATE_VRAM1_BIT);
+		break;
+	default:
+		test_copy(m, bo, test, XE_BO_CREATE_VRAM0_BIT);
+		break;
+	}
+}
+
+/* Test xe_migrate_update_pgtables() updates the pagetable as expected */
+static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt,
+			   struct kunit *test, bool force_gpu)
+{
+	struct xe_device *xe = tile_to_xe(m->tile);
+	struct dma_fence *fence;
+	u64 retval, expected;
+	ktime_t then, now;
+	int i;
+	struct xe_vm_pgtable_update update = {
+		.ofs = 1,
+		.qwords = 0x10,
+		.pt_bo = pt,
+	};
+	struct xe_migrate_pt_update pt_update = { .ops = &sanity_ops };
+	struct migrate_test_params p = {
+		.base.id = XE_TEST_LIVE_MIGRATE,
+		.force_gpu = force_gpu,
+	};
+
+	test->priv = &p;
+	expected = 0xf0f0f0f0f0f0f0f0ULL;
+	xe_map_memset(xe, &pt->vmap, 0, (u8)expected, pt->size);
+
+	then = ktime_get();
+	fence = xe_migrate_update_pgtables(m, m->q->vm, NULL, m->q, &update, 1,
+					   NULL, 0, &pt_update);
+	now = ktime_get();
+	if (sanity_fence_failed(xe, fence, "Migration pagetable update", test)) {
+		/* Drop the reference of a timed-out fence; an ERR_PTR must not be put. */
+		if (!IS_ERR(fence))
+			dma_fence_put(fence);
+		return;
+	}
+
+	kunit_info(test, "Updating without syncing took %llu us,\n",
+		   (unsigned long long)ktime_to_us(ktime_sub(now, then)));
+
+	dma_fence_put(fence);
+	retval = xe_map_rd(xe, &pt->vmap, 0, u64);
+	check(retval, expected, "PTE[0] must stay untouched", test);
+
+	for (i = 0; i < update.qwords; i++) {
+		retval = xe_map_rd(xe, &pt->vmap, (update.ofs + i) * 8, u64);
+		check(retval, i * 0x1111111111111111ULL, "PTE update", test);
+	}
+
+	retval = xe_map_rd(xe, &pt->vmap, 8 * (update.ofs + update.qwords), u64);
+	check(retval, expected, "PTE[0x11] must stay untouched", test);
+}
+
+/* Live sanity test of one tile's migrate context: PTE write, clear, copy and PT update. */
+static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
+{
+	struct xe_tile *tile = m->tile;
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_bo *pt, *bo = m->pt_bo, *big, *tiny;
+	struct xe_res_cursor src_it;
+	struct dma_fence *fence;
+	u64 retval, expected;
+	struct xe_bb *bb;
+	int err;
+	u8 id = tile->id;
+
+	err = xe_bo_vmap(bo);
+	if (err) {
+		KUNIT_FAIL(test, "Failed to vmap our pagetables: %d\n", err);
+		return;
+	}
+
+	big = xe_bo_create_pin_map(xe, tile, m->q->vm, SZ_4M,
+				   ttm_bo_type_kernel,
+				   XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+				   XE_BO_CREATE_PINNED_BIT);
+	if (IS_ERR(big)) {
+		KUNIT_FAIL(test, "Failed to allocate bo: %li\n", PTR_ERR(big));
+		goto vunmap;
+	}
+
+	pt = xe_bo_create_pin_map(xe, tile, m->q->vm, XE_PAGE_SIZE,
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+				  XE_BO_CREATE_PINNED_BIT);
+	if (IS_ERR(pt)) {
+		KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", PTR_ERR(pt));
+		goto free_big;
+	}
+
+	tiny = xe_bo_create_pin_map(xe, tile, m->q->vm, 2 * SZ_4K,
+				    ttm_bo_type_kernel,
+				    XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+				    XE_BO_CREATE_PINNED_BIT);
+	if (IS_ERR(tiny)) {
+		KUNIT_FAIL(test, "Failed to allocate tiny bo: %li\n", PTR_ERR(tiny));
+		goto free_pt;
+	}
+
+	bb = xe_bb_new(tile->primary_gt, 32, xe->info.has_usm);
+	if (IS_ERR(bb)) {
+		KUNIT_FAIL(test, "Failed to create batchbuffer: %li\n", PTR_ERR(bb));
+		goto free_tiny;
+	}
+
+	kunit_info(test, "Starting tests, top level PT addr: %lx, special pagetable base addr: %lx\n",
+		   (unsigned long)xe_bo_main_addr(m->q->vm->pt_root[id]->bo, XE_PAGE_SIZE),
+		   (unsigned long)xe_bo_main_addr(m->pt_bo, XE_PAGE_SIZE));
+
+	/* First part of the test, are we updating our pagetable bo with a new entry? */
+	xe_map_wr(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64, 0xdeaddeadbeefbeef);
+	expected = m->q->vm->pt_ops->pte_encode_bo(pt, 0, xe->pat.idx[XE_CACHE_WB], 0);
+	if (m->q->vm->flags & XE_VM_FLAG_64K)
+		expected |= XE_PTE_PS64;
+	if (xe_bo_is_vram(pt))
+		xe_res_first(pt->ttm.resource, 0, pt->size, &src_it);
+	else
+		xe_res_first_sg(xe_bo_sg(pt), 0, pt->size, &src_it);
+
+	emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt), false,
+		 &src_it, XE_PAGE_SIZE, pt->ttm.resource);
+
+	run_sanity_job(m, xe, bb, bb->len, "Writing PTE for our fake PT", test);
+
+	retval = xe_map_rd(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64);
+	check(retval, expected, "PTE entry write", test);
+
+	/* Now try to write data to our newly mapped 'pagetable', see if it succeeds */
+	bb->len = 0;
+	bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+	xe_map_wr(xe, &pt->vmap, 0, u32, 0xdeaddead);
+	expected = 0;
+
+	emit_clear(tile->primary_gt, bb, xe_migrate_vm_addr(NUM_KERNEL_PDE - 1, 0), 4, 4,
+		   IS_DGFX(xe));
+	run_sanity_job(m, xe, bb, 1, "Writing to our newly mapped pagetable", test);
+
+	retval = xe_map_rd(xe, &pt->vmap, 0, u32);
+	check(retval, expected, "Write to PT after adding PTE", test);
+
+	/* Sanity checks passed, try the full ones! */
+
+	/* Clear a small bo */
+	kunit_info(test, "Clearing small buffer object\n");
+	xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size);
+	expected = 0;
+	fence = xe_migrate_clear(m, tiny, tiny->ttm.resource);
+	if (sanity_fence_failed(xe, fence, "Clearing small bo", test)) {
+		/* A timed-out fence still holds our reference; an ERR_PTR must not be put. */
+		if (!IS_ERR(fence))
+			dma_fence_put(fence);
+		goto out;
+	}
+
+	dma_fence_put(fence);
+	retval = xe_map_rd(xe, &tiny->vmap, 0, u32);
+	check(retval, expected, "Command clear small first value", test);
+	retval = xe_map_rd(xe, &tiny->vmap, tiny->size - 4, u32);
+	check(retval, expected, "Command clear small last value", test);
+
+	kunit_info(test, "Copying small buffer object to system\n");
+	test_copy_sysmem(m, tiny, test);
+	if (xe->info.tile_count > 1) {
+		kunit_info(test, "Copying small buffer object to other vram\n");
+		test_copy_vram(m, tiny, test);
+	}
+
+	/* Clear a big bo */
+	kunit_info(test, "Clearing big buffer object\n");
+	xe_map_memset(xe, &big->vmap, 0, 0x11, big->size);
+	expected = 0;
+	fence = xe_migrate_clear(m, big, big->ttm.resource);
+	if (sanity_fence_failed(xe, fence, "Clearing big bo", test)) {
+		if (!IS_ERR(fence))
+			dma_fence_put(fence);
+		goto out;
+	}
+
+	dma_fence_put(fence);
+	retval = xe_map_rd(xe, &big->vmap, 0, u32);
+	check(retval, expected, "Command clear big first value", test);
+	retval = xe_map_rd(xe, &big->vmap, big->size - 4, u32);
+	check(retval, expected, "Command clear big last value", test);
+
+	kunit_info(test, "Copying big buffer object to system\n");
+	test_copy_sysmem(m, big, test);
+	if (xe->info.tile_count > 1) {
+		kunit_info(test, "Copying big buffer object to other vram\n");
+		test_copy_vram(m, big, test);
+	}
+
+	kunit_info(test, "Testing page table update using CPU if GPU idle.\n");
+	test_pt_update(m, pt, test, false);
+	kunit_info(test, "Testing page table update using GPU\n");
+	test_pt_update(m, pt, test, true);
+
+out:
+	xe_bb_free(bb, NULL);
+free_tiny:
+	xe_bo_unpin(tiny);
+	xe_bo_put(tiny);
+free_pt:
+	xe_bo_unpin(pt);
+	xe_bo_put(pt);
+free_big:
+	xe_bo_unpin(big);
+	xe_bo_put(big);
+vunmap:
+	xe_bo_vunmap(m->pt_bo);
+}
+
+/* Per-device callback: run the migrate sanity test on every tile's migrate context. */
+static int migrate_test_run_device(struct xe_device *xe)
+{
+	struct kunit *test = xe_cur_kunit();
+	struct xe_tile *tile;
+	int tile_id;
+
+	for_each_tile(tile, xe, tile_id) {
+		struct xe_migrate *migrate = tile->migrate;
+
+		kunit_info(test, "Testing tile id %d.\n", tile_id);
+		xe_vm_lock(migrate->q->vm, true);
+		xe_device_mem_access_get(xe);
+		xe_migrate_sanity_test(migrate, test);
+		xe_device_mem_access_put(xe);
+		xe_vm_unlock(migrate->q->vm);
+	}
+	return 0;	/* unconditionally 0 */
+}
+
+void xe_migrate_sanity_kunit(struct kunit *test)
+{
+	xe_call_for_each_device(migrate_test_run_device);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_migrate_sanity_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
new file mode 100644
index 000000000000..cf0c173b945f
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_migrate_test.h"
+
+#include <kunit/test.h>
+
+static struct kunit_case xe_migrate_tests[] = {
+	KUNIT_CASE(xe_migrate_sanity_kunit),
+	{}	/* empty entry ends the case list */
+};
+
+static struct kunit_suite xe_migrate_test_suite = {
+	.name = "xe_migrate",
+	.test_cases = xe_migrate_tests,	/* the single case listed above */
+};
+
+kunit_test_suite(xe_migrate_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xe_migrate kunit test");
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.h b/drivers/gpu/drm/xe/tests/xe_migrate_test.h
new file mode 100644
index 000000000000..7c645c66824f
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 AND MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_MIGRATE_TEST_H_
+#define _XE_MIGRATE_TEST_H_
+
+struct kunit;
+
+void xe_migrate_sanity_kunit(struct kunit *test);
+
+#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c
new file mode 100644
index 000000000000..7dd34f94e809
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_mocs.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <kunit/test.h>
+#include <kunit/visibility.h>
+
+#include "tests/xe_mocs_test.h"
+#include "tests/xe_pci_test.h"
+#include "tests/xe_test.h"
+
+#include "xe_pci.h"
+#include "xe_gt.h"
+#include "xe_mocs.h"
+#include "xe_device.h"
+
+struct live_mocs {
+	struct xe_mocs_info table;	/* filled in by live_mocs_init() */
+};
+
+/* Zero @arg, have get_mocs_settings() fill its table, log the table and return the flags. */
+static int live_mocs_init(struct live_mocs *arg, struct xe_gt *gt)
+{
+	struct kunit *test = xe_cur_kunit();
+	struct xe_mocs_info *table = &arg->table;
+	unsigned int flags;
+
+	memset(arg, 0, sizeof(*arg));
+	flags = get_mocs_settings(gt_to_xe(gt), table);
+
+	kunit_info(test, "table size %d", table->size);
+	kunit_info(test, "table uc_index %d", table->uc_index);
+	kunit_info(test, "table n_entries %d", table->n_entries);
+	return flags;
+}
+
+/* Compare each LNCFCMOCS register read from @gt with the l3cc value derived from @info. */
+static void read_l3cc_table(struct xe_gt *gt,
+			    const struct xe_mocs_info *info)
+{
+	struct kunit *test = xe_cur_kunit();
+	unsigned int i;
+	u32 l3cc;
+	u32 reg_val;
+	int ret;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (ret)	/* the assertion below aborts the test: drop the reference first */
+		xe_device_mem_access_put(gt_to_xe(gt));
+	KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n");
+	mocs_dbg(&gt_to_xe(gt)->drm, "L3CC entries:%d\n", info->n_entries);
+	for (i = 0; i < (info->n_entries + 1) / 2; i++) {
+		l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i),
+				    get_entry_l3cc(info, 2 * i + 1));
+		if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250)
+			reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i));
+		else
+			reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i));
+		mocs_dbg(&gt_to_xe(gt)->drm, "%d 0x%x 0x%x 0x%x\n", i,
+			 XELP_LNCFCMOCS(i).addr, reg_val, l3cc);
+		if (reg_val != l3cc)
+			KUNIT_FAIL(test, "l3cc reg 0x%x has incorrect val.\n",
+				   XELP_LNCFCMOCS(i).addr);
+	}
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+	xe_device_mem_access_put(gt_to_xe(gt));
+}
+
+/* Compare each GLOBAL_MOCS register read from @gt with the control value from @info. */
+static void read_mocs_table(struct xe_gt *gt,
+			    const struct xe_mocs_info *info)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct kunit *test = xe_cur_kunit();
+	unsigned int i;
+	u32 mocs;
+	u32 reg_val;
+	int ret;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (ret)	/* the assertion below aborts the test: drop the reference first */
+		xe_device_mem_access_put(gt_to_xe(gt));
+	KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n");
+	mocs_dbg(&gt_to_xe(gt)->drm, "Global MOCS entries:%d\n", info->n_entries);
+	drm_WARN_ONCE(&xe->drm, !info->unused_entries_index,
+		      "Unused entries index should have been defined\n");
+	for (i = 0; i < info->n_entries; i++) {
+		mocs = get_entry_control(info, i);
+		if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250)
+			reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i));
+		else
+			reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i));
+		mocs_dbg(&gt_to_xe(gt)->drm, "%d 0x%x 0x%x 0x%x\n", i,
+			 XELP_GLOBAL_MOCS(i).addr, reg_val, mocs);
+		if (reg_val != mocs)
+			KUNIT_FAIL(test, "mocs reg 0x%x has incorrect val.\n",
+				   XELP_GLOBAL_MOCS(i).addr);
+	}
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+	xe_device_mem_access_put(gt_to_xe(gt));
+}
+
+/* Basic check the system is configured with the expected mocs table */
+static int mocs_kernel_test_run_device(struct xe_device *xe)
+{
+	struct live_mocs mocs;
+	struct xe_gt *gt;
+	int id;
+
+	for_each_gt(gt, xe, id) {
+		unsigned int flags = live_mocs_init(&mocs, gt);
+
+		/* Read back only the tables whose flag live_mocs_init() returned. */
+		if (flags & HAS_GLOBAL_MOCS)
+			read_mocs_table(gt, &mocs.table);
+		if (flags & HAS_LNCF_MOCS)
+			read_l3cc_table(gt, &mocs.table);
+	}
+	return 0;
+}
+
+/* KUnit test case: hand mocs_kernel_test_run_device() to xe_call_for_each_device(). */
+void xe_live_mocs_kernel_kunit(struct kunit *test)
+{
+	xe_call_for_each_device(mocs_kernel_test_run_device);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_live_mocs_kernel_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.c b/drivers/gpu/drm/xe/tests/xe_mocs_test.c
new file mode 100644
index 000000000000..421b819fd4ba
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_mocs_test.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_mocs_test.h"
+
+#include <kunit/test.h>
+
+static struct kunit_case xe_mocs_tests[] = {
+	KUNIT_CASE(xe_live_mocs_kernel_kunit),
+	{}	/* empty entry ends the case list */
+};
+
+static struct kunit_suite xe_mocs_test_suite = {
+	.name = "xe_mocs",
+	.test_cases = xe_mocs_tests,	/* the single case listed above */
+};
+
+kunit_test_suite(xe_mocs_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xe_mocs kunit test");
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.h b/drivers/gpu/drm/xe/tests/xe_mocs_test.h
new file mode 100644
index 000000000000..7faa3575e6c3
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_mocs_test.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 AND MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_MOCS_TEST_H_
+#define _XE_MOCS_TEST_H_
+
+struct kunit;
+
+void xe_live_mocs_kernel_kunit(struct kunit *test);
+
+#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c
new file mode 100644
index 000000000000..602793644f61
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_pci.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "tests/xe_pci_test.h"
+
+#include "tests/xe_test.h"
+
+#include <kunit/test-bug.h>
+#include <kunit/test.h>
+#include <kunit/test-bug.h>
+#include <kunit/visibility.h>
+
+struct kunit_test_data {
+	int ndevs; /* devices visited so far */
+	xe_device_fn xe_fn; /* callback run on each visited device */
+};
+
+/* driver_for_each_device() callback: count @dev and run data->xe_fn on it. */
+static int dev_to_xe_device_fn(struct device *dev, void *__data)
+{
+	struct drm_device *drm = dev_get_drvdata(dev);
+	struct kunit_test_data *data = __data;
+	int idx, ret = 0;
+
+	data->ndevs++;
+	/* drm_dev_exit() may only be paired with a successful drm_dev_enter() */
+	if (drm_dev_enter(drm, &idx)) {
+		ret = data->xe_fn(to_xe_device(drm));
+		drm_dev_exit(idx);
+	}
+
+	return ret;
+}
+
+/**
+ * xe_call_for_each_device - Iterate over all devices this driver binds to
+ * @xe_fn: Function to call for each device.
+ *
+ * This function iterates over all devices this driver binds to, and calls
+ * @xe_fn for each one of them. If the called function returns anything else
+ * than 0, iteration is stopped and the return value is returned by this
+ * function. Across each function call, drm_dev_enter() / drm_dev_exit() is
+ * called for the corresponding drm device.
+ *
+ * Return: Number of devices iterated or
+ *         the error code of a call to @xe_fn returning an error code.
+ */
+int xe_call_for_each_device(xe_device_fn xe_fn)
+{
+	struct kunit_test_data walk = { .xe_fn = xe_fn, .ndevs = 0 };
+	int err;
+
+	err = driver_for_each_device(&xe_pci_driver.driver, NULL, &walk,
+				     dev_to_xe_device_fn);
+	/* Nothing was visited: no device is bound, so there is no hardware */
+	if (walk.ndevs == 0)
+		kunit_skip(current->kunit_test, "test runs only on hardware\n");
+
+	/* A non-zero callback result takes precedence over the device count */
+	if (err)
+		return err;
+	return walk.ndevs;
+}
+
+/**
+ * xe_call_for_each_graphics_ip - Iterate over all recognized graphics IPs
+ * @xe_fn: Function to call for each graphics IP descriptor.
+ *
+ * This function iterates over the descriptors for all graphics IPs recognized
+ * by the driver and calls @xe_fn for each one of them.
+ */
+void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn)
+{
+	const struct xe_graphics_desc *cur, *prev = NULL;
+
+	/* Consecutive map entries sharing one descriptor are visited once */
+	for (int i = 0; i < ARRAY_SIZE(graphics_ip_map); i++) {
+		cur = graphics_ip_map[i].ip;
+		if (cur != prev) {
+			xe_fn(cur);
+			prev = cur;
+		}
+	}
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_call_for_each_graphics_ip);
+
+/**
+ * xe_call_for_each_media_ip - Iterate over all recognized media IPs
+ * @xe_fn: Function to call for each media IP descriptor.
+ *
+ * This function iterates over the descriptors for all media IPs recognized
+ * by the driver and calls @xe_fn for each one of them.
+ */
+void xe_call_for_each_media_ip(xe_media_fn xe_fn)
+{
+	const struct xe_media_desc *cur, *prev = NULL;
+
+	/* Consecutive map entries sharing one descriptor are visited once */
+	for (int i = 0; i < ARRAY_SIZE(media_ip_map); i++) {
+		cur = media_ip_map[i].ip;
+		if (cur != prev) {
+			xe_fn(cur);
+			prev = cur;
+		}
+	}
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_call_for_each_media_ip);
+
+/*
+ * Stub installed in place of read_gmdid(): report the IP version and stepping
+ * stored in the current test's struct xe_pci_fake_data (test->priv).
+ */
+static void fake_read_gmdid(struct xe_device *xe, enum xe_gmdid_type type,
+			    u32 *ver, u32 *revid)
+{
+	const struct xe_pci_fake_data *fake = kunit_get_current_test()->priv;
+	bool media = type == GMDID_MEDIA;
+	u32 step = media ? fake->media_step : fake->graphics_step;
+
+	*ver = media ? fake->media_verx100 : fake->graphics_verx100;
+	*revid = xe_step_to_gmdid(step);
+}
+
+/* Fill in xe->info for a fake device picked via test->priv (NULL: first PCI ID) */
+int xe_pci_fake_device_init(struct xe_device *xe)
+{
+	struct kunit *test = kunit_get_current_test();
+	struct xe_pci_fake_data *data = test->priv;
+	const struct pci_device_id *ent = pciidlist;
+	const struct xe_device_desc *desc;
+	const struct xe_subplatform_desc *subplatform_desc;
+
+	if (!data) {
+		desc = (const void *)ent->driver_data;
+		subplatform_desc = NULL;
+		goto done;
+	}
+
+	for (ent = pciidlist; ent->device; ent++) {
+		desc = (const void *)ent->driver_data;
+		if (desc->platform == data->platform)
+			break;
+	}
+	if (!ent->device)
+		return -ENODEV;
+
+	for (subplatform_desc = desc->subplatforms;
+	     subplatform_desc && subplatform_desc->subplatform; subplatform_desc++)
+		if (subplatform_desc->subplatform == data->subplatform)
+			break;
+	/* A miss leaves the pointer on the zeroed terminator entry, not on NULL */
+	if (subplatform_desc && !subplatform_desc->subplatform)
+		subplatform_desc = NULL;
+	if (data->subplatform != XE_SUBPLATFORM_NONE && !subplatform_desc)
+		return -ENODEV;
+
+done:
+	kunit_activate_static_stub(test, read_gmdid, fake_read_gmdid);
+	xe_info_init_early(xe, desc, subplatform_desc);
+	xe_info_init(xe, desc->graphics, desc->media);
+
+	return 0;
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_device_init);
diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.c b/drivers/gpu/drm/xe/tests/xe_pci_test.c
new file mode 100644
index 000000000000..171e4180f1aa
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_pci_test.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_drv.h>
+#include <drm/drm_kunit_helpers.h>
+
+#include <kunit/test.h>
+
+#include "tests/xe_test.h"
+
+#include "xe_device.h"
+#include "xe_pci_test.h"
+#include "xe_pci_types.h"
+
+static void check_graphics_ip(const struct xe_graphics_desc *graphics)
+{
+	u64 mask = graphics->hw_engine_mask;
+	struct kunit *test = xe_cur_kunit();
+
+	/*
+	 * Strip the engine classes a graphics IP may carry (RCS, CCS and BCS);
+	 * any bit still set afterwards is an engine that does not belong here.
+	 */
+	mask &= ~(XE_HW_ENGINE_RCS_MASK | XE_HW_ENGINE_CCS_MASK |
+		  XE_HW_ENGINE_BCS_MASK);
+	KUNIT_ASSERT_EQ(test, mask, 0);
+}
+
+static void check_media_ip(const struct xe_media_desc *media)
+{
+	u64 mask = media->hw_engine_mask;
+	struct kunit *test = xe_cur_kunit();
+
+	/*
+	 * Strip the engine classes a media IP may carry (VCS, VECS and GSCCS);
+	 * any bit still set afterwards is an engine that does not belong here.
+	 */
+	mask &= ~(XE_HW_ENGINE_VCS_MASK | XE_HW_ENGINE_VECS_MASK |
+		  XE_HW_ENGINE_GSCCS_MASK);
+	KUNIT_ASSERT_EQ(test, mask, 0);
+}
+
+static void xe_gmdid_graphics_ip(struct kunit *test)
+{
+	xe_call_for_each_graphics_ip(check_graphics_ip);
+}
+
+static void xe_gmdid_media_ip(struct kunit *test)
+{
+	xe_call_for_each_media_ip(check_media_ip);
+}
+
+static struct kunit_case xe_pci_tests[] = {
+	KUNIT_CASE(xe_gmdid_graphics_ip),
+	KUNIT_CASE(xe_gmdid_media_ip),
+	{}
+};
+
+static struct kunit_suite xe_pci_test_suite = {
+	.name = "xe_pci",
+	.test_cases = xe_pci_tests,
+};
+
+kunit_test_suite(xe_pci_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xe_pci kunit test");
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h
new file mode 100644
index 000000000000..811ffe5bd9fd
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 AND MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_PCI_TEST_H_
+#define _XE_PCI_TEST_H_
+
+#include <linux/types.h>
+
+#include "xe_platform_types.h"
+
+struct xe_device;
+struct xe_graphics_desc;
+struct xe_media_desc;
+
+typedef int (*xe_device_fn)(struct xe_device *);
+typedef void (*xe_graphics_fn)(const struct xe_graphics_desc *);
+typedef void (*xe_media_fn)(const struct xe_media_desc *);
+
+int xe_call_for_each_device(xe_device_fn xe_fn);
+void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn);
+void xe_call_for_each_media_ip(xe_media_fn xe_fn);
+
+struct xe_pci_fake_data {
+	enum xe_platform platform; /* matched against desc->platform */
+	enum xe_subplatform subplatform; /* XE_SUBPLATFORM_NONE if unused */
+	u32 graphics_verx100; /* reported by the fake GMDID read (graphics) */
+	u32 media_verx100; /* reported by the fake GMDID read (media) */
+	u32 graphics_step; /* converted with xe_step_to_gmdid() */
+	u32 media_step; /* converted with xe_step_to_gmdid() */
+};
+
+int xe_pci_fake_device_init(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
new file mode 100644
index 000000000000..4a6972897675
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
@@ -0,0 +1,319 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/string.h>
+#include <linux/xarray.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_kunit_helpers.h>
+
+#include <kunit/test.h>
+
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_reg_defs.h"
+#include "xe_device.h"
+#include "xe_device_types.h"
+#include "xe_pci_test.h"
+#include "xe_reg_sr.h"
+#include "xe_rtp.h"
+
+#define REGULAR_REG1	XE_REG(1)
+#define REGULAR_REG2	XE_REG(2)
+#define REGULAR_REG3	XE_REG(3)
+#define MCR_REG1	XE_REG_MCR(1)
+#define MCR_REG2	XE_REG_MCR(2)
+#define MCR_REG3	XE_REG_MCR(3)
+#define MASKED_REG1	XE_REG(1, XE_REG_OPTION_MASKED)
+
+#undef XE_REG_MCR
+#define XE_REG_MCR(...)     XE_REG(__VA_ARGS__, .mcr = 1)
+
+struct rtp_test_case {
+	const char *name; /* copied into the KUnit parameter description */
+	struct xe_reg expected_reg; /* register whose sr entry is checked */
+	u32 expected_set_bits; /* compared with that entry's set_bits */
+	u32 expected_clr_bits; /* compared with that entry's clr_bits */
+	unsigned long expected_count; /* number of entries in reg_sr->xa */
+	unsigned int expected_sr_errors; /* compared with reg_sr->errors */
+	const struct xe_rtp_entry_sr *entries; /* input, ends with a {} entry */
+};
+
+static bool match_yes(const struct xe_gt *gt, const struct xe_hw_engine *hwe)
+{
+	return true;
+}
+
+static bool match_no(const struct xe_gt *gt, const struct xe_hw_engine *hwe)
+{
+	return false;
+}
+
+static const struct rtp_test_case cases[] = {
+	{
+		.name = "coalesce-same-reg",
+		.expected_reg = REGULAR_REG1,
+		.expected_set_bits = REG_BIT(0) | REG_BIT(1),
+		.expected_clr_bits = REG_BIT(0) | REG_BIT(1),
+		.expected_count = 1,
+		/* Different bits on the same register: create a single entry */
+		.entries = (const struct xe_rtp_entry_sr[]) {
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
+			},
+			{ XE_RTP_NAME("basic-2"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(1)))
+			},
+			{}
+		},
+	},
+	{
+		.name = "no-match-no-add",
+		.expected_reg = REGULAR_REG1,
+		.expected_set_bits = REG_BIT(0),
+		.expected_clr_bits = REG_BIT(0),
+		.expected_count = 1,
+		/* Don't coalesce second entry since rules don't match */
+		.entries = (const struct xe_rtp_entry_sr[]) {
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
+			},
+			{ XE_RTP_NAME("basic-2"),
+			  XE_RTP_RULES(FUNC(match_no)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(1)))
+			},
+			{}
+		},
+	},
+	{
+		.name = "no-match-no-add-multiple-rules",
+		.expected_reg = REGULAR_REG1,
+		.expected_set_bits = REG_BIT(0),
+		.expected_clr_bits = REG_BIT(0),
+		.expected_count = 1,
+		/* Don't coalesce second entry due to one of the rules */
+		.entries = (const struct xe_rtp_entry_sr[]) {
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
+			},
+			{ XE_RTP_NAME("basic-2"),
+			  XE_RTP_RULES(FUNC(match_yes), FUNC(match_no)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(1)))
+			},
+			{}
+		},
+	},
+	{
+		.name = "two-regs-two-entries",
+		.expected_reg = REGULAR_REG1,
+		.expected_set_bits = REG_BIT(0),
+		.expected_clr_bits = REG_BIT(0),
+		.expected_count = 2,
+		/* Same bits on different registers are not coalesced */
+		.entries = (const struct xe_rtp_entry_sr[]) {
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
+			},
+			{ XE_RTP_NAME("basic-2"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG2, REG_BIT(0)))
+			},
+			{}
+		},
+	},
+	{
+		.name = "clr-one-set-other",
+		.expected_reg = REGULAR_REG1,
+		.expected_set_bits = REG_BIT(0),
+		.expected_clr_bits = REG_BIT(1) | REG_BIT(0),
+		.expected_count = 1,
+		/* Check clr vs set actions on different bits */
+		.entries = (const struct xe_rtp_entry_sr[]) {
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
+			},
+			{ XE_RTP_NAME("basic-2"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(CLR(REGULAR_REG1, REG_BIT(1)))
+			},
+			{}
+		},
+	},
+	{
+#define TEMP_MASK	REG_GENMASK(10, 8)
+#define TEMP_FIELD	REG_FIELD_PREP(TEMP_MASK, 2)
+		.name = "set-field",
+		.expected_reg = REGULAR_REG1,
+		.expected_set_bits = TEMP_FIELD,
+		.expected_clr_bits = TEMP_MASK,
+		.expected_count = 1,
+		/* Check FIELD_SET works */
+		.entries = (const struct xe_rtp_entry_sr[]) {
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(FIELD_SET(REGULAR_REG1,
+						   TEMP_MASK, TEMP_FIELD))
+			},
+			{}
+		},
+#undef TEMP_MASK
+#undef TEMP_FIELD
+	},
+	{
+		.name = "conflict-duplicate",
+		.expected_reg = REGULAR_REG1,
+		.expected_set_bits = REG_BIT(0),
+		.expected_clr_bits = REG_BIT(0),
+		.expected_count = 1,
+		.expected_sr_errors = 1,
+		.entries = (const struct xe_rtp_entry_sr[]) {
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
+			},
+			/* drop: setting same values twice */
+			{ XE_RTP_NAME("basic-2"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
+			},
+			{}
+		},
+	},
+	{
+		.name = "conflict-not-disjoint",
+		.expected_reg = REGULAR_REG1,
+		.expected_set_bits = REG_BIT(0),
+		.expected_clr_bits = REG_BIT(0),
+		.expected_count = 1,
+		.expected_sr_errors = 1,
+		.entries = (const struct xe_rtp_entry_sr[]) {
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
+			},
+			/* drop: bits are not disjoint with previous entries */
+			{ XE_RTP_NAME("basic-2"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(CLR(REGULAR_REG1, REG_GENMASK(1, 0)))
+			},
+			{}
+		},
+	},
+	{
+		.name = "conflict-reg-type",
+		.expected_reg = REGULAR_REG1,
+		.expected_set_bits = REG_BIT(0),
+		.expected_clr_bits = REG_BIT(0),
+		.expected_count = 1,
+		.expected_sr_errors = 2,
+		.entries = (const struct xe_rtp_entry_sr[]) {
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
+			},
+			/* drop: regular vs MCR */
+			{ XE_RTP_NAME("basic-2"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(MCR_REG1, REG_BIT(1)))
+			},
+			/* drop: regular vs masked */
+			{ XE_RTP_NAME("basic-3"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(MASKED_REG1, REG_BIT(0)))
+			},
+			{}
+		},
+	},
+};
+
+static void xe_rtp_process_tests(struct kunit *test)
+{
+	const struct rtp_test_case *param = test->param_value;
+	struct xe_device *xe = test->priv;
+	struct xe_gt *gt = xe_device_get_root_tile(xe)->primary_gt;
+	struct xe_reg_sr *reg_sr = &gt->reg_sr;
+	const struct xe_reg_sr_entry *sre, *sr_entry = NULL;
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
+	unsigned long idx, count = 0;
+
+	xe_reg_sr_init(reg_sr, "xe_rtp_tests", xe);
+	xe_rtp_process_to_sr(&ctx, param->entries, reg_sr);
+
+	xa_for_each(&reg_sr->xa, idx, sre) {
+		if (idx == param->expected_reg.addr)
+			sr_entry = sre;
+		count++;
+	}
+
+	KUNIT_EXPECT_EQ(test, count, param->expected_count);
+	KUNIT_EXPECT_EQ(test, reg_sr->errors, param->expected_sr_errors);
+	KUNIT_ASSERT_NOT_NULL(test, sr_entry); /* dereferenced below */
+	KUNIT_EXPECT_EQ(test, sr_entry->clr_bits, param->expected_clr_bits);
+	KUNIT_EXPECT_EQ(test, sr_entry->set_bits, param->expected_set_bits);
+	KUNIT_EXPECT_EQ(test, sr_entry->reg.raw, param->expected_reg.raw);
+}
+
+static void rtp_desc(const struct rtp_test_case *t, char *desc)
+{
+	strscpy(desc, t->name, KUNIT_PARAM_DESC_SIZE);
+}
+
+KUNIT_ARRAY_PARAM(rtp, cases, rtp_desc);
+
+static int xe_rtp_test_init(struct kunit *test)
+{
+	struct xe_device *xe;
+	struct device *dev;
+	int ret;
+
+	dev = drm_kunit_helper_alloc_device(test);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
+
+	xe = drm_kunit_helper_alloc_drm_device(test, dev,
+					       struct xe_device,
+					       drm, DRIVER_GEM);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe);
+
+	/* Initialize an empty device: NULL priv means no fake platform data */
+	test->priv = NULL;
+	ret = xe_pci_fake_device_init(xe);
+	KUNIT_ASSERT_EQ(test, ret, 0);
+
+	xe->drm.dev = dev;
+	test->priv = xe; /* handed to the test cases and to the exit hook */
+
+	return 0;
+}
+
+static void xe_rtp_test_exit(struct kunit *test)
+{
+	struct xe_device *xe = test->priv;
+
+	drm_kunit_helper_free_device(test, xe->drm.dev);
+}
+
+static struct kunit_case xe_rtp_tests[] = {
+	KUNIT_CASE_PARAM(xe_rtp_process_tests, rtp_gen_params),
+	{}
+};
+
+static struct kunit_suite xe_rtp_test_suite = {
+	.name = "xe_rtp",
+	.init = xe_rtp_test_init,
+	.exit = xe_rtp_test_exit,
+	.test_cases = xe_rtp_tests,
+};
+
+kunit_test_suite(xe_rtp_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xe_rtp kunit test");
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_test.h b/drivers/gpu/drm/xe/tests/xe_test.h
new file mode 100644
index 000000000000..7a1ae213e750
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_test.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0 AND MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_TEST_H_
+#define _XE_TEST_H_
+
+#include <linux/types.h>
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include <linux/sched.h>
+#include <kunit/test.h>
+
+/*
+ * Each test that provides a kunit private test structure places a test id
+ * here and points kunit->priv to an embedded struct xe_test_priv.
+ */
+enum xe_test_priv_id {
+	XE_TEST_LIVE_DMA_BUF,
+	XE_TEST_LIVE_MIGRATE,
+};
+
+/**
+ * struct xe_test_priv - Base class for test private info
+ * @id: enum xe_test_priv_id to identify the subclass.
+ */
+struct xe_test_priv {
+	enum xe_test_priv_id id;
+};
+
+#define XE_TEST_DECLARE(x) x
+#define XE_TEST_ONLY(x) unlikely(x)
+#define XE_TEST_EXPORT
+#define xe_cur_kunit() current->kunit_test
+
+/**
+ * xe_cur_kunit_priv - Obtain the struct xe_test_priv pointed to by
+ * current->kunit->priv if it exists and is embedded in the expected subclass.
+ * @id: Id of the expected subclass.
+ *
+ * Return: NULL if the process is not a kunit test, and NULL if the
+ * current kunit->priv pointer is not pointing to an object of the expected
+ * subclass. A pointer to the embedded struct xe_test_priv otherwise.
+ */
+static inline struct xe_test_priv *
+xe_cur_kunit_priv(enum xe_test_priv_id id)
+{
+	struct xe_test_priv *priv;
+
+	if (!xe_cur_kunit())
+		return NULL;
+
+	priv = xe_cur_kunit()->priv;
+	return priv && priv->id == id ? priv : NULL; /* priv may be unset */
+}
+
+#else /* if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) */
+
+#define XE_TEST_DECLARE(x)
+#define XE_TEST_ONLY(x) 0
+#define XE_TEST_EXPORT static
+#define xe_cur_kunit() NULL
+#define xe_cur_kunit_priv(_id) NULL
+
+#endif
+#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c
new file mode 100644
index 000000000000..b4715b78ef3b
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_drv.h>
+#include <drm/drm_kunit_helpers.h>
+
+#include <kunit/test.h>
+
+#include "xe_device.h"
+#include "xe_pci_test.h"
+#include "xe_reg_sr.h"
+#include "xe_tuning.h"
+#include "xe_wa.h"
+
+struct platform_test_case {
+	const char *name; /* copied into the KUnit parameter description */
+	enum xe_platform platform;
+	enum xe_subplatform subplatform;
+	u32 graphics_verx100; /* set only by GMDID_CASE() entries */
+	u32 media_verx100; /* set only by GMDID_CASE() entries */
+	struct xe_step_info step; /* .media is set only by GMDID_CASE() */
+};
+
+#define PLATFORM_CASE(platform__, graphics_step__)				\
+	{									\
+		.name = #platform__ " (" #graphics_step__ ")",			\
+		.platform = XE_ ## platform__,					\
+		.subplatform = XE_SUBPLATFORM_NONE,				\
+		.step = { .graphics = STEP_ ## graphics_step__ }		\
+	}
+
+
+#define SUBPLATFORM_CASE(platform__, subplatform__, graphics_step__)			\
+	{										\
+		.name = #platform__ "_" #subplatform__ " (" #graphics_step__ ")",	\
+		.platform = XE_ ## platform__,						\
+		.subplatform = XE_SUBPLATFORM_ ## platform__ ## _ ## subplatform__,	\
+		.step = { .graphics = STEP_ ## graphics_step__ }			\
+	}
+
+#define GMDID_CASE(platform__, graphics_verx100__, graphics_step__,		\
+		   media_verx100__, media_step__)				\
+	{									\
+		.name = #platform__ " (g:" #graphics_step__ ", m:" #media_step__ ")",\
+		.platform = XE_ ## platform__,					\
+		.subplatform = XE_SUBPLATFORM_NONE,				\
+		.graphics_verx100 = graphics_verx100__,				\
+		.media_verx100 = media_verx100__,				\
+		.step = { .graphics = STEP_ ## graphics_step__,			\
+			   .media = STEP_ ## media_step__ }			\
+	}
+
+static const struct platform_test_case cases[] = {
+	PLATFORM_CASE(TIGERLAKE, B0),
+	PLATFORM_CASE(DG1, A0),
+	PLATFORM_CASE(DG1, B0),
+	PLATFORM_CASE(ALDERLAKE_S, A0),
+	PLATFORM_CASE(ALDERLAKE_S, B0),
+	PLATFORM_CASE(ALDERLAKE_S, C0),
+	PLATFORM_CASE(ALDERLAKE_S, D0),
+	PLATFORM_CASE(ALDERLAKE_P, A0),
+	PLATFORM_CASE(ALDERLAKE_P, B0),
+	PLATFORM_CASE(ALDERLAKE_P, C0),
+	SUBPLATFORM_CASE(ALDERLAKE_S, RPLS, D0),
+	SUBPLATFORM_CASE(ALDERLAKE_P, RPLU, E0),
+	SUBPLATFORM_CASE(DG2, G10, A0),
+	SUBPLATFORM_CASE(DG2, G10, A1),
+	SUBPLATFORM_CASE(DG2, G10, B0),
+	SUBPLATFORM_CASE(DG2, G10, C0),
+	SUBPLATFORM_CASE(DG2, G11, A0),
+	SUBPLATFORM_CASE(DG2, G11, B0),
+	SUBPLATFORM_CASE(DG2, G11, B1),
+	SUBPLATFORM_CASE(DG2, G12, A0),
+	SUBPLATFORM_CASE(DG2, G12, A1),
+	GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0),
+	GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0),
+	GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0),
+	GMDID_CASE(LUNARLAKE, 2004, B0, 2000, A0),
+};
+
+static void platform_desc(const struct platform_test_case *t, char *desc)
+{
+	strscpy(desc, t->name, KUNIT_PARAM_DESC_SIZE);
+}
+
+KUNIT_ARRAY_PARAM(platform, cases, platform_desc);
+
+static int xe_wa_test_init(struct kunit *test)
+{
+	const struct platform_test_case *param = test->param_value;
+	struct xe_pci_fake_data data = {
+		.platform = param->platform,
+		.subplatform = param->subplatform,
+		.graphics_verx100 = param->graphics_verx100,
+		.media_verx100 = param->media_verx100,
+		.graphics_step = param->step.graphics,
+		.media_step = param->step.media,
+	};
+	struct xe_device *xe;
+	struct device *dev;
+	int ret;
+
+	dev = drm_kunit_helper_alloc_device(test);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
+
+	xe = drm_kunit_helper_alloc_drm_device(test, dev,
+					       struct xe_device,
+					       drm, DRIVER_GEM);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe);
+
+	test->priv = &data;
+	ret = xe_pci_fake_device_init(xe);
+	KUNIT_ASSERT_EQ(test, ret, 0);
+
+	if (!param->graphics_verx100)
+		xe->info.step = param->step;
+
+	/* TODO: init hw engines for engine/LRC WAs */
+	xe->drm.dev = dev;
+	test->priv = xe;
+
+	return 0;
+}
+
+static void xe_wa_test_exit(struct kunit *test)
+{
+	struct xe_device *xe = test->priv;
+
+	drm_kunit_helper_free_device(test, xe->drm.dev);
+}
+
+static void xe_wa_gt(struct kunit *test)
+{
+	struct xe_device *xe = test->priv;
+	struct xe_gt *gt;
+	int id;
+
+	for_each_gt(gt, xe, id) {
+		xe_reg_sr_init(&gt->reg_sr, "GT", xe);
+
+		xe_wa_process_gt(gt);
+		xe_tuning_process_gt(gt);
+
+		KUNIT_ASSERT_EQ(test, gt->reg_sr.errors, 0);
+	}
+}
+
+static struct kunit_case xe_wa_tests[] = {
+	KUNIT_CASE_PARAM(xe_wa_gt, platform_gen_params),
+	{}
+};
+
+static struct kunit_suite xe_wa_test_suite = {
+	.name = "xe_wa",
+	.init = xe_wa_test_init,
+	.exit = xe_wa_test_exit,
+	.test_cases = xe_wa_tests,
+};
+
+kunit_test_suite(xe_wa_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xe_wa kunit test");
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/xe_assert.h b/drivers/gpu/drm/xe/xe_assert.h
new file mode 100644
index 000000000000..34c142e6cfb0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_assert.h
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_ASSERT_H_
+#define _XE_ASSERT_H_
+
+#include <linux/string_helpers.h>
+
+#include <drm/drm_print.h>
+
+#include "xe_device_types.h"
+#include "xe_step.h"
+
+/**
+ * DOC: Xe ASSERTs
+ *
+ * While Xe driver aims to be simpler than legacy i915 driver it is still
+ * complex enough that some changes introduced while adding new functionality
+ * could break the existing code.
+ *
+ * Adding &drm_WARN or &drm_err to catch unwanted programming usage could lead
+ * to undesired increased driver footprint and may impact production driver
+ * performance as this additional code will be always present.
+ *
+ * To allow annotate functions with additional detailed debug checks to assert
+ * that all prerequisites are satisfied, without worrying about footprint or
+ * performance penalty on production builds where all potential misuses
+ * introduced during code integration were already fixed, we introduce family
+ * of Xe assert macros that try to follow classic assert() utility:
+ *
+ *  * xe_assert()
+ *  * xe_tile_assert()
+ *  * xe_gt_assert()
+ *
+ * These macros are implemented on top of &drm_WARN, but unlike the original,
+ * warning is triggered when provided condition is false. Additionally all above
+ * assert macros cannot be used in expressions or as a condition, since
+ * underlying code will be compiled out on non-debug builds.
+ *
+ * Note that these macros are not intended for use to cover known gaps in the
+ * implementation; for such cases use regular &drm_WARN or &drm_err and provide
+ * valid safe fallback.
+ *
+ * Also in cases where performance or footprint is not an issue, developers
+ * should continue to use the regular &drm_WARN or &drm_err to ensure that bug
+ * reports from production builds will contain meaningful diagnostics data.
+ *
+ * Below code shows how asserts could help in debug to catch unplanned use::
+ *
+ *	static void one_igfx(struct xe_device *xe)
+ *	{
+ *		xe_assert(xe, xe->info.is_dgfx == false);
+ *		xe_assert(xe, xe->info.tile_count == 1);
+ *	}
+ *
+ *	static void two_dgfx(struct xe_device *xe)
+ *	{
+ *		xe_assert(xe, xe->info.is_dgfx);
+ *		xe_assert(xe, xe->info.tile_count == 2);
+ *	}
+ *
+ *	void foo(struct xe_device *xe)
+ *	{
+ *		if (xe->info.is_dgfx)
+ *			return two_dgfx(xe);
+ *		return one_igfx(xe);
+ *	}
+ *
+ *	void bar(struct xe_device *xe)
+ *	{
+ *		if (drm_WARN_ON(&xe->drm, xe->info.tile_count > 2))
+ *			return;
+ *
+ *		if (xe->info.tile_count == 2)
+ *			return two_dgfx(xe);
+ *		return one_igfx(xe);
+ *	}
+ */
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+#define __xe_assert_msg(xe, condition, msg, arg...) ({						\
+	(void)drm_WARN(&(xe)->drm, !(condition), "[" DRM_NAME "] Assertion `%s` failed!\n" msg,	\
+		       __stringify(condition), ## arg);						\
+})
+#else
+#define __xe_assert_msg(xe, condition, msg, arg...) ({						\
+	typecheck(const struct xe_device *, xe);						\
+	BUILD_BUG_ON_INVALID(condition);							\
+})
+#endif
+
+/**
+ * xe_assert - warn if condition is false when debugging.
+ * @xe: the &struct xe_device pointer to which &condition applies
+ * @condition: condition to check
+ *
+ * xe_assert() uses &drm_WARN to emit a warning and print additional information
+ * that could be read from the &xe pointer if provided &condition is false.
+ *
+ * Contrary to &drm_WARN, xe_assert() is effective only on debug builds
+ * (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions
+ * or as a condition.
+ *
+ * See `Xe ASSERTs`_ for general usage guidelines.
+ */
+#define xe_assert(xe, condition) xe_assert_msg((xe), condition, "")
+#define xe_assert_msg(xe, condition, msg, arg...) ({						\
+	const struct xe_device *__xe = (xe);							\
+	__xe_assert_msg(__xe, condition,							\
+			"platform: %d subplatform: %d\n"					\
+			"graphics: %s %u.%02u step %s\n"					\
+			"media: %s %u.%02u step %s\n"						\
+			msg,									\
+			__xe->info.platform, __xe->info.subplatform,				\
+			__xe->info.graphics_name,						\
+			__xe->info.graphics_verx100 / 100,					\
+			__xe->info.graphics_verx100 % 100,					\
+			xe_step_name(__xe->info.step.graphics),					\
+			__xe->info.media_name,							\
+			__xe->info.media_verx100 / 100,						\
+			__xe->info.media_verx100 % 100,						\
+			xe_step_name(__xe->info.step.media),					\
+			## arg);								\
+})
+
+/**
+ * xe_tile_assert - warn if condition is false when debugging.
+ * @tile: the &struct xe_tile pointer to which &condition applies
+ * @condition: condition to check
+ *
+ * xe_tile_assert() uses &drm_WARN to emit a warning and print additional
+ * information that could be read from the &tile pointer if provided &condition
+ * is false.
+ *
+ * Contrary to &drm_WARN, xe_tile_assert() is effective only on debug builds
+ * (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions
+ * or as a condition.
+ *
+ * See `Xe ASSERTs`_ for general usage guidelines.
+ */
+#define xe_tile_assert(tile, condition) xe_tile_assert_msg((tile), condition, "")
+#define xe_tile_assert_msg(tile, condition, msg, arg...) ({					\
+	const struct xe_tile *__tile = (tile);							\
+	char __buf[10] __maybe_unused;								\
+	xe_assert_msg(tile_to_xe(__tile), condition, "tile: %u VRAM %s\n" msg,			\
+		      __tile->id, ({ string_get_size(__tile->mem.vram.actual_physical_size, 1,	\
+				     STRING_UNITS_2, __buf, sizeof(__buf)); __buf; }), ## arg);	\
+})
+
+/**
+ * xe_gt_assert - warn if condition is false when debugging.
+ * @gt: the &struct xe_gt pointer to which &condition applies
+ * @condition: condition to check
+ *
+ * xe_gt_assert() uses &drm_WARN to emit a warning and print additional
+ * information that could be safely read from the &gt pointer if provided
+ * &condition is false.
+ *
+ * Contrary to &drm_WARN, xe_gt_assert() is effective only on debug builds
+ * (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions
+ * or as a condition.
+ *
+ * See `Xe ASSERTs`_ for general usage guidelines.
+ */
+#define xe_gt_assert(gt, condition) xe_gt_assert_msg((gt), condition, "")
+#define xe_gt_assert_msg(gt, condition, msg, arg...) ({						\
+	const struct xe_gt *__gt = (gt);							\
+	xe_tile_assert_msg(gt_to_tile(__gt), condition, "GT: %u type %d\n" msg,			\
+			   __gt->info.id, __gt->info.type, ## arg);				\
+})
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
new file mode 100644
index 000000000000..7c124475c428
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_bb.h"
+
+#include "instructions/xe_mi_commands.h"
+#include "regs/xe_gpu_commands.h"
+#include "xe_device.h"
+#include "xe_exec_queue_types.h"
+#include "xe_gt.h"
+#include "xe_hw_fence.h"
+#include "xe_sa.h"
+#include "xe_sched_job.h"
+#include "xe_vm_types.h"
+
+/*
+ * Size in bytes of the extra batch buffer space reserved for hardware prefetch.
+ */
+static int bb_prefetch(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (GRAPHICS_VERx100(xe) < 1250 || xe_gt_is_media_type(gt))
+		return SZ_512;
+
+	/* RCS and CCS require 1K, although other engines would be okay with 512 */
+	return SZ_1K;
+}
+
+struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
+{
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL);
+	int err;
+
+	if (!bb)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 * We need to allocate space for the requested number of dwords,
+	 * one additional MI_BATCH_BUFFER_END dword, and additional buffer
+	 * space to accommodate the platform-specific hardware prefetch
+	 * requirements.
+	 */
+	bb->bo = xe_sa_bo_new(!usm ? tile->mem.kernel_bb_pool : gt->usm.bb_pool,
+			      4 * (dwords + 1) + bb_prefetch(gt));
+	if (IS_ERR(bb->bo)) {
+		err = PTR_ERR(bb->bo);
+		goto err;
+	}
+
+	bb->cs = xe_sa_bo_cpu_addr(bb->bo);
+	bb->len = 0;
+
+	return bb;
+err:
+	kfree(bb);
+	return ERR_PTR(err);
+}
+
+static struct xe_sched_job *
+__xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr)
+{
+	u32 size = drm_suballoc_size(bb->bo);
+
+	bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+
+	xe_gt_assert(q->gt, bb->len * 4 + bb_prefetch(q->gt) <= size);
+
+	xe_sa_bo_flush_write(bb->bo);
+
+	return xe_sched_job_create(q, addr);
+}
+
+struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q,
+						struct xe_bb *bb,
+						u64 batch_base_ofs,
+						u32 second_idx)
+{
+	u64 addr[2] = {
+		batch_base_ofs + drm_suballoc_soffset(bb->bo),
+		batch_base_ofs + drm_suballoc_soffset(bb->bo) +
+		4 * second_idx,
+	};
+
+	xe_gt_assert(q->gt, second_idx <= bb->len);
+	xe_gt_assert(q->gt, q->vm->flags & XE_VM_FLAG_MIGRATION);
+
+	return __xe_bb_create_job(q, bb, addr);
+}
+
+struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q,
+				      struct xe_bb *bb)
+{
+	u64 addr = xe_sa_bo_gpu_addr(bb->bo);
+
+	xe_gt_assert(q->gt, !(q->vm && q->vm->flags & XE_VM_FLAG_MIGRATION));
+	return __xe_bb_create_job(q, bb, &addr);
+}
+
+/* Release @bb and its suballocation, passing @fence on; NULL @bb is a no-op. */
+void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence)
+{
+	if (bb) {
+		xe_sa_bo_free(bb->bo, fence);
+		kfree(bb);
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_bb.h b/drivers/gpu/drm/xe/xe_bb.h
new file mode 100644
index 000000000000..fafacd73dcc3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bb.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_BB_H_
+#define _XE_BB_H_
+
+#include "xe_bb_types.h"
+
+struct dma_fence;
+
+struct xe_gt;
+struct xe_exec_queue;
+struct xe_sched_job;
+
+struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm);
+struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q,
+				      struct xe_bb *bb);
+struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q,
+						struct xe_bb *bb, u64 batch_base_ofs,
+						u32 second_idx);
+void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bb_types.h b/drivers/gpu/drm/xe/xe_bb_types.h
new file mode 100644
index 000000000000..b7d30308cf90
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bb_types.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_BB_TYPES_H_
+#define _XE_BB_TYPES_H_
+
+#include <linux/types.h>
+
+struct drm_suballoc;
+
+struct xe_bb {
+	struct drm_suballoc *bo; /* suballocation holding the batch */
+
+	u32 *cs; /* CPU address of the batch, from xe_sa_bo_cpu_addr(bo) */
+	u32 len; /* dwords written to cs so far */
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
new file mode 100644
index 000000000000..4d3b80ec906d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -0,0 +1,2276 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_bo.h"
+
+#include <linux/dma-buf.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_gem_ttm_helper.h>
+#include <drm/drm_managed.h>
+#include <drm/ttm/ttm_device.h>
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/xe_drm.h>
+
+#include "xe_device.h"
+#include "xe_dma_buf.h"
+#include "xe_drm_client.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_map.h"
+#include "xe_migrate.h"
+#include "xe_preempt_fence.h"
+#include "xe_res_cursor.h"
+#include "xe_trace.h"
+#include "xe_ttm_stolen_mgr.h"
+#include "xe_vm.h"
+
+const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES]  = { /* indexed by XE_PL_* */
+	[XE_PL_SYSTEM] = "system",
+	[XE_PL_TT] = "gtt",
+	[XE_PL_VRAM0] = "vram0",
+	[XE_PL_VRAM1] = "vram1",
+	[XE_PL_STOLEN] = "stolen"
+};
+
+static const struct ttm_place sys_placement_flags = { /* also tt_placement's busy entry */
+	.fpfn = 0,
+	.lpfn = 0,
+	.mem_type = XE_PL_SYSTEM,
+	.flags = 0,
+};
+
+static struct ttm_placement sys_placement = {
+	.num_placement = 1,
+	.placement = &sys_placement_flags,
+	.num_busy_placement = 1,
+	.busy_placement = &sys_placement_flags, /* same single entry as .placement */
+};
+
+static const struct ttm_place tt_placement_flags = { /* referenced by tt_placement */
+	.fpfn = 0,
+	.lpfn = 0,
+	.mem_type = XE_PL_TT,
+	.flags = 0,
+};
+
+static struct ttm_placement tt_placement = {
+	.num_placement = 1,
+	.placement = &tt_placement_flags,
+	.num_busy_placement = 1,
+	.busy_placement = &sys_placement_flags, /* busy entry is the system one, not TT */
+};
+
+bool mem_type_is_vram(u32 mem_type)
+{
+	return !(mem_type < XE_PL_VRAM0 || mem_type == XE_PL_STOLEN);
+}
+
+static bool resource_is_stolen_vram(struct xe_device *xe, struct ttm_resource *res)
+{
+	return IS_DGFX(xe) && res->mem_type == XE_PL_STOLEN;
+}
+
+static bool resource_is_vram(struct ttm_resource *res)
+{
+	return mem_type_is_vram(res->mem_type); /* XE_PL_STOLEN does not count */
+}
+
+bool xe_bo_is_vram(struct xe_bo *bo)
+{
+	return resource_is_stolen_vram(xe_bo_device(bo), bo->ttm.resource) ||
+		resource_is_vram(bo->ttm.resource);
+}
+
+bool xe_bo_is_stolen(struct xe_bo *bo)
+{
+	return bo->ttm.resource->mem_type == XE_PL_STOLEN; /* resource is dereferenced unchecked */
+}
+
+/**
+ * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR
+ * @bo: The BO
+ *
+ * The stolen memory is accessed through the PCI BAR for both DGFX and some
+ * integrated platforms that have a dedicated bit in the PTE for devmem (DM).
+ *
+ * Return: true if it's stolen memory accessed via PCI BAR, false otherwise.
+ */
+bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
+{
+	return xe_bo_is_stolen(bo) &&
+		GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270;
+}
+
+static bool xe_bo_is_user(struct xe_bo *bo)
+{
+	return !!(bo->flags & XE_BO_CREATE_USER_BIT);
+}
+
+static struct xe_migrate *
+mem_type_to_migrate(struct xe_device *xe, u32 mem_type)
+{
+	/* Stolen memory uses tile 0; VRAMn uses tile n. */
+	u32 tile_id = mem_type == XE_PL_STOLEN ? 0 : mem_type - XE_PL_VRAM0;
+
+	xe_assert(xe, mem_type == XE_PL_STOLEN || mem_type_is_vram(mem_type));
+	return xe->tiles[tile_id].migrate;
+}
+
+static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res)
+{
+	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
+
+	/* Callers must pass a VRAM resource. */
+	xe_assert(xe, resource_is_vram(res));
+
+	return to_xe_ttm_vram_mgr(ttm_manager_type(&xe->ttm, res->mem_type))->vram;
+}
+
+static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
+			   u32 bo_flags, u32 *c)
+{
+	if (!(bo_flags & XE_BO_CREATE_SYSTEM_BIT))
+		return;
+
+	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
+
+	/* System memory is requested as a XE_PL_TT placement. */
+	bo->placements[*c] = (struct ttm_place) { .mem_type = XE_PL_TT };
+	*c += 1;
+
+	if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
+		bo->props.preferred_mem_type = XE_PL_TT;
+}
+
+static void add_vram(struct xe_device *xe, struct xe_bo *bo,
+		     struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
+{
+	struct ttm_place place = { .mem_type = mem_type };
+	struct xe_mem_region *vram;
+	u64 io_size;
+
+	xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); /* bound of bo->placements; @places is what gets written */
+
+	vram = to_xe_ttm_vram_mgr(ttm_manager_type(&xe->ttm, mem_type))->vram;
+	xe_assert(xe, vram && vram->usable_size);
+	io_size = vram->io_size;
+
+	/*
+	 * For eviction / restore on suspend / resume, objects
+	 * pinned in VRAM must be contiguous.
+	 */
+	if (bo_flags & (XE_BO_CREATE_PINNED_BIT |
+			XE_BO_CREATE_GGTT_BIT))
+		place.flags |= TTM_PL_FLAG_CONTIGUOUS;
+
+	if (io_size < vram->usable_size) { /* io_size covers only part of the usable VRAM */
+		if (bo_flags & XE_BO_NEEDS_CPU_ACCESS) {
+			place.fpfn = 0;
+			place.lpfn = io_size >> PAGE_SHIFT; /* limit to the first io_size bytes */
+		} else {
+			place.flags |= TTM_PL_FLAG_TOPDOWN;
+		}
+	}
+	places[*c] = place;
+	*c += 1;
+
+	if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID) /* first placement added wins */
+		bo->props.preferred_mem_type = mem_type;
+}
+
+static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
+			 u32 bo_flags, u32 *c)
+{
+	bool gt1_first = bo->props.preferred_gt == XE_GT1;
+
+	/* VRAM1 is listed ahead of VRAM0 only when GT1 is preferred. */
+	if (gt1_first && (bo_flags & XE_BO_CREATE_VRAM1_BIT))
+		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
+
+	if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
+		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
+
+	if (!gt1_first && (bo_flags & XE_BO_CREATE_VRAM1_BIT))
+		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
+}
+
+static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
+			   u32 bo_flags, u32 *c)
+{
+	u32 contig_mask = XE_BO_CREATE_PINNED_BIT | XE_BO_CREATE_GGTT_BIT;
+
+	if (!(bo_flags & XE_BO_CREATE_STOLEN_BIT))
+		return;
+
+	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
+	bo->placements[*c] = (struct ttm_place) {
+		.mem_type = XE_PL_STOLEN,
+		.flags = bo_flags & contig_mask ? TTM_PL_FLAG_CONTIGUOUS : 0,
+	};
+	*c += 1;
+}
+
+static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
+				       u32 bo_flags)
+{
+	u32 c = 0; /* number of entries filled in bo->placements */
+
+	bo->props.preferred_mem_type = XE_BO_PROPS_INVALID; /* set by the first placement added below */
+
+	/* The order of placements should indicate the preferred location. */
+
+	if (bo->props.preferred_mem_class == DRM_XE_MEM_REGION_CLASS_SYSMEM) {
+		try_add_system(xe, bo, bo_flags, &c);
+		try_add_vram(xe, bo, bo_flags, &c);
+	} else {
+		try_add_vram(xe, bo, bo_flags, &c);
+		try_add_system(xe, bo, bo_flags, &c);
+	}
+	try_add_stolen(xe, bo, bo_flags, &c); /* stolen always goes last */
+
+	if (!c) /* bo_flags selected no placement at all */
+		return -EINVAL;
+
+	bo->placement = (struct ttm_placement) {
+		.num_placement = c,
+		.placement = bo->placements,
+		.num_busy_placement = c,
+		.busy_placement = bo->placements,
+	};
+
+	return 0;
+}
+
+int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
+			      u32 bo_flags)
+{
+	xe_bo_assert_held(bo); /* the only addition over __xe_bo_placement_for_flags() */
+	return __xe_bo_placement_for_flags(xe, bo, bo_flags);
+}
+
+static void xe_evict_flags(struct ttm_buffer_object *tbo,
+			   struct ttm_placement *placement)
+{
+	if (!xe_bo_is_xe_bo(tbo)) {
+		/* Don't handle scatter gather BOs */
+		if (tbo->type == ttm_bo_type_sg) {
+			placement->num_placement = 0;
+			placement->num_busy_placement = 0;
+			return;
+		}
+
+		*placement = sys_placement; /* any other non-xe BO goes to system */
+		return;
+	}
+
+	/*
+	 * For xe, sg bos that are evicted to system just trigger a
+	 * rebind of the sg list upon subsequent validation to XE_PL_TT.
+	 */
+	switch (tbo->resource->mem_type) {
+	case XE_PL_VRAM0:
+	case XE_PL_VRAM1:
+	case XE_PL_STOLEN:
+		*placement = tt_placement;
+		break;
+	case XE_PL_TT:
+	default:
+		*placement = sys_placement;
+		break;
+	}
+}
+
+struct xe_ttm_tt {
+	struct ttm_tt ttm; /* embedded base; container_of() recovers the wrapper */
+	struct device *dev; /* device passed to dma_map_sgtable()/dma_unmap_sgtable() */
+	struct sg_table sgt; /* storage for the table built in xe_tt_map_sg() */
+	struct sg_table *sg; /* &sgt once mapped, or a dma-buf attachment's table; else NULL */
+};
+
+static int xe_tt_map_sg(struct ttm_tt *tt)
+{
+	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
+	unsigned long num_pages = tt->num_pages;
+	int ret;
+
+	XE_WARN_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL);
+
+	if (xe_tt->sg) /* already mapped: nothing to do */
+		return 0;
+
+	ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
+						num_pages, 0,
+						(u64)num_pages << PAGE_SHIFT,
+						xe_sg_segment_size(xe_tt->dev),
+						GFP_KERNEL);
+	if (ret)
+		return ret;
+
+	xe_tt->sg = &xe_tt->sgt;
+	ret = dma_map_sgtable(xe_tt->dev, xe_tt->sg, DMA_BIDIRECTIONAL,
+			      DMA_ATTR_SKIP_CPU_SYNC);
+	if (ret) {
+		sg_free_table(xe_tt->sg);
+		xe_tt->sg = NULL; /* back to the unmapped state */
+		return ret;
+	}
+
+	return 0;
+}
+
+struct sg_table *xe_bo_sg(struct xe_bo *bo)
+{
+	struct xe_ttm_tt *xe_tt;
+
+	xe_tt = container_of(bo->ttm.ttm, struct xe_ttm_tt, ttm);
+	return xe_tt->sg;
+}
+
+static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
+				       u32 page_flags)
+{
+	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+	struct xe_device *xe = xe_bo_device(bo);
+	struct xe_ttm_tt *tt;
+	unsigned long extra_pages;
+	enum ttm_caching caching;
+	int err;
+
+	tt = kzalloc(sizeof(*tt), GFP_KERNEL);
+	if (!tt)
+		return NULL;
+
+	tt->dev = xe->drm.dev;
+
+	extra_pages = 0;
+	if (xe_bo_needs_ccs_pages(bo)) /* extra pages sized by xe_device_ccs_bytes() */
+		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
+					   PAGE_SIZE);
+
+	switch (bo->cpu_caching) {
+	case DRM_XE_GEM_CPU_CACHING_WC:
+		caching = ttm_write_combined;
+		break;
+	default: /* anything other than WC maps cached */
+		caching = ttm_cached;
+		break;
+	}
+
+	WARN_ON((bo->flags & XE_BO_CREATE_USER_BIT) && !bo->cpu_caching); /* user BOs are expected to set cpu_caching */
+
+	/*
+	 * Display scanout is always non-coherent with the CPU cache.
+	 *
+	 * For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and
+	 * require a CPU:WC mapping.
+	 */
+	if ((!bo->cpu_caching && bo->flags & XE_BO_SCANOUT_BIT) ||
+	    (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_PAGETABLE))
+		caching = ttm_write_combined;
+
+	err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages);
+	if (err) {
+		kfree(tt);
+		return NULL; /* the error code is not propagated, only NULL */
+	}
+
+	return &tt->ttm;
+}
+
+static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
+			      struct ttm_operation_ctx *ctx)
+{
+	int err;
+
+	/*
+	 * dma-bufs are not populated with pages, and the dma-
+	 * addresses are set up when moved to XE_PL_TT.
+	 */
+	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
+		return 0;
+
+	err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
+	if (err)
+		return err;
+
+	/* A follow-up may move this to xe_bo_move(), for when the BO is moved to XE_PL_TT */
+	err = xe_tt_map_sg(tt);
+	if (err)
+		ttm_pool_free(&ttm_dev->pool, tt); /* undo the allocation above */
+
+	return err;
+}
+
+static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
+{
+	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
+
+	/* External (dma-buf) tts are never populated by xe_ttm_tt_populate(). */
+	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
+		return;
+
+	if (xe_tt->sg) {
+		dma_unmap_sgtable(xe_tt->dev, xe_tt->sg, DMA_BIDIRECTIONAL, 0);
+		sg_free_table(xe_tt->sg);
+		xe_tt->sg = NULL;
+	}
+
+	ttm_pool_free(&ttm_dev->pool, tt);
+}
+
+static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
+{
+	ttm_tt_fini(tt);
+	kfree(tt); /* tt is the first member of the xe_ttm_tt from xe_ttm_tt_create() */
+}
+
+static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
+				 struct ttm_resource *mem)
+{
+	struct xe_device *xe = ttm_to_xe_device(bdev);
+
+	switch (mem->mem_type) {
+	case XE_PL_SYSTEM:
+	case XE_PL_TT:
+		return 0; /* no bus fields to fill in */
+	case XE_PL_VRAM0:
+	case XE_PL_VRAM1: {
+		struct xe_ttm_vram_mgr_resource *vres =
+			to_xe_ttm_vram_mgr_resource(mem);
+		struct xe_mem_region *vram = res_to_mem_region(mem);
+
+		if (vres->used_visible_size < mem->size) /* NOTE(review): presumably not fully CPU-visible - confirm */
+			return -EINVAL;
+
+		mem->bus.offset = mem->start << PAGE_SHIFT;
+
+		if (vram->mapping &&
+		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
+			mem->bus.addr = (u8 __force *)vram->mapping +
+				mem->bus.offset;
+
+		mem->bus.offset += vram->io_start; /* rebase onto the region's io_start */
+		mem->bus.is_iomem = true;
+
+#if  !defined(CONFIG_X86)
+		mem->bus.caching = ttm_write_combined;
+#endif
+		return 0;
+	} case XE_PL_STOLEN:
+		return xe_ttm_stolen_io_mem_reserve(xe, mem);
+	default:
+		return -EINVAL;
+	}
+}
+
+static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
+				const struct ttm_operation_ctx *ctx)
+{
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
+	struct drm_gem_object *obj = &bo->ttm.base;
+	struct drm_gpuvm_bo *vm_bo;
+	bool idle = false; /* set once the BO's fences have been waited on */
+	int ret = 0;
+
+	dma_resv_assert_held(bo->ttm.base.resv);
+
+	if (!list_empty(&bo->ttm.base.gpuva.list)) {
+		dma_resv_iter_begin(&cursor, bo->ttm.base.resv,
+				    DMA_RESV_USAGE_BOOKKEEP);
+		dma_resv_for_each_fence_unlocked(&cursor, fence)
+			dma_fence_enable_sw_signaling(fence);
+		dma_resv_iter_end(&cursor);
+	}
+
+	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
+		struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
+		struct drm_gpuva *gpuva;
+
+		if (!xe_vm_in_fault_mode(vm)) { /* non-fault VMs only get the vm_bo marked evicted */
+			drm_gpuvm_bo_evict(vm_bo, true);
+			continue;
+		}
+
+		if (!idle) { /* wait at most once, before the first invalidation */
+			long timeout;
+
+			if (ctx->no_wait_gpu &&
+			    !dma_resv_test_signaled(bo->ttm.base.resv,
+						    DMA_RESV_USAGE_BOOKKEEP))
+				return -EBUSY;
+
+			timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
+							DMA_RESV_USAGE_BOOKKEEP,
+							ctx->interruptible,
+							MAX_SCHEDULE_TIMEOUT);
+			if (!timeout)
+				return -ETIME;
+			if (timeout < 0)
+				return timeout;
+
+			idle = true;
+		}
+
+		drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
+			struct xe_vma *vma = gpuva_to_vma(gpuva);
+
+			trace_xe_vma_evict(vma);
+			ret = xe_vm_invalidate_vma(vma);
+			if (XE_WARN_ON(ret)) /* stop at the first failing VMA */
+				return ret;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * The dma-buf map_attachment() / unmap_attachment() is hooked up here.
+ * Note that unmapping the attachment is deferred to the next
+ * map_attachment time, or to bo destroy (after idling) whichever comes first.
+ * This is to avoid syncing before unmap_attachment(), assuming that the
+ * caller relies on idling the reservation object before moving the
+ * backing store out. Should that assumption not hold, then we will be able
+ * to unconditionally call unmap_attachment() when moving out to system.
+ */
+static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
+			     struct ttm_resource *new_res)
+{
+	struct dma_buf_attachment *attach = ttm_bo->base.import_attach;
+	struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
+					       ttm);
+	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+	struct sg_table *sg;
+
+	xe_assert(xe, attach);
+	xe_assert(xe, ttm_bo->ttm);
+
+	if (new_res->mem_type == XE_PL_SYSTEM) /* moving out: keep the old mapping for now */
+		goto out;
+
+	if (ttm_bo->sg) { /* the deferred unmap of the previous mapping */
+		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
+		ttm_bo->sg = NULL;
+	}
+
+	sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
+	if (IS_ERR(sg))
+		return PTR_ERR(sg);
+
+	ttm_bo->sg = sg;
+	xe_tt->sg = sg; /* same table that xe_bo_sg() hands out */
+
+out:
+	ttm_bo_move_null(ttm_bo, new_res);
+
+	return 0;
+}
+
+/**
+ * xe_bo_move_notify - Notify subsystems of a pending move
+ * @bo: The buffer object
+ * @ctx: The struct ttm_operation_ctx controlling locking and waits.
+ *
+ * This function notifies subsystems of an upcoming buffer move.
+ * Upon receiving such a notification, subsystems should schedule
+ * halting access to the underlying pages and optionally add a fence
+ * to the buffer object's dma_resv object, that signals when access is
+ * stopped. The caller will wait on all dma_resv fences before
+ * starting the move.
+ *
+ * A subsystem may commence access to the object after obtaining
+ * bindings to the new backing memory under the object lock.
+ *
+ * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode,
+ * -EINVAL if the object is pinned, other negative error code on error.
+ */
+static int xe_bo_move_notify(struct xe_bo *bo,
+			     const struct ttm_operation_ctx *ctx)
+{
+	struct ttm_buffer_object *ttm_bo = &bo->ttm;
+	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+	int ret;
+
+	/*
+	 * If this starts to call into many components, consider
+	 * using a notification chain here.
+	 */
+
+	if (xe_bo_is_pinned(bo))
+		return -EINVAL;
+
+	xe_bo_vunmap(bo);
+	ret = xe_bo_trigger_rebind(xe, bo, ctx);
+	if (ret)
+		return ret;
+
+	/* Don't call move_notify() for imported dma-bufs. */
+	if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
+		dma_buf_move_notify(ttm_bo->base.dma_buf);
+
+	return 0;
+}
+
+static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
+		      struct ttm_operation_ctx *ctx,
+		      struct ttm_resource *new_mem,
+		      struct ttm_place *hop)
+{
+	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+	struct ttm_resource *old_mem = ttm_bo->resource;
+	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
+	struct ttm_tt *ttm = ttm_bo->ttm;
+	struct xe_migrate *migrate = NULL;
+	struct dma_fence *fence;
+	bool move_lacks_source;
+	bool tt_has_data;
+	bool needs_clear;
+	bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
+				  ttm && ttm_tt_is_populated(ttm)) ? true : false;
+	int ret = 0;
+	/* Bo creation path, moving to system or TT. */
+	if ((!old_mem && ttm) && !handle_system_ccs) {
+		ttm_bo_move_null(ttm_bo, new_mem);
+		return 0;
+	}
+
+	if (ttm_bo->type == ttm_bo_type_sg) {
+		ret = xe_bo_move_notify(bo, ctx);
+		if (!ret)
+			ret = xe_bo_move_dmabuf(ttm_bo, new_mem);
+		goto out;
+	}
+
+	tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
+			      (ttm->page_flags & TTM_TT_FLAG_SWAPPED));
+
+	move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared)  :
+						(!mem_type_is_vram(old_mem_type) && !tt_has_data);
+
+	needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
+		(!ttm && ttm_bo->type == ttm_bo_type_device);
+
+	if ((move_lacks_source && !needs_clear)) { /* nothing to copy and nothing to clear */
+		ttm_bo_move_null(ttm_bo, new_mem);
+		goto out;
+	}
+
+	if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT && !handle_system_ccs) {
+		ttm_bo_move_null(ttm_bo, new_mem);
+		goto out;
+	}
+
+	/*
+	 * Failed multi-hop where the old_mem is still marked as
+	 * TTM_PL_FLAG_TEMPORARY, should just be a dummy move.
+	 */
+	if (old_mem_type == XE_PL_TT &&
+	    new_mem->mem_type == XE_PL_TT) {
+		ttm_bo_move_null(ttm_bo, new_mem);
+		goto out;
+	}
+
+	if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
+		ret = xe_bo_move_notify(bo, ctx);
+		if (ret)
+			goto out;
+	}
+
+	if (old_mem_type == XE_PL_TT &&
+	    new_mem->mem_type == XE_PL_SYSTEM) {
+		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
+						     DMA_RESV_USAGE_BOOKKEEP,
+						     true,
+						     MAX_SCHEDULE_TIMEOUT);
+		if (timeout < 0) {
+			ret = timeout;
+			goto out;
+		}
+
+		if (!handle_system_ccs) {
+			ttm_bo_move_null(ttm_bo, new_mem);
+			goto out;
+		}
+	}
+
+	if (!move_lacks_source &&
+	    ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
+	     (mem_type_is_vram(old_mem_type) &&
+	      new_mem->mem_type == XE_PL_SYSTEM))) {
+		hop->fpfn = 0;
+		hop->lpfn = 0;
+		hop->mem_type = XE_PL_TT;
+		hop->flags = TTM_PL_FLAG_TEMPORARY;
+		ret = -EMULTIHOP; /* system <-> VRAM is done via a temporary XE_PL_TT hop */
+		goto out;
+	}
+
+	if (bo->tile)
+		migrate = bo->tile->migrate;
+	else if (resource_is_vram(new_mem))
+		migrate = mem_type_to_migrate(xe, new_mem->mem_type);
+	else if (mem_type_is_vram(old_mem_type))
+		migrate = mem_type_to_migrate(xe, old_mem_type);
+	else
+		migrate = xe->tiles[0].migrate;
+
+	xe_assert(xe, migrate);
+	trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source);
+	xe_device_mem_access_get(xe); /* paired with a put on every path below */
+
+	if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
+		/*
+		 * Kernel memory that is pinned should only be moved on suspend
+		 * / resume, some of the pinned memory is required for the
+		 * device to resume / use the GPU to move other evicted memory
+		 * (user memory) around. This likely could be optimized a bit
+		 * further where we find the minimum set of pinned memory
+		 * required for resume but for simplicity doing a memcpy for all
+		 * pinned memory.
+		 */
+		ret = xe_bo_vmap(bo);
+		if (!ret) {
+			ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem);
+
+			/* Create a new VMAP once kernel BO back in VRAM */
+			if (!ret && resource_is_vram(new_mem)) {
+				struct xe_mem_region *vram = res_to_mem_region(new_mem);
+				void __iomem *new_addr = vram->mapping +
+					(new_mem->start << PAGE_SHIFT);
+
+				if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
+					ret = -EINVAL;
+					xe_device_mem_access_put(xe);
+					goto out;
+				}
+
+				xe_assert(xe, new_mem->start ==
+					  bo->placements->fpfn);
+
+				iosys_map_set_vaddr_iomem(&bo->vmap, new_addr);
+			}
+		}
+	} else {
+		if (move_lacks_source)
+			fence = xe_migrate_clear(migrate, bo, new_mem);
+		else
+			fence = xe_migrate_copy(migrate, bo, bo, old_mem,
+						new_mem, handle_system_ccs);
+		if (IS_ERR(fence)) {
+			ret = PTR_ERR(fence);
+			xe_device_mem_access_put(xe);
+			goto out;
+		}
+		if (!move_lacks_source) {
+			ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict,
+							true, new_mem);
+			if (ret) { /* fall back: wait for the copy, then switch resources directly */
+				dma_fence_wait(fence, false);
+				ttm_bo_move_null(ttm_bo, new_mem);
+				ret = 0;
+			}
+		} else {
+			/*
+			 * ttm_bo_move_accel_cleanup() may blow up if
+			 * bo->resource == NULL, so just attach the
+			 * fence and set the new resource.
+			 */
+			dma_resv_add_fence(ttm_bo->base.resv, fence,
+					   DMA_RESV_USAGE_KERNEL);
+			ttm_bo_move_null(ttm_bo, new_mem);
+		}
+
+		dma_fence_put(fence);
+	}
+
+	xe_device_mem_access_put(xe);
+
+out:
+	return ret;
+
+}
+
+/**
+ * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
+ * @bo: The buffer object to move.
+ *
+ * On successful completion, the object memory will be moved to system memory.
+ * This function blocks until the object has been fully moved.
+ *
+ * This is needed for special handling of pinned VRAM objects during
+ * suspend-resume.
+ *
+ * Return: 0 on success. Negative error code on failure.
+ */
+int xe_bo_evict_pinned(struct xe_bo *bo)
+{
+	struct ttm_place place = {
+		.mem_type = XE_PL_TT,
+	};
+	struct ttm_placement placement = {
+		.placement = &place,
+		.num_placement = 1,
+	};
+	struct ttm_operation_ctx ctx = {
+		.interruptible = false,
+	};
+	struct ttm_resource *new_mem;
+	int ret;
+
+	xe_bo_assert_held(bo);
+
+	if (WARN_ON(!bo->ttm.resource))
+		return -EINVAL;
+
+	if (WARN_ON(!xe_bo_is_pinned(bo)))
+		return -EINVAL;
+
+	if (WARN_ON(!xe_bo_is_vram(bo)))
+		return -EINVAL;
+
+	ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx);
+	if (ret)
+		return ret;
+
+	if (!bo->ttm.ttm) { /* no tt yet: create one to receive the contents */
+		bo->ttm.ttm = xe_ttm_tt_create(&bo->ttm, 0);
+		if (!bo->ttm.ttm) {
+			ret = -ENOMEM;
+			goto err_res_free;
+		}
+	}
+
+	ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
+	if (ret)
+		goto err_res_free;
+
+	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
+	if (ret)
+		goto err_res_free;
+
+	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL); /* no multi-hop place is supplied */
+	if (ret)
+		goto err_res_free;
+
+	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
+			      false, MAX_SCHEDULE_TIMEOUT); /* return value is ignored */
+
+	return 0;
+
+err_res_free:
+	ttm_resource_free(&bo->ttm, &new_mem);
+	return ret;
+}
+
+/**
+ * xe_bo_restore_pinned() - Restore a pinned VRAM object
+ * @bo: The buffer object to move.
+ *
+ * On successful completion, the object memory will be moved back to VRAM.
+ * This function blocks until the object has been fully moved.
+ *
+ * This is needed for special handling of pinned VRAM objects during
+ * suspend-resume.
+ *
+ * Return: 0 on success. Negative error code on failure.
+ */
+int xe_bo_restore_pinned(struct xe_bo *bo)
+{
+	struct ttm_operation_ctx ctx = {
+		.interruptible = false,
+	};
+	struct ttm_resource *new_mem;
+	int ret;
+
+	xe_bo_assert_held(bo);
+
+	if (WARN_ON(!bo->ttm.resource))
+		return -EINVAL;
+
+	if (WARN_ON(!xe_bo_is_pinned(bo)))
+		return -EINVAL;
+
+	if (WARN_ON(xe_bo_is_vram(bo) || !bo->ttm.ttm)) /* must currently be evicted, with a tt */
+		return -EINVAL;
+
+	ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx); /* the BO's own placement list */
+	if (ret)
+		return ret;
+
+	ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
+	if (ret)
+		goto err_res_free;
+
+	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
+	if (ret)
+		goto err_res_free;
+
+	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL); /* no multi-hop place is supplied */
+	if (ret)
+		goto err_res_free;
+
+	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
+			      false, MAX_SCHEDULE_TIMEOUT); /* return value is ignored */
+
+	return 0;
+
+err_res_free:
+	ttm_resource_free(&bo->ttm, &new_mem);
+	return ret;
+}
+
+static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
+				       unsigned long page_offset)
+{
+	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+	struct xe_res_cursor cursor;
+	struct xe_mem_region *vram;
+
+	if (ttm_bo->resource->mem_type == XE_PL_STOLEN) /* stolen has its own offset helper */
+		return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT;
+
+	vram = res_to_mem_region(ttm_bo->resource); /* asserts the resource is VRAM */
+	xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
+	return (vram->io_start + cursor.start) >> PAGE_SHIFT;
+}
+
+static void __xe_bo_vunmap(struct xe_bo *bo);
+
+/*
+ * TODO: Move this function to TTM so we don't rely on how TTM does its
+ * locking, thereby abusing TTM internals.
+ */
+static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
+{
+	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+	bool locked;
+
+	xe_assert(xe, !kref_read(&ttm_bo->kref));
+
+	/*
+	 * We can typically only race with TTM trylocking under the
+	 * lru_lock, which will immediately be unlocked again since
+	 * the ttm_bo refcount is zero at this point. So trylocking *should*
+	 * always succeed here, as long as we hold the lru lock.
+	 */
+	spin_lock(&ttm_bo->bdev->lru_lock);
+	locked = dma_resv_trylock(ttm_bo->base.resv);
+	spin_unlock(&ttm_bo->bdev->lru_lock);
+	xe_assert(xe, locked);
+
+	return locked; /* when true, the caller must dma_resv_unlock() the resv */
+}
+
+static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
+{
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
+	struct dma_fence *replacement = NULL; /* stub fence, fetched on first use */
+	struct xe_bo *bo;
+
+	if (!xe_bo_is_xe_bo(ttm_bo))
+		return;
+
+	bo = ttm_to_xe_bo(ttm_bo);
+	xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount)));
+
+	/*
+	 * Corner case where TTM fails to allocate memory and this BO's resv
+	 * still points to the VM's resv.
+	 */
+	if (ttm_bo->base.resv != &ttm_bo->base._resv)
+		return;
+
+	if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
+		return;
+
+	/*
+	 * Scrub the preempt fences if any. The unbind fence is already
+	 * attached to the resv.
+	 * TODO: Don't do this for external bos once we scrub them after
+	 * unbind.
+	 */
+	dma_resv_for_each_fence(&cursor, ttm_bo->base.resv,
+				DMA_RESV_USAGE_BOOKKEEP, fence) {
+		if (xe_fence_is_xe_preempt(fence) &&
+		    !dma_fence_is_signaled(fence)) {
+			if (!replacement)
+				replacement = dma_fence_get_stub();
+
+			dma_resv_replace_fences(ttm_bo->base.resv,
+						fence->context,
+						replacement,
+						DMA_RESV_USAGE_BOOKKEEP);
+		}
+	}
+	dma_fence_put(replacement); /* replacement is still NULL if nothing was scrubbed */
+
+	dma_resv_unlock(ttm_bo->base.resv);
+}
+
+static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
+{
+	struct xe_ttm_tt *xe_tt;
+
+	if (!xe_bo_is_xe_bo(ttm_bo))
+		return;
+
+	/* Only sg-type BOs that still hold a mapping need work here. */
+	if (ttm_bo->type != ttm_bo_type_sg || !ttm_bo->sg)
+		return;
+
+	/* The object is idle and about to be destroyed: drop the mapping. */
+	xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt, ttm);
+	dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
+				 DMA_BIDIRECTIONAL);
+
+	ttm_bo->sg = NULL;
+	xe_tt->sg = NULL;
+}
+
+struct ttm_device_funcs xe_ttm_funcs = {
+	.ttm_tt_create = xe_ttm_tt_create,
+	.ttm_tt_populate = xe_ttm_tt_populate,
+	.ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
+	.ttm_tt_destroy = xe_ttm_tt_destroy,
+	.evict_flags = xe_evict_flags,
+	.move = xe_bo_move,
+	.io_mem_reserve = xe_ttm_io_mem_reserve,
+	.io_mem_pfn = xe_ttm_io_mem_pfn,
+	.release_notify = xe_ttm_bo_release_notify,
+	.eviction_valuable = ttm_bo_eviction_valuable, /* the only hook not implemented in this file */
+	.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
+};
+
+static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
+{
+	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+
+	if (bo->ttm.base.import_attach) /* imported dma-buf */
+		drm_prime_gem_destroy(&bo->ttm.base, NULL);
+	drm_gem_object_release(&bo->ttm.base);
+
+	xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
+
+	if (bo->ggtt_node.size) /* NOTE(review): non-zero size presumably means a GGTT node exists - confirm */
+		xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo);
+
+#ifdef CONFIG_PROC_FS
+	if (bo->client)
+		xe_drm_client_remove_bo(bo);
+#endif
+
+	if (bo->vm && xe_bo_is_user(bo)) /* only user BOs drop a VM reference here */
+		xe_vm_put(bo->vm);
+
+	kfree(bo);
+}
+
+static void xe_gem_object_free(struct drm_gem_object *obj)
+{
+	/* Our BO reference counting scheme works as follows:
+	 *
+	 * The gem object kref is typically used throughout the driver,
+	 * and the gem object holds a ttm_buffer_object refcount, so
+	 * that when the last gem object reference is put, which is when
+	 * we end up in this function, we also put that ttm_buffer_object
+	 * refcount. Anything using gem interfaces is then no longer
+	 * allowed to access the object in a way that requires a gem
+	 * refcount, including locking the object.
+	 *
+	 * Driver ttm callbacks are allowed to use the ttm_buffer_object
+	 * refcount directly if needed.
+	 */
+	__xe_bo_vunmap(gem_to_xe_bo(obj));
+	ttm_bo_put(container_of(obj, struct ttm_buffer_object, base));
+}
+
+static void xe_gem_object_close(struct drm_gem_object *obj,
+				struct drm_file *file_priv)
+{
+	struct xe_bo *bo = gem_to_xe_bo(obj);
+
+	if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) { /* nothing to do for fault-mode VMs or VM-less BOs */
+		xe_assert(xe_bo_device(bo), xe_bo_is_user(bo));
+
+		xe_bo_lock(bo, false);
+		ttm_bo_set_bulk_move(&bo->ttm, NULL); /* clear the BO's bulk move */
+		xe_bo_unlock(bo);
+	}
+}
+
+static bool should_migrate_to_system(struct xe_bo *bo)
+{
+	if (!bo->props.cpu_atomic)
+		return false;
+	return xe_device_in_fault_mode(xe_bo_device(bo));
+}
+
+static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
+{
+	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
+	struct drm_device *ddev = tbo->base.dev;
+	vm_fault_t ret;
+	int idx, r = 0;
+
+	ret = ttm_bo_vm_reserve(tbo, vmf);
+	if (ret)
+		return ret;
+
+	if (drm_dev_enter(ddev, &idx)) {
+		struct xe_bo *bo = ttm_to_xe_bo(tbo);
+
+		trace_xe_bo_cpu_fault(bo);
+
+		if (should_migrate_to_system(bo)) {
+			r = xe_bo_migrate(bo, XE_PL_TT);
+			if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR)
+				ret = VM_FAULT_NOPAGE; /* these three errors are not reported as SIGBUS */
+			else if (r)
+				ret = VM_FAULT_SIGBUS;
+		}
+		if (!ret)
+			ret = ttm_bo_vm_fault_reserved(vmf,
+						       vmf->vma->vm_page_prot,
+						       TTM_BO_VM_NUM_PREFAULT);
+		drm_dev_exit(idx);
+	} else {
+		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot); /* drm_dev_enter() failed */
+	}
+	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+		return ret; /* NOTE(review): skips the unlock below; presumably TTM already dropped the resv lock - confirm */
+
+	dma_resv_unlock(tbo->base.resv);
+	return ret;
+}
+
+static const struct vm_operations_struct xe_gem_vm_ops = {
+	.fault = xe_gem_fault,
+	.open = ttm_bo_vm_open,
+	.close = ttm_bo_vm_close,
+	.access = ttm_bo_vm_access,
+};
+
+static const struct drm_gem_object_funcs xe_gem_object_funcs = {
+	.free = xe_gem_object_free,
+	.close = xe_gem_object_close,
+	.mmap = drm_gem_ttm_mmap,
+	.export = xe_gem_prime_export,
+	.vm_ops = &xe_gem_vm_ops, /* CPU faults go through xe_gem_fault() */
+};
+
+/**
+ * xe_bo_alloc - Allocate storage for a struct xe_bo
+ *
+ * This function is intended to allocate storage to be used for input
+ * to __xe_bo_create_locked(), in the case a pointer to the bo to be
+ * created is needed before the call to __xe_bo_create_locked().
+ * If __xe_bo_create_locked ends up never to be called, then the
+ * storage allocated with this function needs to be freed using
+ * xe_bo_free().
+ *
+ * Return: A pointer to a zeroed, otherwise uninitialized struct xe_bo on
+ * success, ERR_PTR(-ENOMEM) on error.
+ */
+struct xe_bo *xe_bo_alloc(void)
+{
+	struct xe_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+
+	if (!bo)
+		return ERR_PTR(-ENOMEM);
+
+	return bo;
+}
+
+/**
+ * xe_bo_free - Free storage allocated using xe_bo_alloc()
+ * @bo: The buffer object storage. May be NULL.
+ *
+ * Refer to xe_bo_alloc() documentation for valid use-cases.
+ */
+void xe_bo_free(struct xe_bo *bo)
+{
+	kfree(bo);
+}
+
+struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
+				     struct xe_tile *tile, struct dma_resv *resv,
+				     struct ttm_lru_bulk_move *bulk, size_t size,
+				     u16 cpu_caching, enum ttm_bo_type type,
+				     u32 flags)
+{
+	struct ttm_operation_ctx ctx = {
+		.interruptible = true,
+		.no_wait_gpu = false,
+	};
+	struct ttm_placement *placement;
+	uint32_t alignment;
+	size_t aligned_size;
+	int err;
+
+	/* Only kernel objects should set GT */
+	xe_assert(xe, !tile || type == ttm_bo_type_kernel);
+
+	if (XE_WARN_ON(!size)) {
+		xe_bo_free(bo);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (flags & (XE_BO_CREATE_VRAM_MASK | XE_BO_CREATE_STOLEN_BIT) &&
+	    !(flags & XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT) &&
+	    xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) {
+		aligned_size = ALIGN(size, SZ_64K);
+		if (type != ttm_bo_type_device)
+			size = ALIGN(size, SZ_64K);
+		flags |= XE_BO_INTERNAL_64K;
+		alignment = SZ_64K >> PAGE_SHIFT;
+	} else {
+		aligned_size = ALIGN(size, SZ_4K);
+		flags &= ~XE_BO_INTERNAL_64K;
+		alignment = SZ_4K >> PAGE_SHIFT;
+	}
+
+	if (type == ttm_bo_type_device && aligned_size != size) {
+		xe_bo_free(bo);	/* caller-provided @bo is consumed on failure */
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (!bo) {
+		bo = xe_bo_alloc();
+		if (IS_ERR(bo))
+			return bo;
+	}
+
+	bo->ccs_cleared = false;
+	bo->tile = tile;
+	bo->size = size;
+	bo->flags = flags;
+	bo->cpu_caching = cpu_caching;
+	bo->ttm.base.funcs = &xe_gem_object_funcs;
+	bo->props.preferred_mem_class = XE_BO_PROPS_INVALID;
+	bo->props.preferred_gt = XE_BO_PROPS_INVALID;
+	bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
+	bo->ttm.priority = XE_BO_PRIORITY_NORMAL;
+	INIT_LIST_HEAD(&bo->pinned_link);
+#ifdef CONFIG_PROC_FS
+	INIT_LIST_HEAD(&bo->client_link);
+#endif
+
+	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
+
+	if (resv) {
+		ctx.allow_res_evict = !(flags & XE_BO_CREATE_NO_RESV_EVICT);
+		ctx.resv = resv;
+	}
+
+	if (!(flags & XE_BO_FIXED_PLACEMENT_BIT)) {
+		err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
+		if (WARN_ON(err)) {
+			xe_ttm_bo_destroy(&bo->ttm);
+			return ERR_PTR(err);
+		}
+	}
+
+	/* Defer populating type_sg bos */
+	placement = (type == ttm_bo_type_sg ||
+		     bo->flags & XE_BO_DEFER_BACKING) ? &sys_placement :
+		&bo->placement;
+	err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type, placement,
+				   alignment, &ctx, NULL, resv, xe_ttm_bo_destroy);
+	if (err)
+		return ERR_PTR(err);
+
+	/*
+	 * The VRAM pages underneath are potentially still being accessed by the
+	 * GPU, as per async GPU clearing and async evictions. However TTM makes
+	 * sure to add any corresponding move/clear fences into the objects
+	 * dma-resv using the DMA_RESV_USAGE_KERNEL slot.
+	 *
+	 * For KMD internal buffers we don't care about GPU clearing, however we
+	 * still need to handle async evictions, where the VRAM is still being
+	 * accessed by the GPU. Most internal callers are not expecting this,
+	 * since they are missing the required synchronisation before accessing
+	 * the memory. To keep things simple just sync wait any kernel fences
+	 * here, if the buffer is designated KMD internal.
+	 *
+	 * For normal userspace objects we should already have the required
+	 * pipelining or sync waiting elsewhere, since we already have to deal
+	 * with things like async GPU clearing.
+	 */
+	if (type == ttm_bo_type_kernel) {
+		long timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
+						     DMA_RESV_USAGE_KERNEL,
+						     ctx.interruptible,
+						     MAX_SCHEDULE_TIMEOUT);
+
+		if (timeout < 0) {
+			if (!resv)
+				dma_resv_unlock(bo->ttm.base.resv);
+			xe_bo_put(bo);
+			return ERR_PTR(timeout);
+		}
+	}
+
+	bo->created = true;
+	if (bulk)
+		ttm_bo_set_bulk_move(&bo->ttm, bulk);
+	else
+		ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
+
+	return bo;
+}
+
+/* Set up @bo's single contiguous placement within the given address range. */
+static int __xe_bo_fixed_placement(struct xe_device *xe,
+				   struct xe_bo *bo,
+				   u32 flags,
+				   u64 start, u64 end, u64 size)
+{
+	struct ttm_place *fixed = bo->placements;
+
+	if (flags & (XE_BO_CREATE_USER_BIT|XE_BO_CREATE_SYSTEM_BIT))
+		return -EINVAL;
+
+	fixed->flags = TTM_PL_FLAG_CONTIGUOUS;
+	fixed->fpfn = start >> PAGE_SHIFT;
+	fixed->lpfn = end >> PAGE_SHIFT;
+
+	switch (flags & (XE_BO_CREATE_STOLEN_BIT | XE_BO_CREATE_VRAM_MASK)) {
+	case XE_BO_CREATE_VRAM0_BIT:
+		fixed->mem_type = XE_PL_VRAM0;
+		break;
+	case XE_BO_CREATE_VRAM1_BIT:
+		fixed->mem_type = XE_PL_VRAM1;
+		break;
+	case XE_BO_CREATE_STOLEN_BIT:
+		fixed->mem_type = XE_PL_STOLEN;
+		break;
+	default:
+		/* none, or more than one, of the bits above is set */
+		return -EINVAL;
+	}
+
+	bo->placement = (struct ttm_placement) {
+		.placement = fixed,
+		.num_placement = 1,
+		.busy_placement = fixed,
+		.num_busy_placement = 1,
+	};
+
+	return 0;
+}
+
+static struct xe_bo *
+__xe_bo_create_locked(struct xe_device *xe,
+		      struct xe_tile *tile, struct xe_vm *vm,
+		      size_t size, u64 start, u64 end,
+		      u16 cpu_caching, enum ttm_bo_type type, u32 flags)
+{
+	struct xe_bo *bo = NULL;
+	int err;
+
+	if (vm)
+		xe_vm_assert_held(vm);
+
+	if (start || end != ~0ULL) {
+		bo = xe_bo_alloc();
+		if (IS_ERR(bo))
+			return bo;
+
+		flags |= XE_BO_FIXED_PLACEMENT_BIT;
+		err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size);
+		if (err) {
+			xe_bo_free(bo);
+			return ERR_PTR(err);
+		}
+	}
+
+	bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
+				    vm && !xe_vm_in_fault_mode(vm) &&
+				    flags & XE_BO_CREATE_USER_BIT ?
+				    &vm->lru_bulk_move : NULL, size,
+				    cpu_caching, type, flags);
+	if (IS_ERR(bo))
+		return bo;
+
+	/*
+	 * Note that instead of taking a reference on the drm_gpuvm_resv_bo(),
+	 * to ensure the shared resv doesn't disappear under the bo, the bo
+	 * will keep a reference to the vm, and avoid circular references
+	 * by having all the vm's bo references released at vm close
+	 * time.
+	 */
+	if (vm && xe_bo_is_user(bo))
+		xe_vm_get(vm);
+	bo->vm = vm;
+
+	if (bo->flags & XE_BO_CREATE_GGTT_BIT) {
+		if (!tile && flags & XE_BO_CREATE_STOLEN_BIT)
+			tile = xe_device_get_root_tile(xe);
+
+		xe_assert(xe, tile);
+
+		if (flags & XE_BO_FIXED_PLACEMENT_BIT) {
+			err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo,
+						   start + bo->size, U64_MAX);
+		} else {
+			err = xe_ggtt_insert_bo(tile->mem.ggtt, bo);
+		}
+		if (err)
+			goto err_unlock_put_bo;
+	}
+
+	return bo;
+
+err_unlock_put_bo:
+	__xe_bo_unset_bulk_move(bo);
+	xe_bo_unlock_vm_held(bo);
+	xe_bo_put(bo);
+	return ERR_PTR(err);
+}
+
+struct xe_bo *
+xe_bo_create_locked_range(struct xe_device *xe,
+			  struct xe_tile *tile, struct xe_vm *vm,
+			  size_t size, u64 start, u64 end,
+			  enum ttm_bo_type type, u32 flags)
+{
+	return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, flags);
+}
+
+struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
+				  struct xe_vm *vm, size_t size,
+				  enum ttm_bo_type type, u32 flags)
+{
+	return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type, flags);
+}
+
+struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
+				struct xe_vm *vm, size_t size,
+				u16 cpu_caching,
+				enum ttm_bo_type type,
+				u32 flags)
+{
+	struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
+						 cpu_caching, type,
+						 flags | XE_BO_CREATE_USER_BIT);
+	if (!IS_ERR(bo))
+		xe_bo_unlock_vm_held(bo);
+
+	return bo;
+}
+
+struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
+			   struct xe_vm *vm, size_t size,
+			   enum ttm_bo_type type, u32 flags)
+{
+	struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags);
+
+	if (!IS_ERR(bo))
+		xe_bo_unlock_vm_held(bo);
+
+	return bo;
+}
+
+struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
+				      struct xe_vm *vm,
+				      size_t size, u64 offset,
+				      enum ttm_bo_type type, u32 flags)
+{
+	struct xe_bo *bo;
+	int err;
+	u64 start = offset == ~0ull ? 0 : offset;
+	u64 end = offset == ~0ull ? offset : start + size;
+
+	if (flags & XE_BO_CREATE_STOLEN_BIT &&
+	    xe_ttm_stolen_cpu_access_needs_ggtt(xe))
+		flags |= XE_BO_CREATE_GGTT_BIT;
+
+	bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
+				       flags | XE_BO_NEEDS_CPU_ACCESS);
+	if (IS_ERR(bo))
+		return bo;
+
+	err = xe_bo_pin(bo);
+	if (err)
+		goto err_put;
+
+	err = xe_bo_vmap(bo);
+	if (err)
+		goto err_unpin;
+
+	xe_bo_unlock_vm_held(bo);
+
+	return bo;
+
+err_unpin:
+	xe_bo_unpin(bo);
+err_put:
+	xe_bo_unlock_vm_held(bo);
+	xe_bo_put(bo);
+	return ERR_PTR(err);
+}
+
+struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
+				   struct xe_vm *vm, size_t size,
+				   enum ttm_bo_type type, u32 flags)
+{
+	return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags);
+}
+
+struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
+				     const void *data, size_t size,
+				     enum ttm_bo_type type, u32 flags)
+{
+	struct xe_bo *bo = xe_bo_create_pin_map(xe, tile, NULL,
+						ALIGN(size, PAGE_SIZE),
+						type, flags);
+	if (IS_ERR(bo))
+		return bo;
+
+	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
+
+	return bo;
+}
+
+static void __xe_bo_unpin_map_no_vm(struct drm_device *drm, void *arg)
+{
+	xe_bo_unpin_map_no_vm(arg);
+}
+
+struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
+					   size_t size, u32 flags)
+{
+	struct xe_bo *bo;
+	int ret;
+
+	bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags);
+	if (IS_ERR(bo))
+		return bo;
+
+	ret = drmm_add_action_or_reset(&xe->drm, __xe_bo_unpin_map_no_vm, bo);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return bo;
+}
+
+struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
+					     const void *data, size_t size, u32 flags)
+{
+	struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, ALIGN(size, PAGE_SIZE), flags);
+
+	if (IS_ERR(bo))
+		return bo;
+
+	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
+
+	return bo;
+}
+
+/*
+ * XXX: This is in the VM bind data path, likely should calculate this once and
+ * store, with a recalculation if the BO is moved.
+ */
+uint64_t vram_region_gpu_offset(struct ttm_resource *res)
+{
+	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
+
+	if (res->mem_type == XE_PL_STOLEN)
+		return xe_ttm_stolen_gpu_offset(xe);
+
+	return res_to_mem_region(res)->dpa_base;
+}
+
+/**
+ * xe_bo_pin_external - pin an external BO
+ * @bo: buffer object to be pinned
+ *
+ * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
+ * BO. Unique call compared to xe_bo_pin as this function has its own set of
+ * asserts and code to ensure evict / restore on suspend / resume.
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
+int xe_bo_pin_external(struct xe_bo *bo)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	int err;
+
+	xe_assert(xe, !bo->vm);
+	xe_assert(xe, xe_bo_is_user(bo));
+
+	if (!xe_bo_is_pinned(bo)) {
+		err = xe_bo_validate(bo, NULL, false);
+		if (err)
+			return err;
+
+		if (xe_bo_is_vram(bo)) {
+			spin_lock(&xe->pinned.lock);
+			list_add_tail(&bo->pinned_link,
+				      &xe->pinned.external_vram);
+			spin_unlock(&xe->pinned.lock);
+		}
+	}
+
+	ttm_bo_pin(&bo->ttm);
+
+	/*
+	 * FIXME: If we always use the reserve / unreserve functions for locking
+	 * we do not need this.
+	 */
+	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
+
+	return 0;
+}
+
+int xe_bo_pin(struct xe_bo *bo)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	int err;
+
+	/* We currently don't expect user BO to be pinned */
+	xe_assert(xe, !xe_bo_is_user(bo));
+
+	/* Pinned object must be in GGTT or have pinned flag */
+	xe_assert(xe, bo->flags & (XE_BO_CREATE_PINNED_BIT |
+				   XE_BO_CREATE_GGTT_BIT));
+
+	/*
+	 * No reason we can't support pinning imported dma-bufs we just don't
+	 * expect to pin an imported dma-buf.
+	 */
+	xe_assert(xe, !bo->ttm.base.import_attach);
+
+	/* We only expect at most 1 pin */
+	xe_assert(xe, !xe_bo_is_pinned(bo));
+
+	err = xe_bo_validate(bo, NULL, false);
+	if (err)
+		return err;
+
+	/*
+	 * For pinned objects on DGFX, which are also in vram, we expect
+	 * these to be in contiguous VRAM memory. Required eviction / restore
+	 * during suspend / resume (force restore to same physical address).
+	 */
+	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
+	    bo->flags & XE_BO_INTERNAL_TEST)) {
+		struct ttm_place *place = &(bo->placements[0]);
+
+		if (mem_type_is_vram(place->mem_type)) {
+			xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS);
+
+			place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) -
+				       vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
+			place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
+
+			spin_lock(&xe->pinned.lock);
+			list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
+			spin_unlock(&xe->pinned.lock);
+		}
+	}
+
+	ttm_bo_pin(&bo->ttm);
+
+	/*
+	 * FIXME: If we always use the reserve / unreserve functions for locking
+	 * we do not need this.
+	 */
+	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
+
+	return 0;
+}
+
+/**
+ * xe_bo_unpin_external - unpin an external BO
+ * @bo: buffer object to be unpinned
+ *
+ * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD)
+ * BO. Unique call compared to xe_bo_unpin as this function has its own set of
+ * asserts and code to ensure evict / restore on suspend / resume.
+ *
+ * This function does not return a value.
+ */
+void xe_bo_unpin_external(struct xe_bo *bo)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+
+	xe_assert(xe, !bo->vm);
+	xe_assert(xe, xe_bo_is_pinned(bo));
+	xe_assert(xe, xe_bo_is_user(bo));
+
+	if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link)) {
+		spin_lock(&xe->pinned.lock);
+		list_del_init(&bo->pinned_link);
+		spin_unlock(&xe->pinned.lock);
+	}
+
+	ttm_bo_unpin(&bo->ttm);
+
+	/*
+	 * FIXME: If we always use the reserve / unreserve functions for locking
+	 * we do not need this.
+	 */
+	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
+}
+
+void xe_bo_unpin(struct xe_bo *bo)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+
+	xe_assert(xe, !bo->ttm.base.import_attach);
+	xe_assert(xe, xe_bo_is_pinned(bo));
+
+	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
+	    bo->flags & XE_BO_INTERNAL_TEST)) {
+		struct ttm_place *place = &(bo->placements[0]);
+
+		if (mem_type_is_vram(place->mem_type)) {
+			xe_assert(xe, !list_empty(&bo->pinned_link));
+
+			spin_lock(&xe->pinned.lock);
+			list_del_init(&bo->pinned_link);
+			spin_unlock(&xe->pinned.lock);
+		}
+	}
+
+	ttm_bo_unpin(&bo->ttm);
+}
+
+/**
+ * xe_bo_validate() - Make sure the bo is in an allowed placement
+ * @bo: The bo.
+ * @vm: Pointer to the vm the bo shares a locked dma_resv object with, or
+ *      NULL. Used together with @allow_res_evict.
+ * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
+ *                   reservation object.
+ *
+ * Make sure the bo is in allowed placement, migrating it if necessary. If
+ * needed, other bos will be evicted. If bos selected for eviction shares
+ * the @vm's reservation object, they can be evicted iff @allow_res_evict is
+ * set to true, otherwise they will be bypassed.
+ *
+ * Return: 0 on success, negative error code on failure. May return
+ * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
+ */
+int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
+{
+	struct ttm_operation_ctx ctx = {
+		.interruptible = true,
+		.no_wait_gpu = false,
+	};
+
+	if (vm) {
+		lockdep_assert_held(&vm->lock);
+		xe_vm_assert_held(vm);
+
+		ctx.allow_res_evict = allow_res_evict;
+		ctx.resv = xe_vm_resv(vm);
+	}
+
+	return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
+}
+
+/*
+ * A TTM bo is an xe bo iff it was initialised with xe's destroy callback.
+ */
+bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
+{
+	return bo->destroy == &xe_ttm_bo_destroy;
+}
+
+/*
+ * Resolve a BO address. There is no assert to check if the proper lock is held
+ * so it should only be used in cases where it is not fatal to get the wrong
+ * address, such as printing debug information, but not in cases where memory is
+ * written based on this result.
+ */
+dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	struct xe_res_cursor cur;
+	u64 page;
+
+	xe_assert(xe, page_size <= PAGE_SIZE);
+	page = offset >> PAGE_SHIFT;
+	offset &= (PAGE_SIZE - 1);
+
+	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
+		/* Neither VRAM nor stolen: resolve through xe_bo_sg() */
+		xe_assert(xe, bo->ttm.ttm);
+
+		xe_res_first_sg(xe_bo_sg(bo), page << PAGE_SHIFT,
+				page_size, &cur);
+		return xe_res_dma(&cur) + offset;
+	}
+
+	/* VRAM or stolen: rebase the cursor start by the region's GPU offset */
+	xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
+		     page_size, &cur);
+	return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource);
+}
+
+dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
+{
+	if (!READ_ONCE(bo->ttm.pin_count))
+		xe_bo_assert_held(bo);
+	return __xe_bo_addr(bo, offset, page_size);
+}
+
+int xe_bo_vmap(struct xe_bo *bo)
+{
+	void *virtual;
+	bool is_iomem;
+	int ret;
+
+	xe_bo_assert_held(bo);
+
+	if (!(bo->flags & XE_BO_NEEDS_CPU_ACCESS))
+		return -EINVAL;
+
+	if (!iosys_map_is_null(&bo->vmap))
+		return 0;
+
+	/*
+	 * We use this more or less deprecated interface for now since
+	 * ttm_bo_vmap() doesn't offer the optimization of kmapping
+	 * single page bos, which is done here.
+	 * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
+	 * to use struct iosys_map.
+	 */
+	ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap);
+	if (ret)
+		return ret;
+
+	virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
+	if (is_iomem)
+		iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual);
+	else
+		iosys_map_set_vaddr(&bo->vmap, virtual);
+
+	return 0;
+}
+
+static void __xe_bo_vunmap(struct xe_bo *bo)
+{
+	if (iosys_map_is_null(&bo->vmap))
+		return;	/* no mapping from xe_bo_vmap() to tear down */
+	iosys_map_clear(&bo->vmap);
+	ttm_bo_kunmap(&bo->kmap);
+}
+
+void xe_bo_vunmap(struct xe_bo *bo)
+{
+	xe_bo_assert_held(bo);
+	__xe_bo_vunmap(bo);
+}
+
+int xe_gem_create_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_gem_create *args = data;
+	struct xe_vm *vm = NULL;
+	struct xe_bo *bo;
+	unsigned int bo_flags;
+	u32 handle;
+	int err;
+
+	if (XE_IOCTL_DBG(xe, args->extensions) ||
+	    XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
+	/* at least one valid memory placement must be specified */
+	if (XE_IOCTL_DBG(xe, (args->placement & ~xe->info.mem_region_mask) ||
+			 !args->placement))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->flags &
+			 ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING |
+			   DRM_XE_GEM_CREATE_FLAG_SCANOUT |
+			   DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM)))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->handle))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, !args->size))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->size > SIZE_MAX))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
+		return -EINVAL;
+
+	bo_flags = 0;
+	if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
+		bo_flags |= XE_BO_DEFER_BACKING;
+
+	if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
+		bo_flags |= XE_BO_SCANOUT_BIT;
+
+	bo_flags |= args->placement << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1);
+
+	if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
+		if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_CREATE_VRAM_MASK)))
+			return -EINVAL;
+
+		bo_flags |= XE_BO_NEEDS_CPU_ACCESS;
+	}
+
+	if (XE_IOCTL_DBG(xe, !args->cpu_caching ||
+			 args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_CREATE_VRAM_MASK &&
+			 args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_SCANOUT_BIT &&
+			 args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
+		return -EINVAL;
+
+	if (args->vm_id) {
+		vm = xe_vm_lookup(xef, args->vm_id);
+		if (XE_IOCTL_DBG(xe, !vm))
+			return -ENOENT;
+		err = xe_vm_lock(vm, true);
+		if (err)
+			goto out_vm;
+	}
+
+	bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching,
+			       ttm_bo_type_device, bo_flags);
+
+	if (vm)
+		xe_vm_unlock(vm);
+
+	if (IS_ERR(bo)) {
+		err = PTR_ERR(bo);
+		goto out_vm;
+	}
+
+	err = drm_gem_handle_create(file, &bo->ttm.base, &handle);
+	if (err)
+		goto out_bulk;
+
+	args->handle = handle;
+	goto out_put;
+
+out_bulk:
+	if (vm && !xe_vm_in_fault_mode(vm)) {
+		xe_vm_lock(vm, false);
+		__xe_bo_unset_bulk_move(bo);
+		xe_vm_unlock(vm);
+	}
+out_put:
+	xe_bo_put(bo);
+out_vm:
+	if (vm)
+		xe_vm_put(vm);
+
+	return err;
+}
+
+int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct drm_xe_gem_mmap_offset *args = data;
+	struct drm_gem_object *gem_obj;
+
+	if (XE_IOCTL_DBG(xe, args->extensions) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->flags))
+		return -EINVAL;
+
+	gem_obj = drm_gem_object_lookup(file, args->handle);
+	if (XE_IOCTL_DBG(xe, !gem_obj))
+		return -ENOENT;
+
+	/* The mmap offset was set up at BO allocation time. */
+	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
+
+	xe_bo_put(gem_to_xe_bo(gem_obj));
+	return 0;
+}
+
+/**
+ * xe_bo_lock() - Lock the buffer object's dma_resv object
+ * @bo: The struct xe_bo whose lock is to be taken
+ * @intr: Whether to perform any wait interruptible
+ *
+ * Locks the buffer object's dma_resv object. If the buffer object is
+ * pointing to a shared dma_resv object, that shared lock is locked.
+ *
+ * Return: 0 on success, -EINTR if @intr is true and the wait for a
+ * contended lock was interrupted. If @intr is set to false, the
+ * function always returns 0.
+ */
+int xe_bo_lock(struct xe_bo *bo, bool intr)
+{
+	if (intr)
+		return dma_resv_lock_interruptible(bo->ttm.base.resv, NULL);
+
+	dma_resv_lock(bo->ttm.base.resv, NULL);
+
+	return 0;
+}
+
+/**
+ * xe_bo_unlock() - Unlock the buffer object's dma_resv object
+ * @bo: The struct xe_bo whose lock is to be released.
+ *
+ * Unlock a buffer object lock that was locked by xe_bo_lock().
+ */
+void xe_bo_unlock(struct xe_bo *bo)
+{
+	dma_resv_unlock(bo->ttm.base.resv);
+}
+
+/**
+ * xe_bo_can_migrate - Whether a buffer object likely can be migrated
+ * @bo: The buffer object to migrate
+ * @mem_type: The TTM memory type intended to migrate to
+ *
+ * Check whether the buffer object supports migration to the
+ * given memory type. Note that pinning may affect the ability to migrate as
+ * returned by this function.
+ *
+ * This function is primarily intended as a helper for checking the
+ * possibility to migrate buffer objects and can be called without
+ * the object lock held.
+ *
+ * Return: true if migration is possible, false otherwise.
+ */
+bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type)
+{
+	unsigned int cur_place;
+
+	if (bo->ttm.type == ttm_bo_type_kernel)
+		return true;
+
+	if (bo->ttm.type == ttm_bo_type_sg)
+		return false;
+
+	for (cur_place = 0; cur_place < bo->placement.num_placement;
+	     cur_place++) {
+		if (bo->placements[cur_place].mem_type == mem_type)
+			return true;
+	}
+
+	return false;
+}
+
+static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
+{
+	memset(place, 0, sizeof(*place));
+	place->mem_type = mem_type;
+}
+
+/**
+ * xe_bo_migrate - Migrate an object to the desired region id
+ * @bo: The buffer object to migrate.
+ * @mem_type: The TTM region type to migrate to.
+ *
+ * Attempt to migrate the buffer object to the desired memory region. The
+ * buffer object may not be pinned, and must be locked.
+ * On successful completion, the object memory type will be updated,
+ * but an async migration task may not have completed yet, and to
+ * accomplish that, the object's kernel fences must be signaled with
+ * the object lock held.
+ *
+ * Return: 0 on success. Negative error code on failure. In particular may
+ * return -EINTR or -ERESTARTSYS if signal pending.
+ */
+int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
+{
+	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
+	struct ttm_operation_ctx ctx = {
+		.interruptible = true,
+		.no_wait_gpu = false,
+	};
+	struct ttm_placement placement;
+	struct ttm_place requested;
+
+	xe_bo_assert_held(bo);
+
+	if (bo->ttm.resource->mem_type == mem_type)
+		return 0;
+
+	if (xe_bo_is_pinned(bo))
+		return -EBUSY;
+
+	if (!xe_bo_can_migrate(bo, mem_type))
+		return -EINVAL;
+
+	xe_place_from_ttm_type(mem_type, &requested);
+	placement.num_placement = 1;
+	placement.num_busy_placement = 1;
+	placement.placement = &requested;
+	placement.busy_placement = &requested;
+
+	/*
+	 * Stolen needs to be handled like below VRAM handling if we ever need
+	 * to support it.
+	 */
+	drm_WARN_ON(&xe->drm, mem_type == XE_PL_STOLEN);
+
+	if (mem_type_is_vram(mem_type)) {
+		u32 c = 0;
+
+		add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
+	}
+
+	return ttm_bo_validate(&bo->ttm, &placement, &ctx);
+}
+
+/**
+ * xe_bo_evict - Evict an object to evict placement
+ * @bo: The buffer object to migrate.
+ * @force_alloc: Set force_alloc in ttm_operation_ctx
+ *
+ * On successful completion, the object memory will be moved to evict
+ * placement. This function blocks until the object has been fully moved.
+ *
+ * Return: 0 on success. Negative error code on failure.
+ */
+int xe_bo_evict(struct xe_bo *bo, bool force_alloc)
+{
+	struct ttm_operation_ctx ctx = {
+		.interruptible = false,
+		.no_wait_gpu = false,
+		.force_alloc = force_alloc,
+	};
+	struct ttm_placement placement;
+	int ret;
+
+	xe_evict_flags(&bo->ttm, &placement);
+	ret = ttm_bo_validate(&bo->ttm, &placement, &ctx);
+	if (ret)
+		return ret;
+
+	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
+			      false, MAX_SCHEDULE_TIMEOUT);
+
+	return 0;
+}
+
+/**
+ * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when
+ * placed in system memory.
+ * @bo: The xe_bo
+ *
+ * Return: true if extra pages need to be allocated, false otherwise.
+ */
+bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+
+	if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device)
+		return false;
+
+	/* On discrete GPUs, if the GPU can access this buffer from
+	 * system memory (i.e., it allows XE_PL_TT placement), FlatCCS
+	 * can't be used since there's no CCS storage associated with
+	 * non-VRAM addresses.
+	 */
+	if (IS_DGFX(xe) && (bo->flags & XE_BO_CREATE_SYSTEM_BIT))
+		return false;
+
+	return true;
+}
+
+/**
+ * __xe_bo_release_dummy() - Dummy kref release function
+ * @kref: The embedded struct kref.
+ *
+ * Dummy release function for xe_bo_put_deferred(). Keep off.
+ */
+void __xe_bo_release_dummy(struct kref *kref)
+{
+}
+
+/**
+ * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred().
+ * @deferred: The lockless list used for the call to xe_bo_put_deferred().
+ *
+ * Puts all bos whose put was deferred by xe_bo_put_deferred().
+ * The @deferred list can be either an onstack local list or a global
+ * shared list used by a workqueue.
+ */
+void xe_bo_put_commit(struct llist_head *deferred)
+{
+	struct llist_node *freed;
+	struct xe_bo *bo, *next;
+
+	if (!deferred)
+		return;
+
+	freed = llist_del_all(deferred);
+	if (!freed)
+		return;
+
+	llist_for_each_entry_safe(bo, next, freed, freed)
+		drm_gem_object_free(&bo->ttm.base.refcount);
+}
+
+/**
+ * xe_bo_dumb_create - Create a dumb bo as backing for a fb
+ * @file_priv: The drm file in which the gem handle is created
+ * @dev: The drm device
+ * @args: Width, height and bpp in; pitch, size and handle out
+ *
+ * See dumb_create() hook in include/drm/drm_drv.h
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int xe_bo_dumb_create(struct drm_file *file_priv,
+		      struct drm_device *dev,
+		      struct drm_mode_create_dumb *args)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_bo *bo;
+	uint32_t handle;
+	int cpp = DIV_ROUND_UP(args->bpp, 8);
+	int err;
+	u32 page_size = max_t(u32, PAGE_SIZE,
+		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K);
+
+	args->pitch = ALIGN(args->width * cpp, 64);
+	args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
+			   page_size);
+
+	bo = xe_bo_create_user(xe, NULL, NULL, args->size,
+			       DRM_XE_GEM_CPU_CACHING_WC,
+			       ttm_bo_type_device,
+			       XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
+			       XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT |
+			       XE_BO_NEEDS_CPU_ACCESS);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle);
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_put(&bo->ttm.base);
+	if (!err)
+		args->handle = handle;
+	return err;
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_bo.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
new file mode 100644
index 000000000000..8be42ac6cd07
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -0,0 +1,356 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_BO_H_
+#define _XE_BO_H_
+
+#include <drm/ttm/ttm_tt.h>
+
+#include "xe_bo_types.h"
+#include "xe_macros.h"
+#include "xe_vm_types.h"
+#include "xe_vm.h"
+
+/**
+ * xe_vm_assert_held(vm) - Assert that the vm's reservation object is held.
+ * @vm: The vm
+ */
+#define xe_vm_assert_held(vm) dma_resv_assert_held(xe_vm_resv(vm))
+
+
+
+#define XE_DEFAULT_GTT_SIZE_MB          3072ULL /* 3GB by default */
+
+#define XE_BO_CREATE_USER_BIT		BIT(0)
+/* The bits below need to be contiguous, or things break */
+#define XE_BO_CREATE_SYSTEM_BIT		BIT(1)
+#define XE_BO_CREATE_VRAM0_BIT		BIT(2)
+#define XE_BO_CREATE_VRAM1_BIT		BIT(3)
+#define XE_BO_CREATE_VRAM_MASK		(XE_BO_CREATE_VRAM0_BIT | \
+					 XE_BO_CREATE_VRAM1_BIT)
+/* -- */
+#define XE_BO_CREATE_STOLEN_BIT		BIT(4)
+#define XE_BO_CREATE_VRAM_IF_DGFX(tile) \
+	(IS_DGFX(tile_to_xe(tile)) ? XE_BO_CREATE_VRAM0_BIT << (tile)->id : \
+	 XE_BO_CREATE_SYSTEM_BIT)
+#define XE_BO_CREATE_GGTT_BIT		BIT(5)
+#define XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT BIT(6)
+#define XE_BO_CREATE_PINNED_BIT		BIT(7)
+#define XE_BO_CREATE_NO_RESV_EVICT	BIT(8)
+#define XE_BO_DEFER_BACKING		BIT(9)
+#define XE_BO_SCANOUT_BIT		BIT(10)
+#define XE_BO_FIXED_PLACEMENT_BIT	BIT(11)
+#define XE_BO_PAGETABLE			BIT(12)
+#define XE_BO_NEEDS_CPU_ACCESS		BIT(13)
+/* these are triggered internally only */
+#define XE_BO_INTERNAL_TEST		BIT(30)
+#define XE_BO_INTERNAL_64K		BIT(31)
+
+#define XELPG_PPGTT_PTE_PAT3		BIT_ULL(62)
+#define XE2_PPGTT_PTE_PAT4		BIT_ULL(61)
+#define XE_PPGTT_PDE_PDPE_PAT2		BIT_ULL(12)
+#define XE_PPGTT_PTE_PAT2		BIT_ULL(7)
+#define XE_PPGTT_PTE_PAT1		BIT_ULL(4)
+#define XE_PPGTT_PTE_PAT0		BIT_ULL(3)
+
+#define XE_PTE_SHIFT			12
+#define XE_PAGE_SIZE			(1 << XE_PTE_SHIFT)
+#define XE_PTE_MASK			(XE_PAGE_SIZE - 1)
+#define XE_PDE_SHIFT			(XE_PTE_SHIFT - 3)
+#define XE_PDES				(1 << XE_PDE_SHIFT)
+#define XE_PDE_MASK			(XE_PDES - 1)
+
+#define XE_64K_PTE_SHIFT		16
+#define XE_64K_PAGE_SIZE		(1 << XE_64K_PTE_SHIFT)
+#define XE_64K_PTE_MASK			(XE_64K_PAGE_SIZE - 1)
+#define XE_64K_PDE_MASK			(XE_PDE_MASK >> 4)
+
+#define XE_PDE_PS_2M			BIT_ULL(7)
+#define XE_PDPE_PS_1G			BIT_ULL(7)
+#define XE_PDE_IPS_64K			BIT_ULL(11)
+
+#define XE_GGTT_PTE_DM			BIT_ULL(1)
+#define XE_USM_PPGTT_PTE_AE		BIT_ULL(10)
+#define XE_PPGTT_PTE_DM			BIT_ULL(11)
+#define XE_PDE_64K			BIT_ULL(6)
+#define XE_PTE_PS64			BIT_ULL(8)
+#define XE_PTE_NULL			BIT_ULL(9)
+
+#define XE_PAGE_PRESENT			BIT_ULL(0)
+#define XE_PAGE_RW			BIT_ULL(1)
+
+#define XE_PL_SYSTEM		TTM_PL_SYSTEM
+#define XE_PL_TT		TTM_PL_TT
+#define XE_PL_VRAM0		TTM_PL_VRAM
+#define XE_PL_VRAM1		(XE_PL_VRAM0 + 1)
+#define XE_PL_STOLEN		(TTM_NUM_MEM_TYPES - 1)
+
+#define XE_BO_PROPS_INVALID	(-1)
+
+struct sg_table;
+
+struct xe_bo *xe_bo_alloc(void);
+void xe_bo_free(struct xe_bo *bo);
+
+struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
+				     struct xe_tile *tile, struct dma_resv *resv,
+				     struct ttm_lru_bulk_move *bulk, size_t size,
+				     u16 cpu_caching, enum ttm_bo_type type,
+				     u32 flags);
+struct xe_bo *
+xe_bo_create_locked_range(struct xe_device *xe,
+			  struct xe_tile *tile, struct xe_vm *vm,
+			  size_t size, u64 start, u64 end,
+			  enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
+				  struct xe_vm *vm, size_t size,
+				  enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
+			   struct xe_vm *vm, size_t size,
+			   enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
+				struct xe_vm *vm, size_t size,
+				u16 cpu_caching,
+				enum ttm_bo_type type,
+				u32 flags);
+struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
+				   struct xe_vm *vm, size_t size,
+				   enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
+				      struct xe_vm *vm, size_t size, u64 offset,
+				      enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
+				     const void *data, size_t size,
+				     enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
+					   size_t size, u32 flags);
+struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
+					     const void *data, size_t size, u32 flags);
+
+int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
+			      u32 bo_flags);
+
+static inline struct xe_bo *ttm_to_xe_bo(const struct ttm_buffer_object *bo)
+{
+	return container_of(bo, struct xe_bo, ttm); /* @bo is the xe_bo's ttm member */
+}
+
+static inline struct xe_bo *gem_to_xe_bo(const struct drm_gem_object *obj)
+{
+	return container_of(obj, struct xe_bo, ttm.base); /* @obj is xe_bo.ttm.base */
+}
+
+#define xe_bo_device(bo) ttm_to_xe_device((bo)->ttm.bdev)
+
+static inline struct xe_bo *xe_bo_get(struct xe_bo *bo)
+{
+	if (!bo)
+		return NULL; /* NULL in, NULL out: no reference is taken */
+	drm_gem_object_get(&bo->ttm.base);
+	return bo;
+}
+
+static inline void xe_bo_put(struct xe_bo *bo)
+{
+	if (bo) /* putting a NULL bo is a no-op */
+		drm_gem_object_put(&bo->ttm.base);
+}
+
+static inline void __xe_bo_unset_bulk_move(struct xe_bo *bo)
+{
+	if (bo) /* a NULL bo is silently ignored */
+		ttm_bo_set_bulk_move(&bo->ttm, NULL);
+}
+
+static inline void xe_bo_assert_held(struct xe_bo *bo)
+{
+	if (bo) /* a NULL bo trivially passes the assertion */
+		dma_resv_assert_held((bo)->ttm.base.resv);
+}
+
+int xe_bo_lock(struct xe_bo *bo, bool intr);
+
+void xe_bo_unlock(struct xe_bo *bo);
+
+static inline void xe_bo_unlock_vm_held(struct xe_bo *bo)
+{
+	if (!bo)
+		return;
+	XE_WARN_ON(bo->vm && bo->ttm.base.resv != xe_vm_resv(bo->vm));
+	if (!bo->vm)
+		dma_resv_unlock(bo->ttm.base.resv); /* no VM: drop the bo's own lock */
+	else
+		xe_vm_assert_held(bo->vm); /* VM lock is only checked, not released */
+}
+
+int xe_bo_pin_external(struct xe_bo *bo);
+int xe_bo_pin(struct xe_bo *bo);
+void xe_bo_unpin_external(struct xe_bo *bo);
+void xe_bo_unpin(struct xe_bo *bo);
+int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict);
+
+static inline bool xe_bo_is_pinned(struct xe_bo *bo)
+{
+	return bo->ttm.pin_count != 0; /* any outstanding TTM pin counts */
+}
+
+static inline void xe_bo_unpin_map_no_vm(struct xe_bo *bo)
+{
+	if (unlikely(!bo))
+		return;
+	/* Unpin under the bo lock, then drop the caller's reference. */
+	xe_bo_lock(bo, false);
+	xe_bo_unpin(bo);
+	xe_bo_unlock(bo);
+	xe_bo_put(bo);
+}
+
+bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo);
+dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size);
+dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size);
+
+static inline dma_addr_t
+xe_bo_main_addr(struct xe_bo *bo, size_t page_size)
+{
+	return xe_bo_addr(bo, 0, page_size); /* address at offset 0 of the bo */
+}
+
+static inline u32
+xe_bo_ggtt_addr(struct xe_bo *bo)
+{
+	XE_WARN_ON(bo->ggtt_node.size > bo->size);
+	XE_WARN_ON(bo->ggtt_node.start + bo->ggtt_node.size > (1ull << 32));
+	return bo->ggtt_node.start; /* warned above if the node ends beyond 4 GiB */
+}
+
+int xe_bo_vmap(struct xe_bo *bo);
+void xe_bo_vunmap(struct xe_bo *bo);
+
+bool mem_type_is_vram(u32 mem_type);
+bool xe_bo_is_vram(struct xe_bo *bo);
+bool xe_bo_is_stolen(struct xe_bo *bo);
+bool xe_bo_is_stolen_devmem(struct xe_bo *bo);
+uint64_t vram_region_gpu_offset(struct ttm_resource *res);
+
+bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type);
+
+int xe_bo_migrate(struct xe_bo *bo, u32 mem_type);
+int xe_bo_evict(struct xe_bo *bo, bool force_alloc);
+
+int xe_bo_evict_pinned(struct xe_bo *bo);
+int xe_bo_restore_pinned(struct xe_bo *bo);
+
+extern struct ttm_device_funcs xe_ttm_funcs;
+extern const char *const xe_mem_type_to_name[];
+
+int xe_gem_create_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file);
+int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file);
+int xe_bo_dumb_create(struct drm_file *file_priv,
+		      struct drm_device *dev,
+		      struct drm_mode_create_dumb *args);
+
+bool xe_bo_needs_ccs_pages(struct xe_bo *bo);
+
+static inline size_t xe_bo_ccs_pages_start(struct xe_bo *bo)
+{
+	return PAGE_ALIGN(bo->ttm.base.size); /* GEM size rounded up to a page */
+}
+
+static inline bool xe_bo_has_pages(struct xe_bo *bo)
+{
+	/* True for a populated TT, or whenever xe_bo_is_vram() says so. */
+	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
+		return true;
+
+	return xe_bo_is_vram(bo);
+}
+
+void __xe_bo_release_dummy(struct kref *kref);
+
+/**
+ * xe_bo_put_deferred() - Put a buffer object with delayed final freeing
+ * @bo: The bo to put. Must not be NULL when @deferred is non-NULL.
+ * @deferred: List to which to add the buffer object if we cannot put, or
+ * NULL if the function is to put unconditionally.
+ *
+ * Since the final freeing of an object includes both sleeping and (!)
+ * memory allocation in the dma_resv individualization, it's not ok
+ * to put an object from atomic context nor from within a held lock
+ * tainted by reclaim. In such situations we want to defer the final
+ * freeing until we've exited the restricting context, or in the worst
+ * case to a workqueue.
+ * This function either puts the object if possible without the refcount
+ * reaching zero, or adds it to the @deferred list if that was not possible.
+ * The caller needs to follow up with a call to xe_bo_put_commit() to actually
+ * put the bo iff this function returns true. It's safe to always
+ * follow up with a call to xe_bo_put_commit().
+ * TODO: It's TTM that is the villain here. Perhaps TTM should add an
+ * interface like this.
+ *
+ * Return: true if @bo was the first object put on the @deferred list,
+ * false otherwise.
+ */
+static inline bool
+xe_bo_put_deferred(struct xe_bo *bo, struct llist_head *deferred)
+{
+	if (!deferred) {
+		xe_bo_put(bo);
+		return false;
+	}
+
+	if (!kref_put(&bo->ttm.base.refcount, __xe_bo_release_dummy))
+		return false; /* other references remain: nothing to defer */
+
+	return llist_add(&bo->freed, deferred); /* freed by xe_bo_put_commit() */
+}
+
+void xe_bo_put_commit(struct llist_head *deferred);
+
+struct sg_table *xe_bo_sg(struct xe_bo *bo);
+
+/**
+ * xe_sg_segment_size() - Provides upper limit for sg segment size.
+ * @dev: device pointer
+ *
+ * Return: the maximum segment size for the 'struct scatterlist'
+ * elements.
+ */
+static inline unsigned int xe_sg_segment_size(struct device *dev)
+{
+	struct scatterlist __maybe_unused sg;
+	size_t max = BIT_ULL(sizeof(sg.length) * 8) - 1;
+
+	max = min_t(size_t, max, dma_max_mapping_size(dev));
+
+	/*
+	 * The iommu_dma_map_sg() function ensures iova allocation doesn't
+	 * cross dma segment boundary. It does so by padding some sg elements.
+	 * This can cause overflow, ending up with sg->length being set to 0.
+	 * Avoid this by ensuring maximum segment size is half of 'max'
+	 * rounded down to PAGE_SIZE.
+	 */
+	return round_down(max / 2, PAGE_SIZE);
+}
+
+#define i915_gem_object_flush_if_display(obj)		((void)(obj))
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+/**
+ * xe_bo_is_mem_type - Whether the bo currently resides in the given
+ * TTM memory type
+ * @bo: The bo to check. Its dma-resv lock must be held by the caller.
+ * @mem_type: The TTM memory type.
+ *
+ * Return: true iff the bo resides in @mem_type, false otherwise.
+ */
+static inline bool xe_bo_is_mem_type(struct xe_bo *bo, u32 mem_type)
+{
+	xe_bo_assert_held(bo);
+	return bo->ttm.resource->mem_type == mem_type;
+}
+#endif
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bo_doc.h b/drivers/gpu/drm/xe/xe_bo_doc.h
new file mode 100644
index 000000000000..f57d440cc95a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo_doc.h
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_BO_DOC_H_
+#define _XE_BO_DOC_H_
+
+/**
+ * DOC: Buffer Objects (BO)
+ *
+ * BO management
+ * =============
+ *
+ * TTM manages (placement, eviction, etc...) all BOs in XE.
+ *
+ * BO creation
+ * ===========
+ *
+ * Create a chunk of memory which can be used by the GPU. Placement rules
+ * (sysmem or vram region) are passed in upon creation. TTM handles placement of BO
+ * and can trigger eviction of other BOs to make space for the new BO.
+ *
+ * Kernel BOs
+ * ----------
+ *
+ * A kernel BO is created as part of driver load (e.g. uC firmware images, GuC
+ * ADS, etc...) or a BO created as part of a user operation which requires
+ * a kernel BO (e.g. engine state, memory for page tables, etc...). These BOs
+ * are typically mapped in the GGTT (any kernel BOs aside from memory for page tables
+ * are in the GGTT), are pinned (can't move or be evicted at runtime), have a
+ * vmap (XE can access the memory via xe_map layer) and have contiguous physical
+ * memory.
+ *
+ * More details of why kernel BOs are pinned and contiguous below.
+ *
+ * User BOs
+ * --------
+ *
+ * A user BO is created via the DRM_IOCTL_XE_GEM_CREATE IOCTL. Once it is
+ * created the BO can be mmap'd (via DRM_IOCTL_XE_GEM_MMAP_OFFSET) for user
+ * access and it can be bound for GPU access (via DRM_IOCTL_XE_VM_BIND). All
+ * user BOs are evictable and user BOs are never pinned by XE. The allocation of
+ * the backing store can be deferred from creation time until first use which is
+ * either mmap, bind, or pagefault.
+ *
+ * Private BOs
+ * ~~~~~~~~~~~
+ *
+ * A private BO is a user BO created with a valid VM argument passed into the
+ * create IOCTL. If a BO is private it cannot be exported via prime FD and
+ * mappings can only be created for the BO within the VM it is tied to. Lastly,
+ * the BO dma-resv slots / lock point to the VM's dma-resv slots / lock (all
+ * private BOs to a VM share common dma-resv slots / lock).
+ *
+ * External BOs
+ * ~~~~~~~~~~~~
+ *
+ * An external BO is a user BO created with a NULL VM argument passed into the
+ * create IOCTL. An external BO can be shared with different UMDs / devices via
+ * prime FD and the BO can be mapped into multiple VMs. An external BO has its
+ * own unique dma-resv slots / lock. An external BO will be in an array of all
+ * VMs which have a mapping of the BO. This allows VMs to look up and lock all
+ * external BOs mapped in the VM as needed.
+ *
+ * BO placement
+ * ~~~~~~~~~~~~
+ *
+ * When a user BO is created, a mask of valid placements is passed indicating
+ * which memory regions are considered valid.
+ *
+ * The memory region information is available via query uAPI (TODO: add link).
+ *
+ * BO validation
+ * =============
+ *
+ * BO validation (ttm_bo_validate) refers to ensuring a BO has a valid
+ * placement. If a BO was swapped to temporary storage, a validation call will
+ * trigger a move back to a valid (location where GPU can access BO) placement.
+ * Validation of a BO may evict other BOs to make room for the BO being
+ * validated.
+ *
+ * BO eviction / moving
+ * ====================
+ *
+ * All eviction (or in other words, moving a BO from one memory location to
+ * another) is routed through TTM with a callback into XE.
+ *
+ * Runtime eviction
+ * ----------------
+ *
+ * Runtime eviction refers to eviction during normal operation, when TTM decides
+ * it needs to move a BO. Typically this is because TTM needs to make room for
+ * another BO and the evicted BO is the first BO on the LRU list that is not locked.
+ *
+ * An example of this is a new BO which can only be placed in VRAM but there is
+ * not space in VRAM. There could be multiple BOs which have sysmem and VRAM
+ * placement rules which currently reside in VRAM; TTM will trigger a move of
+ * one (or multiple) of these BO(s) until there is room in VRAM to place the new
+ * BO. The evicted BO(s) are valid but still need new bindings before the BO is
+ * used again (exec or compute mode rebind worker).
+ *
+ * Another example would be, TTM can't find a BO to evict which has another
+ * valid placement. In this case TTM will evict one (or multiple) unlocked BO(s)
+ * to a temporary unreachable (invalid) placement. The evicted BO(s) are invalid
+ * and before next use need to be moved to a valid placement and rebound.
+ *
+ * In both cases, moves of these BOs are scheduled behind the fences in the BO's
+ * dma-resv slots.
+ *
+ * WW locking tries to ensure that if 2 VMs each use 51% of the memory, forward
+ * progress is made on both VMs.
+ *
+ * Runtime eviction uses a per-GT migration engine (TODO: link to migration
+ * engine doc) to do a GPU memcpy from one location to another.
+ *
+ * Rebinds after runtime eviction
+ * ------------------------------
+ *
+ * When BOs are moved, every mapping (VMA) of the BO needs to be rebound before
+ * the BO is used again. Every VMA is added to an evicted list of its VM when
+ * the BO is moved. This is safe because of the VM locking structure (TODO: link
+ * to VM locking doc). On the next use of a VM (exec or compute mode rebind
+ * worker) the evicted VMA list is checked and rebinds are triggered. In the
+ * case of faulting VM, the rebind is done in the page fault handler.
+ *
+ * Suspend / resume eviction of VRAM
+ * ---------------------------------
+ *
+ * During device suspend / resume VRAM may lose power which means the contents
+ * of VRAM's memory is blown away. Thus BOs present in VRAM at the time of
+ * suspend must be moved to sysmem in order for their contents to be saved.
+ *
+ * A simple TTM call (ttm_resource_manager_evict_all) can move all non-pinned
+ * (user) BOs to sysmem. External BOs that are pinned need to be manually
+ * evicted with a simple loop + xe_bo_evict call. It gets a little trickier
+ * with kernel BOs.
+ *
+ * Some kernel BOs are used by the GT migration engine to do moves, thus we
+ * can't move all of the BOs via the GT migration engine. For simplicity, use a
+ * TTM memcpy (CPU) to move any kernel (pinned) BO on either suspend or resume.
+ *
+ * Some kernel BOs need to be restored to the exact same physical location. TTM
+ * makes this rather easy but the caveat is the memory must be contiguous. Again
+ * for simplicity, we enforce that all kernel (pinned) BOs are contiguous and
+ * restored to the same physical location.
+ *
+ * Pinned external BOs in VRAM are restored on resume via the GPU.
+ *
+ * Rebinds after suspend / resume
+ * ------------------------------
+ *
+ * Most kernel BOs have GGTT mappings which must be restored during the resume
+ * process. All user BOs are rebound after validation on their next use.
+ *
+ * Future work
+ * ===========
+ *
+ * Trim the list of BOs which is saved / restored via TTM memcpy on suspend /
+ * resume. All we really need to save / restore via TTM memcpy is the memory
+ * required for the GuC to load and the memory for the GT migrate engine to
+ * operate.
+ *
+ * Do not require kernel BOs to be contiguous in physical memory / restored to
+ * the same physical address on resume. In all likelihood the only memory that
+ * needs to be restored to the same physical address is memory used for page
+ * tables. All of that memory is allocated 1 page at a time so the contiguous
+ * requirement isn't needed. Some work on the vmap code would need to be done if
+ * kernel BOs are not contiguous too.
+ *
+ * Make some kernel BO evictable rather than pinned. An example of this would be
+ * engine state, in all likelihood if the dma-slots of these BOs were properly
+ * used rather than pinning we could safely evict + rebind these BOs as needed.
+ *
+ * Some kernel BOs do not need to be restored on resume (e.g. GuC ADS as that is
+ * repopulated on resume), add flag to mark such objects as no save / restore.
+ */
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
new file mode 100644
index 000000000000..7a264a9ca06e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_bo_evict.h"
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_ggtt.h"
+#include "xe_tile.h"
+
+/**
+ * xe_bo_evict_all - evict all BOs from VRAM
+ * @xe: xe device
+ *
+ * Evict non-pinned user BOs first (via GPU), evict pinned external BOs next
+ * (via GPU), wait for evictions, and finally evict pinned kernel BOs via CPU.
+ * All eviction magic done via TTM calls.
+ * Evict == move VRAM BOs to temporary (typically system) memory.
+ *
+ * This function should be called before the device goes into a suspend state
+ * where the VRAM loses power. It does nothing on non-DGFX devices.
+ *
+ * Return: 0 on success, otherwise the error of the first failed eviction.
+ */
+int xe_bo_evict_all(struct xe_device *xe)
+{
+	struct ttm_device *bdev = &xe->ttm;
+	struct xe_bo *bo;
+	struct xe_tile *tile;
+	struct list_head still_in_list;
+	u32 mem_type;
+	u8 id;
+	int ret;
+
+	if (!IS_DGFX(xe))
+		return 0;
+
+	/* Non-pinned user memory: let TTM evict everything in each VRAM manager */
+	for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) {
+		struct ttm_resource_manager *man =
+			ttm_manager_type(bdev, mem_type);
+
+		if (man) {
+			ret = ttm_resource_manager_evict_all(bdev, man);
+			if (ret)
+				return ret;
+		}
+	}
+
+	/* Pinned user memory in VRAM; still_in_list parks the BOs already handled */
+	INIT_LIST_HEAD(&still_in_list);
+	spin_lock(&xe->pinned.lock);
+	for (;;) {
+		bo = list_first_entry_or_null(&xe->pinned.external_vram,
+					      typeof(*bo), pinned_link);
+		if (!bo)
+			break;
+		xe_bo_get(bo);
+		list_move_tail(&bo->pinned_link, &still_in_list);
+		spin_unlock(&xe->pinned.lock);
+
+		xe_bo_lock(bo, false);
+		ret = xe_bo_evict_pinned(bo);
+		xe_bo_unlock(bo);
+		xe_bo_put(bo);
+		if (ret) {
+			spin_lock(&xe->pinned.lock);
+			list_splice_tail(&still_in_list,
+					 &xe->pinned.external_vram);
+			spin_unlock(&xe->pinned.lock);
+			return ret;
+		}
+
+		spin_lock(&xe->pinned.lock);
+	}
+	list_splice_tail(&still_in_list, &xe->pinned.external_vram);
+	spin_unlock(&xe->pinned.lock);
+
+	/*
+	 * Wait for all user BOs to be evicted as those evictions depend on the
+	 * memory moved below.
+	 */
+	for_each_tile(tile, xe, id)
+		xe_tile_migrate_wait(tile);
+
+	spin_lock(&xe->pinned.lock); /* pinned kernel BOs: present -> evicted list */
+	for (;;) {
+		bo = list_first_entry_or_null(&xe->pinned.kernel_bo_present,
+					      typeof(*bo), pinned_link);
+		if (!bo)
+			break;
+		xe_bo_get(bo);
+		list_move_tail(&bo->pinned_link, &xe->pinned.evicted);
+		spin_unlock(&xe->pinned.lock);
+
+		xe_bo_lock(bo, false);
+		ret = xe_bo_evict_pinned(bo);
+		xe_bo_unlock(bo);
+		xe_bo_put(bo);
+		if (ret)
+			return ret; /* NOTE(review): bo was already moved to pinned.evicted */
+
+		spin_lock(&xe->pinned.lock);
+	}
+	spin_unlock(&xe->pinned.lock);
+
+	return 0;
+}
+
+/**
+ * xe_bo_restore_kernel - restore kernel BOs to VRAM
+ * @xe: xe device
+ *
+ * Move kernel BOs from temporary (typically system) memory to VRAM via CPU. All
+ * moves done via TTM calls. BOs with XE_BO_CREATE_GGTT_BIT are re-mapped in GGTT.
+ * This function should be called early, before trying to init the GT, on device
+ * resume.
+ *
+ * Return: 0 on success (or on non-DGFX devices), else the first restore error.
+ */
+int xe_bo_restore_kernel(struct xe_device *xe)
+{
+	struct xe_bo *bo;
+	int ret;
+
+	if (!IS_DGFX(xe))
+		return 0;
+
+	spin_lock(&xe->pinned.lock);
+	for (;;) {
+		bo = list_first_entry_or_null(&xe->pinned.evicted,
+					      typeof(*bo), pinned_link);
+		if (!bo)
+			break;
+		xe_bo_get(bo);
+		list_move_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
+		spin_unlock(&xe->pinned.lock);
+
+		xe_bo_lock(bo, false);
+		ret = xe_bo_restore_pinned(bo);
+		xe_bo_unlock(bo);
+		if (ret) {
+			xe_bo_put(bo);
+			return ret;
+		}
+
+		if (bo->flags & XE_BO_CREATE_GGTT_BIT) {
+			struct xe_tile *tile = bo->tile;
+
+			mutex_lock(&tile->mem.ggtt->lock);
+			xe_ggtt_map_bo(tile->mem.ggtt, bo);
+			mutex_unlock(&tile->mem.ggtt->lock);
+		}
+
+		/*
+		 * We expect validate to trigger a move to VRAM and our move code
+		 * should set up the iosys map.
+		 */
+		xe_assert(xe, !iosys_map_is_null(&bo->vmap));
+		xe_assert(xe, xe_bo_is_vram(bo));
+
+		xe_bo_put(bo);
+
+		spin_lock(&xe->pinned.lock);
+	}
+	spin_unlock(&xe->pinned.lock);
+
+	return 0;
+}
+
+/**
+ * xe_bo_restore_user - restore pinned user BOs to VRAM
+ * @xe: xe device
+ *
+ * Move pinned user BOs from temporary (typically system) memory to VRAM via
+ * TTM calls. NOTE(review): this said "via CPU", but the function then waits on
+ * the tile migrate engines, which suggests GPU copies - confirm.
+ * This function should be called late, after GT init, on device resume.
+ * Return: 0 on success (or on non-DGFX devices), else the first restore error.
+ */
+int xe_bo_restore_user(struct xe_device *xe)
+{
+	struct xe_bo *bo;
+	struct xe_tile *tile;
+	struct list_head still_in_list;
+	u8 id;
+	int ret;
+
+	if (!IS_DGFX(xe))
+		return 0;
+
+	/* Pinned user memory in VRAM should be validated on resume */
+	INIT_LIST_HEAD(&still_in_list); /* parks the BOs already handled */
+	spin_lock(&xe->pinned.lock);
+	for (;;) {
+		bo = list_first_entry_or_null(&xe->pinned.external_vram,
+					      typeof(*bo), pinned_link);
+		if (!bo)
+			break;
+		list_move_tail(&bo->pinned_link, &still_in_list);
+		xe_bo_get(bo);
+		spin_unlock(&xe->pinned.lock);
+
+		xe_bo_lock(bo, false);
+		ret = xe_bo_restore_pinned(bo);
+		xe_bo_unlock(bo);
+		xe_bo_put(bo);
+		if (ret) {
+			spin_lock(&xe->pinned.lock);
+			list_splice_tail(&still_in_list,
+					 &xe->pinned.external_vram);
+			spin_unlock(&xe->pinned.lock);
+			return ret;
+		}
+
+		spin_lock(&xe->pinned.lock);
+	}
+	list_splice_tail(&still_in_list, &xe->pinned.external_vram);
+	spin_unlock(&xe->pinned.lock);
+
+	/* Wait for validate to complete */
+	for_each_tile(tile, xe, id)
+		xe_tile_migrate_wait(tile);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.h b/drivers/gpu/drm/xe/xe_bo_evict.h
new file mode 100644
index 000000000000..746894798852
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo_evict.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_BO_EVICT_H_
+#define _XE_BO_EVICT_H_
+
+struct xe_device;
+
+int xe_bo_evict_all(struct xe_device *xe);
+int xe_bo_restore_kernel(struct xe_device *xe);
+int xe_bo_restore_user(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
new file mode 100644
index 000000000000..64c2249a4e40
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_BO_TYPES_H_
+#define _XE_BO_TYPES_H_
+
+#include <linux/iosys-map.h>
+
+#include <drm/drm_mm.h>
+#include <drm/ttm/ttm_bo.h>
+#include <drm/ttm/ttm_device.h>
+#include <drm/ttm/ttm_execbuf_util.h>
+#include <drm/ttm/ttm_placement.h>
+
+struct xe_device;
+struct xe_vm;
+
+#define XE_BO_MAX_PLACEMENTS	3
+
+/* TODO: To be selected with VM_MADVISE */
+#define	XE_BO_PRIORITY_NORMAL	1
+
+/** @xe_bo: XE buffer object */
+struct xe_bo {
+	/** @ttm: TTM base buffer object */
+	struct ttm_buffer_object ttm;
+	/** @size: Size of this buffer object */
+	size_t size;
+	/** @flags: XE_BO_* flags for this buffer object */
+	u32 flags;
+	/** @vm: VM this BO is attached to, for extobj this will be NULL */
+	struct xe_vm *vm;
+	/** @tile: Tile this BO is attached to (kernel BO only) */
+	struct xe_tile *tile;
+	/** @placements: valid placements for this BO */
+	struct ttm_place placements[XE_BO_MAX_PLACEMENTS];
+	/** @placement: current placement for this BO */
+	struct ttm_placement placement;
+	/** @ggtt_node: GGTT node if this BO is mapped in the GGTT */
+	struct drm_mm_node ggtt_node;
+	/** @vmap: iosys map of this buffer */
+	struct iosys_map vmap;
+	/** @kmap: TTM bo kmap object for internal use only. Keep off. */
+	struct ttm_bo_kmap_obj kmap;
+	/** @pinned_link: link into one of the xe->pinned lists of pinned BOs */
+	struct list_head pinned_link;
+#ifdef CONFIG_PROC_FS
+	/**
+	 * @client: @xe_drm_client which created the bo
+	 */
+	struct xe_drm_client *client;
+	/**
+	 * @client_link: Link into @xe_drm_client.objects_list
+	 */
+	struct list_head client_link;
+#endif
+	/** @props: BO user controlled properties */
+	struct {
+		/** @preferred_mem_class: preferred memory class for this BO */
+		s16 preferred_mem_class;
+		/** @preferred_gt: preferred GT for this BO */
+		s16 preferred_gt;
+		/** @preferred_mem_type: preferred memory type */
+		s32 preferred_mem_type;
+		/**
+		 * @cpu_atomic: the CPU expects to do atomic operations to
+		 * this BO
+		 */
+		bool cpu_atomic;
+		/**
+		 * @device_atomic: the device expects to do atomic operations
+		 * to this BO
+		 */
+		bool device_atomic;
+	} props;
+	/** @freed: List node for delayed put, see xe_bo_put_deferred(). */
+	struct llist_node freed;
+	/** @created: Whether the bo has passed initial creation */
+	bool created;
+
+	/** @ccs_cleared: NOTE(review): undocumented; presumably CCS was cleared - confirm */
+	bool ccs_cleared;
+
+	/**
+	 * @cpu_caching: CPU caching mode. Currently only used for userspace
+	 * objects.
+	 */
+	u16 cpu_caching;
+};
+
+#define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base)
+#define intel_bo_to_i915(bo) to_i915(intel_bo_to_drm_bo(bo)->dev)
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
new file mode 100644
index 000000000000..c56fd7d59f05
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_debugfs.h"
+
+#include <linux/string_helpers.h>
+
+#include <drm/drm_debugfs.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt_debugfs.h"
+#include "xe_step.h"
+
+#ifdef CONFIG_DRM_XE_DEBUG
+#include "xe_bo_evict.h"
+#include "xe_migrate.h"
+#include "xe_vm.h"
+#endif
+
+#ifdef CONFIG_FAULT_INJECTION
+#include <linux/fault-inject.h> /* XXX: fault-inject.h is broken */
+DECLARE_FAULT_ATTR(gt_reset_failure);
+#endif
+
+static struct xe_device *node_to_xe(struct drm_info_node *node)
+{
+	return to_xe_device(node->minor->dev); /* info node -> minor -> drm device */
+}
+
+static int info(struct seq_file *m, void *data)
+{
+	struct xe_device *xe = node_to_xe(m->private); /* m->private: drm_info_node */
+	struct drm_printer p = drm_seq_file_printer(m);
+	struct xe_gt *gt;
+	u8 id;
+
+	drm_printf(&p, "graphics_verx100 %d\n", xe->info.graphics_verx100);
+	drm_printf(&p, "media_verx100 %d\n", xe->info.media_verx100);
+	drm_printf(&p, "stepping G:%s M:%s D:%s B:%s\n",
+		   xe_step_name(xe->info.step.graphics),
+		   xe_step_name(xe->info.step.media),
+		   xe_step_name(xe->info.step.display),
+		   xe_step_name(xe->info.step.basedie));
+	drm_printf(&p, "is_dgfx %s\n", str_yes_no(xe->info.is_dgfx));
+	drm_printf(&p, "platform %d\n", xe->info.platform);
+	drm_printf(&p, "subplatform %d\n", /* 0 unless above XE_SUBPLATFORM_NONE */
+		   xe->info.subplatform > XE_SUBPLATFORM_NONE ? xe->info.subplatform : 0);
+	drm_printf(&p, "devid 0x%x\n", xe->info.devid);
+	drm_printf(&p, "revid %d\n", xe->info.revid);
+	drm_printf(&p, "tile_count %d\n", xe->info.tile_count);
+	drm_printf(&p, "vm_max_level %d\n", xe->info.vm_max_level);
+	drm_printf(&p, "force_execlist %s\n", str_yes_no(xe->info.force_execlist));
+	drm_printf(&p, "has_flat_ccs %s\n", str_yes_no(xe->info.has_flat_ccs));
+	drm_printf(&p, "has_usm %s\n", str_yes_no(xe->info.has_usm));
+	for_each_gt(gt, xe, id) { /* two lines of state per GT */
+		drm_printf(&p, "gt%d force wake %d\n", id,
+			   xe_force_wake_ref(gt_to_fw(gt), XE_FW_GT));
+		drm_printf(&p, "gt%d engine_mask 0x%llx\n", id,
+			   gt->info.engine_mask);
+	}
+
+	return 0;
+}
+
+static const struct drm_info_list debugfs_list[] = {
+	{"info", info, 0}, /* "info" file, contents produced by info() */
+};
+
+static int forcewake_open(struct inode *inode, struct file *file)
+{
+	struct xe_device *xe = inode->i_private; /* data given to debugfs_create_file() */
+	struct xe_gt *gt;
+	u8 id;
+
+	xe_device_mem_access_get(xe); /* dropped again in forcewake_release() */
+
+	for_each_gt(gt, xe, id)
+		XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+
+	return 0; /* a failed forcewake get only triggers the warning above */
+}
+
+static int forcewake_release(struct inode *inode, struct file *file)
+{
+	struct xe_device *xe = inode->i_private; /* data given to debugfs_create_file() */
+	struct xe_gt *gt;
+	u8 id;
+
+	for_each_gt(gt, xe, id)
+		XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+
+	xe_device_mem_access_put(xe); /* pairs with the get in forcewake_open() */
+
+	return 0; /* a failed forcewake put only triggers the warning above */
+}
+
+static const struct file_operations forcewake_all_fops = {
+	.owner = THIS_MODULE,
+	.open = forcewake_open, /* takes mem-access and forcewake references */
+	.release = forcewake_release, /* drops them again */
+};
+
+void xe_debugfs_register(struct xe_device *xe)
+{
+	struct ttm_device *bdev = &xe->ttm;
+	struct drm_minor *minor = xe->drm.primary;
+	struct dentry *root = minor->debugfs_root;
+	struct ttm_resource_manager *man;
+	struct xe_gt *gt;
+	u32 mem_type;
+	u8 id;
+
+	/* Device-wide files live directly under the primary minor's root. */
+	drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list),
+				 root, minor);
+
+	debugfs_create_file("forcewake_all", 0400, root, xe,
+			    &forcewake_all_fops);
+
+	/* One "vramN_mm" file per VRAM manager that actually exists. */
+	for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) {
+		man = ttm_manager_type(bdev, mem_type);
+		if (man) {
+			char name[16];
+
+			snprintf(name, sizeof(name), "vram%d_mm", mem_type - XE_PL_VRAM0);
+			ttm_resource_manager_create_debugfs(man, root, name);
+		}
+	}
+
+	/* Unlike VRAM and stolen, the TT manager is used without a NULL check. */
+	man = ttm_manager_type(bdev, XE_PL_TT);
+	ttm_resource_manager_create_debugfs(man, root, "gtt_mm");
+
+	man = ttm_manager_type(bdev, XE_PL_STOLEN);
+	if (man)
+		ttm_resource_manager_create_debugfs(man, root, "stolen_mm");
+
+	for_each_gt(gt, xe, id)
+		xe_gt_debugfs_register(gt);
+
+#ifdef CONFIG_FAULT_INJECTION
+	fault_create_debugfs_attr("fail_gt_reset", root, &gt_reset_failure);
+#endif
+}
diff --git a/drivers/gpu/drm/xe/xe_debugfs.h b/drivers/gpu/drm/xe/xe_debugfs.h
new file mode 100644
index 000000000000..715b8e2e0bd9
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_debugfs.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_DEBUGFS_H_
+#define _XE_DEBUGFS_H_
+
+struct xe_device;
+
+void xe_debugfs_register(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
new file mode 100644
index 000000000000..68abc0b195be
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_devcoredump.h"
+#include "xe_devcoredump_types.h"
+
+#include <linux/devcoredump.h>
+#include <generated/utsrelease.h>
+
+#include "xe_device.h"
+#include "xe_exec_queue.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_submit.h"
+#include "xe_hw_engine.h"
+
+/**
+ * DOC: Xe device coredump
+ *
+ * Devices overview:
+ * Xe uses dev_coredump infrastructure for exposing the crash errors in a
+ * standardized way.
+ * devcoredump exposes a temporary device under /sys/class/devcoredump/
+ * which is linked with our card device directly.
+ * The core dump can be accessed either from
+ * /sys/class/drm/card<n>/device/devcoredump/ or from
+ * /sys/class/devcoredump/devcd<m> where
+ * /sys/class/devcoredump/devcd<m>/failing_device is a link to
+ * /sys/class/drm/card<n>/device/.
+ *
+ * Snapshot at hang:
+ * The 'data' file is printed with a drm_printer pointer at devcoredump read
+ * time. For this reason, we need to take snapshots from when the hang has
+ * happened, and not only when the user is reading the file. Otherwise the
+ * information is outdated since the resets might have happened in between.
+ *
+ * 'First' failure snapshot:
+ * In general, the first hang is the most critical one since the following hangs
+ * can be a consequence of the initial hang. For this reason we only take the
+ * snapshot of the 'first' failure and ignore subsequent calls of this function,
+ * at least while the coredump device is alive. Dev_coredump has a delayed work
+ * queue that will eventually delete the device and free all the dump
+ * information.
+ */
+
+#ifdef CONFIG_DEV_COREDUMP
+
+static struct xe_device *coredump_to_xe(const struct xe_devcoredump *coredump)
+{
+	return container_of(coredump, struct xe_device, devcoredump);
+}
+
+static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q)
+{
+	return &q->gt->uc.guc;
+}
+
+static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
+				   size_t count, void *data, size_t datalen)
+{
+	struct xe_devcoredump *coredump = data;
+	struct xe_devcoredump_snapshot *ss;
+	struct drm_printer p;
+	struct drm_print_iterator iter;
+	struct timespec64 ts;
+	int i;
+
+	/* Our device is gone already... */
+	if (!data || !coredump_to_xe(coredump))
+		return -ENODEV;
+
+	iter.data = buffer;
+	iter.offset = 0;
+	iter.start = offset;
+	iter.remain = count;
+
+	ss = &coredump->snapshot;
+	p = drm_coredump_printer(&iter);
+
+	drm_printf(&p, "**** Xe Device Coredump ****\n");
+	drm_printf(&p, "kernel: " UTS_RELEASE "\n");
+	drm_printf(&p, "module: " KBUILD_MODNAME "\n");
+
+	ts = ktime_to_timespec64(ss->snapshot_time);
+	drm_printf(&p, "Snapshot time: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
+	ts = ktime_to_timespec64(ss->boot_time);
+	drm_printf(&p, "Uptime: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
+
+	drm_printf(&p, "\n**** GuC CT ****\n");
+	xe_guc_ct_snapshot_print(coredump->snapshot.ct, &p);
+	xe_guc_exec_queue_snapshot_print(coredump->snapshot.ge, &p);
+
+	drm_printf(&p, "\n**** HW Engines ****\n");
+	for (i = 0; i < XE_NUM_HW_ENGINES; i++)
+		if (coredump->snapshot.hwe[i])
+			xe_hw_engine_snapshot_print(coredump->snapshot.hwe[i],
+						    &p);
+
+	return count - iter.remain;
+}
+
+/* devcoredump free callback: release all snapshots and re-arm capturing. */
+static void xe_devcoredump_free(void *data)
+{
+	struct xe_devcoredump *coredump = data;
+	int i;
+
+	/* Our device is gone. Nothing to do... */
+	if (!data || !coredump_to_xe(coredump))
+		return;
+	xe_guc_ct_snapshot_free(coredump->snapshot.ct);
+	xe_guc_exec_queue_snapshot_free(coredump->snapshot.ge);
+	for (i = 0; i < XE_NUM_HW_ENGINES; i++)
+		if (coredump->snapshot.hwe[i])
+			xe_hw_engine_snapshot_free(coredump->snapshot.hwe[i]);
+	/* Forget the freed pointers; a later capture may not rewrite every slot. */
+	coredump->snapshot = (struct xe_devcoredump_snapshot){};
+	coredump->captured = false;
+	drm_info(&coredump_to_xe(coredump)->drm, "Xe device coredump has been deleted.\n");
+}
+
+/* Capture timestamps, GuC CT, exec-queue and HW-engine snapshots for @q. */
+static void devcoredump_snapshot(struct xe_devcoredump *coredump,
+				 struct xe_exec_queue *q)
+{
+	struct xe_devcoredump_snapshot *ss = &coredump->snapshot;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	u32 adj_logical_mask = q->logical_mask;
+	u32 width_mask = (0x1 << q->width) - 1;
+	int i;
+	bool cookie;
+
+	ss->snapshot_time = ktime_get_real();
+	ss->boot_time = ktime_get_boottime();
+	/* For width > 1, the loop below widens each set mask bit to q->width bits. */
+	cookie = dma_fence_begin_signalling();
+	for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
+		if (adj_logical_mask & BIT(i)) {
+			adj_logical_mask |= width_mask << i;
+			i += q->width;
+		} else {
+			++i;
+		}
+	}
+	/* NOTE(review): the force-wake result is ignored; capture proceeds anyway. */
+	xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
+
+	coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true);
+	coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(q);
+	/* Capture engines of q's class that are in the mask; NULL the other slots. */
+	for_each_hw_engine(hwe, q->gt, id) {
+		if (hwe->class != q->hwe->class ||
+		    !(BIT(hwe->logical_instance) & adj_logical_mask)) {
+			coredump->snapshot.hwe[id] = NULL;
+			continue;
+		}
+		coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe);
+	}
+	xe_force_wake_put(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
+	dma_fence_end_signalling(cookie);
+}
+
+/**
+ * xe_devcoredump - Take the required snapshots and initialize coredump device.
+ * @q: The faulty xe_exec_queue, where the issue was detected.
+ *
+ * This function should be called at the crash time within the serialized
+ * gt_reset. It is skipped if we still have the core dump device available
+ * with the information of the 'first' snapshot.
+ */
+void xe_devcoredump(struct xe_exec_queue *q)
+{
+	struct xe_device *xe = gt_to_xe(q->gt);
+	struct xe_devcoredump *coredump = &xe->devcoredump;
+
+	if (coredump->captured) {
+		drm_dbg(&xe->drm, "Multiple hangs are occurring, but only the first snapshot was taken\n");
+		return;
+	}
+
+	coredump->captured = true;
+	devcoredump_snapshot(coredump, q);
+
+	drm_info(&xe->drm, "Xe device coredump has been created\n");
+	drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
+		 xe->drm.primary->index);
+
+	dev_coredumpm(xe->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL,
+		      xe_devcoredump_read, xe_devcoredump_free);
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h
new file mode 100644
index 000000000000..6ac218a5c194
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_devcoredump.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_DEVCOREDUMP_H_
+#define _XE_DEVCOREDUMP_H_
+
+struct xe_device;
+struct xe_exec_queue;
+
+#ifdef CONFIG_DEV_COREDUMP
+void xe_devcoredump(struct xe_exec_queue *q);
+#else
+static inline void xe_devcoredump(struct xe_exec_queue *q)
+{
+}
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
new file mode 100644
index 000000000000..7fdad9c3d3dd
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_DEVCOREDUMP_TYPES_H_
+#define _XE_DEVCOREDUMP_TYPES_H_
+
+#include <linux/ktime.h>
+#include <linux/mutex.h>
+
+#include "xe_hw_engine_types.h"
+
+struct xe_device;
+
+/**
+ * struct xe_devcoredump_snapshot - Crash snapshot
+ *
+ * This struct contains all the useful information quickly captured at the time
+ * of the crash. So, any subsequent reads of the coredump points to a data that
+ * shows the state of the GPU of when the issue has happened.
+ */
+struct xe_devcoredump_snapshot {
+	/** @snapshot_time:  Time of this capture. */
+	ktime_t snapshot_time;
+	/** @boot_time:  Relative boot time so the uptime can be calculated. */
+	ktime_t boot_time;
+
+	/* GuC snapshots */
+	/** @ct: GuC CT snapshot */
+	struct xe_guc_ct_snapshot *ct;
+	/** @ge: Guc Engine snapshot */
+	struct xe_guc_submit_exec_queue_snapshot *ge;
+	/** @hwe: HW Engine snapshot array */
+	struct xe_hw_engine_snapshot *hwe[XE_NUM_HW_ENGINES];
+};
+
+/**
+ * struct xe_devcoredump - Xe devcoredump main structure
+ *
+ * This struct represents the live and active dev_coredump node.
+ * It is created/populated at the time of a crash/error. Then it
+ * is read later when user access the device coredump data file
+ * for reading the information.
+ */
+struct xe_devcoredump {
+	/** @xe: Xe device. */
+	struct xe_device *xe;
+	/** @captured: The snapshot of the first hang has already been taken. */
+	bool captured;
+	/** @snapshot: Snapshot is captured at time of the first crash */
+	struct xe_devcoredump_snapshot snapshot;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
new file mode 100644
index 000000000000..5176c27e4b6a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -0,0 +1,661 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_device.h"
+
+#include <linux/units.h>
+
+#include <drm/drm_aperture.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_gem_ttm_helper.h>
+#include <drm/drm_ioctl.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+#include <drm/xe_drm.h>
+
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
+#include "xe_bo.h"
+#include "xe_debugfs.h"
+#include "xe_display.h"
+#include "xe_dma_buf.h"
+#include "xe_drm_client.h"
+#include "xe_drv.h"
+#include "xe_exec_queue.h"
+#include "xe_exec.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_irq.h"
+#include "xe_mmio.h"
+#include "xe_module.h"
+#include "xe_pat.h"
+#include "xe_pcode.h"
+#include "xe_pm.h"
+#include "xe_query.h"
+#include "xe_tile.h"
+#include "xe_ttm_stolen_mgr.h"
+#include "xe_ttm_sys_mgr.h"
+#include "xe_vm.h"
+#include "xe_wait_user_fence.h"
+#include "xe_hwmon.h"
+
+#ifdef CONFIG_LOCKDEP
+struct lockdep_map xe_device_mem_access_lockdep_map = {
+	.name = "xe_device_mem_access_lockdep_map"
+};
+#endif
+
+static int xe_file_open(struct drm_device *dev, struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_drm_client *client;
+	struct xe_file *xef;
+	int ret = -ENOMEM;
+
+	xef = kzalloc(sizeof(*xef), GFP_KERNEL);
+	if (!xef)
+		return ret;
+
+	client = xe_drm_client_alloc();
+	if (!client) {
+		kfree(xef);
+		return ret;
+	}
+
+	xef->drm = file;
+	xef->client = client;
+	xef->xe = xe;
+
+	mutex_init(&xef->vm.lock);
+	xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1);
+
+	mutex_init(&xef->exec_queue.lock);
+	xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1);
+
+	spin_lock(&xe->clients.lock);
+	xe->clients.count++;
+	spin_unlock(&xe->clients.lock);
+
+	file->driver_priv = xef;
+	return 0;
+}
+
+static void xe_file_close(struct drm_device *dev, struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = file->driver_priv;
+	struct xe_vm *vm;
+	struct xe_exec_queue *q;
+	unsigned long idx;
+
+	mutex_lock(&xef->exec_queue.lock);
+	xa_for_each(&xef->exec_queue.xa, idx, q) {
+		xe_exec_queue_kill(q);
+		xe_exec_queue_put(q);
+	}
+	mutex_unlock(&xef->exec_queue.lock);
+	xa_destroy(&xef->exec_queue.xa);
+	mutex_destroy(&xef->exec_queue.lock);
+	mutex_lock(&xef->vm.lock);
+	xa_for_each(&xef->vm.xa, idx, vm)
+		xe_vm_close_and_put(vm);
+	mutex_unlock(&xef->vm.lock);
+	xa_destroy(&xef->vm.xa);
+	mutex_destroy(&xef->vm.lock);
+
+	spin_lock(&xe->clients.lock);
+	xe->clients.count--;
+	spin_unlock(&xe->clients.lock);
+
+	xe_drm_client_put(xef->client);
+	kfree(xef);
+}
+
+static const struct drm_ioctl_desc xe_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(XE_DEVICE_QUERY, xe_query_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_GEM_CREATE, xe_gem_create_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_GEM_MMAP_OFFSET, xe_gem_mmap_offset_ioctl,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_CREATE, xe_exec_queue_create_ioctl,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY, xe_exec_queue_destroy_ioctl,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_GET_PROPERTY, xe_exec_queue_get_property_ioctl,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
+			  DRM_RENDER_ALLOW),
+};
+
+static const struct file_operations xe_driver_fops = {
+	.owner = THIS_MODULE,
+	.open = drm_open,
+	.release = drm_release_noglobal,
+	.unlocked_ioctl = drm_ioctl,
+	.mmap = drm_gem_mmap,
+	.poll = drm_poll,
+	.read = drm_read,
+	.compat_ioctl = drm_compat_ioctl,
+	.llseek = noop_llseek,
+#ifdef CONFIG_PROC_FS
+	.show_fdinfo = drm_show_fdinfo,
+#endif
+};
+
+static void xe_driver_release(struct drm_device *dev)
+{
+	struct xe_device *xe = to_xe_device(dev);
+
+	pci_set_drvdata(to_pci_dev(xe->drm.dev), NULL);
+}
+
+static struct drm_driver driver = {
+	/* Don't use MTRRs here; the Xserver or userspace app should
+	 * deal with them for Intel hardware.
+	 */
+	.driver_features =
+	    DRIVER_GEM |
+	    DRIVER_RENDER | DRIVER_SYNCOBJ |
+	    DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA,
+	.open = xe_file_open,
+	.postclose = xe_file_close,
+
+	.gem_prime_import = xe_gem_prime_import,
+
+	.dumb_create = xe_bo_dumb_create,
+	.dumb_map_offset = drm_gem_ttm_dumb_map_offset,
+#ifdef CONFIG_PROC_FS
+	.show_fdinfo = xe_drm_client_fdinfo,
+#endif
+	.release = &xe_driver_release,
+
+	.ioctls = xe_ioctls,
+	.num_ioctls = ARRAY_SIZE(xe_ioctls),
+	.fops = &xe_driver_fops,
+	.name = DRIVER_NAME,
+	.desc = DRIVER_DESC,
+	.date = DRIVER_DATE,
+	.major = DRIVER_MAJOR,
+	.minor = DRIVER_MINOR,
+	.patchlevel = DRIVER_PATCHLEVEL,
+};
+
+static void xe_device_destroy(struct drm_device *dev, void *dummy)
+{
+	struct xe_device *xe = to_xe_device(dev);
+
+	if (xe->ordered_wq)
+		destroy_workqueue(xe->ordered_wq);
+
+	if (xe->unordered_wq)
+		destroy_workqueue(xe->unordered_wq);
+
+	ttm_device_fini(&xe->ttm);
+}
+
+struct xe_device *xe_device_create(struct pci_dev *pdev,
+				   const struct pci_device_id *ent)
+{
+	struct xe_device *xe;
+	int err;
+
+	xe_display_driver_set_hooks(&driver);
+
+	err = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver);
+	if (err)
+		return ERR_PTR(err);
+
+	xe = devm_drm_dev_alloc(&pdev->dev, &driver, struct xe_device, drm);
+	if (IS_ERR(xe))
+		return xe;
+
+	err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev,
+			      xe->drm.anon_inode->i_mapping,
+			      xe->drm.vma_offset_manager, false, false);
+	if (WARN_ON(err))
+		goto err;
+
+	err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL);
+	if (err)
+		goto err;
+
+	xe->info.devid = pdev->device;
+	xe->info.revid = pdev->revision;
+	xe->info.force_execlist = xe_modparam.force_execlist;
+
+	spin_lock_init(&xe->irq.lock);
+	spin_lock_init(&xe->clients.lock);
+
+	init_waitqueue_head(&xe->ufence_wq);
+
+	drmm_mutex_init(&xe->drm, &xe->usm.lock);
+	xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC);
+
+	if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+		/* Trigger a large asid and an early asid wrap. */
+		u32 asid;
+
+		BUILD_BUG_ON(XE_MAX_ASID < 2);
+		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, NULL,
+				      XA_LIMIT(XE_MAX_ASID - 2, XE_MAX_ASID - 1),
+				      &xe->usm.next_asid, GFP_KERNEL);
+		drm_WARN_ON(&xe->drm, err);
+		if (err >= 0)
+			xa_erase(&xe->usm.asid_to_vm, asid);
+	}
+
+	spin_lock_init(&xe->pinned.lock);
+	INIT_LIST_HEAD(&xe->pinned.kernel_bo_present);
+	INIT_LIST_HEAD(&xe->pinned.external_vram);
+	INIT_LIST_HEAD(&xe->pinned.evicted);
+
+	xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
+	xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
+	if (!xe->ordered_wq || !xe->unordered_wq) {
+		drm_err(&xe->drm, "Failed to allocate xe workqueues\n");
+		err = -ENOMEM;
+		goto err;
+	}
+
+	err = xe_display_create(xe);
+	if (WARN_ON(err))
+		goto err;
+
+	return xe;
+
+err:
+	return ERR_PTR(err);
+}
+
+/*
+ * The driver-initiated FLR is the highest level of reset that we can trigger
+ * from within the driver. It is different from the PCI FLR in that it doesn't
+ * fully reset the SGUnit and doesn't modify the PCI config space and therefore
+ * it doesn't require a re-enumeration of the PCI BARs. However, the
+ * driver-initiated FLR does still cause a reset of both GT and display and a
+ * memory wipe of local and stolen memory, so recovery would require a full HW
+ * re-init and saving/restoring (or re-populating) the wiped memory. Since we
+ * perform the FLR as the very last action before releasing access to the HW
+ * during the driver release flow, we don't attempt recovery at all, because
+ * if/when a new instance of the driver is bound to the device it will do a full
+ * re-init anyway.
+ */
+static void xe_driver_flr(struct xe_device *xe)
+{
+	const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
+	int ret;
+
+	if (xe_mmio_read32(gt, GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS) {
+		drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n");
+		return;
+	}
+
+	drm_dbg(&xe->drm, "Triggering Driver-FLR\n");
+
+	/*
+	 * Make sure any pending FLR requests have cleared by waiting for the
+	 * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS
+	 * to make sure it's not still set from a prior attempt (it's a write to
+	 * clear bit).
+	 * Note that we should never be in a situation where a previous attempt
+	 * is still pending (unless the HW is totally dead), but better to be
+	 * safe in case something unexpected happens
+	 */
+	ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
+	if (ret) {
+		drm_err(&xe->drm, "Driver-FLR-prepare wait for ready failed! %d\n", ret);
+		return;
+	}
+	xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);
+
+	/* Trigger the actual Driver-FLR */
+	xe_mmio_rmw32(gt, GU_CNTL, 0, DRIVERFLR);
+
+	/* Wait for hardware teardown to complete */
+	ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
+	if (ret) {
+		drm_err(&xe->drm, "Driver-FLR-teardown wait completion failed! %d\n", ret);
+		return;
+	}
+
+	/* Wait for hardware/firmware re-init to complete */
+	ret = xe_mmio_wait32(gt, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS,
+			     flr_timeout, NULL, false);
+	if (ret) {
+		drm_err(&xe->drm, "Driver-FLR-reinit wait completion failed! %d\n", ret);
+		return;
+	}
+
+	/* Clear sticky completion status */
+	xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);
+}
+
+static void xe_driver_flr_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_device *xe = arg;
+
+	if (xe->needs_flr_on_fini)
+		xe_driver_flr(xe);
+}
+
+static void xe_device_sanitize(struct drm_device *drm, void *arg)
+{
+	struct xe_device *xe = arg;
+	struct xe_gt *gt;
+	u8 id;
+
+	for_each_gt(gt, xe, id)
+		xe_gt_sanitize(gt);
+}
+
+/*
+ * Configure DMA for the device: the maximum segment size first, then the
+ * DMA mask and the coherent DMA mask, each info.dma_mask_size bits wide.
+ */
+static int xe_set_dma_info(struct xe_device *xe)
+{
+	struct device *dev = xe->drm.dev;
+	u64 mask = DMA_BIT_MASK(xe->info.dma_mask_size);
+	int ret;
+
+	dma_set_max_seg_size(dev, xe_sg_segment_size(dev));
+
+	ret = dma_set_mask(dev, mask);
+	if (!ret)
+		ret = dma_set_coherent_mask(dev, mask);
+
+	if (ret)
+		drm_err(&xe->drm, "Can't set DMA mask/consistent mask (%d)\n", ret);
+
+	return ret;
+}
+
+/*
+ * Initialize MMIO resources that don't require any knowledge about tile count.
+ */
+int xe_device_probe_early(struct xe_device *xe)
+{
+	int err;
+
+	err = xe_mmio_init(xe);
+	if (err)
+		return err;
+
+	err = xe_mmio_root_tile_init(xe);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+/* For graphics version >= 20, refresh info.has_flat_ccs from the hardware. */
+static int xe_device_set_has_flat_ccs(struct xe_device *xe)
+{
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
+	u32 reg;
+	int err;
+
+	if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs)
+		return 0;
+
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		return err;
+
+	reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER);
+	xe->info.has_flat_ccs = !!(reg & XE2_FLAT_CCS_ENABLE);
+
+	if (!xe->info.has_flat_ccs)
+		drm_dbg(&xe->drm,
+			"Flat CCS has been disabled in bios, May lead to performance impact\n");
+
+	return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
+int xe_device_probe(struct xe_device *xe)
+{
+	struct xe_tile *tile;
+	struct xe_gt *gt;
+	int err;
+	u8 id;
+
+	xe_pat_init_early(xe);
+
+	xe->info.mem_region_mask = 1;
+	err = xe_display_init_nommio(xe);
+	if (err)
+		return err;
+
+	err = xe_set_dma_info(xe);
+	if (err)
+		return err;
+
+	xe_mmio_probe_tiles(xe);
+
+	xe_ttm_sys_mgr_init(xe);
+
+	for_each_gt(gt, xe, id)
+		xe_force_wake_init_gt(gt, gt_to_fw(gt));
+
+	for_each_tile(tile, xe, id) {
+		err = xe_ggtt_init_early(tile->mem.ggtt);
+		if (err)
+			return err;
+	}
+
+	err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe);
+	if (err)
+		return err;
+
+	for_each_gt(gt, xe, id) {
+		err = xe_pcode_probe(gt);
+		if (err)
+			return err;
+	}
+
+	err = xe_display_init_noirq(xe);
+	if (err)
+		return err;
+
+	err = xe_irq_install(xe);
+	if (err)
+		goto err;
+
+	for_each_gt(gt, xe, id) {
+		err = xe_gt_init_early(gt);
+		if (err)
+			goto err_irq_shutdown;
+	}
+
+	err = xe_device_set_has_flat_ccs(xe);
+	if (err)
+		goto err_irq_shutdown;
+
+	err = xe_mmio_probe_vram(xe);
+	if (err)
+		goto err_irq_shutdown;
+
+	for_each_tile(tile, xe, id) {
+		err = xe_tile_init_noalloc(tile);
+		if (err)
+			goto err_irq_shutdown;
+	}
+
+	/* Allocate and map stolen after potential VRAM resize */
+	xe_ttm_stolen_mgr_init(xe);
+
+	/*
+	 * Now that GT is initialized (TTM in particular),
+	 * we can try to init display, and inherit the initial fb.
+	 * This is the reason the first allocation needs to be done
+	 * inside display.
+	 */
+	err = xe_display_init_noaccel(xe);
+	if (err)
+		goto err_irq_shutdown;
+
+	for_each_gt(gt, xe, id) {
+		err = xe_gt_init(gt);
+		if (err)
+			goto err_irq_shutdown;
+	}
+
+	xe_heci_gsc_init(xe);
+
+	err = xe_display_init(xe);
+	if (err)
+		goto err_irq_shutdown;
+
+	err = drm_dev_register(&xe->drm, 0);
+	if (err)
+		goto err_fini_display;
+
+	xe_display_register(xe);
+
+	xe_debugfs_register(xe);
+
+	xe_hwmon_register(xe);
+
+	err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe);
+	if (err)
+		return err;
+
+	return 0;
+
+err_fini_display:
+	xe_display_driver_remove(xe);
+
+err_irq_shutdown:
+	xe_irq_shutdown(xe);
+err:
+	xe_display_fini(xe);
+	return err;
+}
+
+static void xe_device_remove_display(struct xe_device *xe)
+{
+	xe_display_unregister(xe);
+
+	drm_dev_unplug(&xe->drm);
+	xe_display_driver_remove(xe);
+}
+
+void xe_device_remove(struct xe_device *xe)
+{
+	xe_device_remove_display(xe);
+
+	xe_display_fini(xe);
+
+	xe_heci_gsc_fini(xe);
+
+	xe_irq_shutdown(xe);
+}
+
+void xe_device_shutdown(struct xe_device *xe)
+{
+}
+
+void xe_device_wmb(struct xe_device *xe)
+{
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
+
+	wmb();
+	if (IS_DGFX(xe))
+		xe_mmio_write32(gt, SOFTWARE_FLAGS_SPR33, 0);
+}
+
+u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
+{
+	return xe_device_has_flat_ccs(xe) ?
+		DIV_ROUND_UP_ULL(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0;
+}
+
+bool xe_device_mem_access_ongoing(struct xe_device *xe)
+{
+	if (xe_pm_read_callback_task(xe) != NULL)
+		return true;
+
+	return atomic_read(&xe->mem_access.ref);
+}
+
+void xe_device_assert_mem_access(struct xe_device *xe)
+{
+	XE_WARN_ON(!xe_device_mem_access_ongoing(xe));
+}
+
+/* Take a mem_access reference only when xe_pm_runtime_get_if_active() succeeds. */
+bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe)
+{
+	int count;
+
+	/* Called from the PM callback task itself: report success, take nothing. */
+	if (xe_pm_read_callback_task(xe) == current)
+		return true;
+
+	if (!xe_pm_runtime_get_if_active(xe))
+		return false;
+
+	count = atomic_inc_return(&xe->mem_access.ref);
+	xe_assert(xe, count != S32_MAX);
+	return true;
+}
+
+void xe_device_mem_access_get(struct xe_device *xe)
+{
+	int ref;
+
+	/*
+	 * This looks racy, but should be fine since the pm_callback_task only
+	 * transitions from NULL -> current (and back to NULL again), during the
+	 * runtime_resume() or runtime_suspend() callbacks, for which there can
+	 * only be a single one running for our device. We only need to prevent
+	 * recursively calling the runtime_get or runtime_put from those
+	 * callbacks, as well as preventing triggering any access_ongoing
+	 * asserts.
+	 */
+	if (xe_pm_read_callback_task(xe) == current)
+		return;
+
+	/*
+	 * Since the resume here is synchronous it can be quite easy to deadlock
+	 * if we are not careful. Also in practice it might be quite timing
+	 * sensitive to ever see the 0 -> 1 transition with the callers locks
+	 * held, so deadlocks might exist but are hard for lockdep to ever see.
+	 * With this in mind, help lockdep learn about the potentially scary
+	 * stuff that can happen inside the runtime_resume callback by acquiring
+	 * a dummy lock (it doesn't protect anything and gets compiled out on
+	 * non-debug builds).  Lockdep then only needs to see the
+	 * mem_access_lockdep_map -> runtime_resume callback once, and then can
+	 * hopefully validate all the (callers_locks) -> mem_access_lockdep_map.
+	 * For example if the (callers_locks) are ever grabbed in the
+	 * runtime_resume callback, lockdep should give us a nice splat.
+	 */
+	lock_map_acquire(&xe_device_mem_access_lockdep_map);
+	lock_map_release(&xe_device_mem_access_lockdep_map);
+
+	xe_pm_runtime_get(xe);
+	ref = atomic_inc_return(&xe->mem_access.ref);
+
+	xe_assert(xe, ref != S32_MAX);
+
+}
+
+void xe_device_mem_access_put(struct xe_device *xe)
+{
+	int ref;
+
+	if (xe_pm_read_callback_task(xe) == current)
+		return;
+
+	ref = atomic_dec_return(&xe->mem_access.ref);
+	xe_pm_runtime_put(xe);
+
+	xe_assert(xe, ref >= 0);
+}
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
new file mode 100644
index 000000000000..08d8b72c7731
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_DEVICE_H_
+#define _XE_DEVICE_H_
+
+struct xe_exec_queue;
+struct xe_file;
+
+#include <drm/drm_util.h>
+
+#include "regs/xe_gpu_commands.h"
+#include "xe_device_types.h"
+#include "xe_force_wake.h"
+#include "xe_macros.h"
+
+#ifdef CONFIG_LOCKDEP
+extern struct lockdep_map xe_device_mem_access_lockdep_map;
+#endif
+
+static inline struct xe_device *to_xe_device(const struct drm_device *dev)
+{
+	return container_of(dev, struct xe_device, drm);
+}
+
+static inline struct xe_device *pdev_to_xe_device(struct pci_dev *pdev)
+{
+	return pci_get_drvdata(pdev);
+}
+
+static inline struct xe_device *ttm_to_xe_device(struct ttm_device *ttm)
+{
+	return container_of(ttm, struct xe_device, ttm);
+}
+
+struct xe_device *xe_device_create(struct pci_dev *pdev,
+				   const struct pci_device_id *ent);
+int xe_device_probe_early(struct xe_device *xe);
+int xe_device_probe(struct xe_device *xe);
+void xe_device_remove(struct xe_device *xe);
+void xe_device_shutdown(struct xe_device *xe);
+
+void xe_device_wmb(struct xe_device *xe);
+
+static inline struct xe_file *to_xe_file(const struct drm_file *file)
+{
+	return file->driver_priv;
+}
+
+static inline struct xe_tile *xe_device_get_root_tile(struct xe_device *xe)
+{
+	return &xe->tiles[0];
+}
+
+#define XE_MAX_GT_PER_TILE 2
+
+/* Return @tile's primary GT for id 0 or media GT for id 1; other ids warn and use 0. */
+static inline struct xe_gt *xe_tile_get_gt(struct xe_tile *tile, u8 gt_id)
+{
+	if (drm_WARN_ON(&tile_to_xe(tile)->drm, gt_id >= XE_MAX_GT_PER_TILE))
+		gt_id = 0;
+	return gt_id ? tile->media_gt : tile->primary_gt;
+}
+
+/* Map a device-wide GT id to its xe_gt; returns NULL when that GT is absent. */
+static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id)
+{
+	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
+	struct xe_gt *gt;
+
+	/*
+	 * FIXME: This only works for now because multi-tile and standalone
+	 * media are mutually exclusive on the platforms we have today.
+	 *
+	 * id => GT mapping may change once we settle on how we want to handle
+	 * our UAPI.
+	 */
+	if (MEDIA_VER(xe) >= 13) {
+		gt = xe_tile_get_gt(root_tile, gt_id);
+	} else {
+		/* gt_id indexes xe->tiles[] here, so it must stay below the tile limit. */
+		if (drm_WARN_ON(&xe->drm, gt_id >= XE_MAX_TILES_PER_DEVICE))
+			gt_id = 0;
+		gt = xe->tiles[gt_id].primary_gt;
+	}
+
+	if (!gt)
+		return NULL;
+
+	drm_WARN_ON(&xe->drm, gt->info.id != gt_id);
+	drm_WARN_ON(&xe->drm, gt->info.type == XE_GT_TYPE_UNINITIALIZED);
+	return gt;
+}
+
+/*
+ * Provide a GT structure suitable for performing non-GT MMIO operations against
+ * the primary tile.  Primarily intended for early tile initialization, display
+ * handling, top-most interrupt enable/disable, etc.  Since anything using the
+ * MMIO handle returned by this function doesn't need GSI offset translation,
+ * we'll return the primary GT from the root tile.
+ *
+ * FIXME: Fix the driver design so that 'gt' isn't the target of all MMIO
+ * operations.
+ *
+ * Returns the primary gt of the root tile.
+ */
+static inline struct xe_gt *xe_root_mmio_gt(struct xe_device *xe)
+{
+	return xe_device_get_root_tile(xe)->primary_gt;
+}
+
+static inline bool xe_device_uc_enabled(struct xe_device *xe)
+{
+	return !xe->info.force_execlist;
+}
+
+#define for_each_tile(tile__, xe__, id__) \
+	for ((id__) = 0; (id__) < (xe__)->info.tile_count; (id__)++) \
+		for_each_if((tile__) = &(xe__)->tiles[(id__)])
+
+#define for_each_remote_tile(tile__, xe__, id__) \
+	for ((id__) = 1; (id__) < (xe__)->info.tile_count; (id__)++) \
+		for_each_if((tile__) = &(xe__)->tiles[(id__)])
+
+/*
+ * FIXME: This only works for now since multi-tile and standalone media
+ * happen to be mutually exclusive.  Future platforms may change this...
+ */
+#define for_each_gt(gt__, xe__, id__) \
+	for ((id__) = 0; (id__) < (xe__)->info.gt_count; (id__)++) \
+		for_each_if((gt__) = xe_device_get_gt((xe__), (id__)))
+
+static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt)
+{
+	return &gt->mmio.fw;
+}
+
+void xe_device_mem_access_get(struct xe_device *xe);
+bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe);
+void xe_device_mem_access_put(struct xe_device *xe);
+
+void xe_device_assert_mem_access(struct xe_device *xe);
+bool xe_device_mem_access_ongoing(struct xe_device *xe);
+
+static inline bool xe_device_in_fault_mode(struct xe_device *xe)
+{
+	return xe->usm.num_vm_in_fault_mode != 0;
+}
+
+static inline bool xe_device_in_non_fault_mode(struct xe_device *xe)
+{
+	return xe->usm.num_vm_in_non_fault_mode != 0;
+}
+
+static inline bool xe_device_has_flat_ccs(struct xe_device *xe)
+{
+	return xe->info.has_flat_ccs;
+}
+
+static inline bool xe_device_has_sriov(struct xe_device *xe)
+{
+	return xe->info.has_sriov;
+}
+
+u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c
new file mode 100644
index 000000000000..99113a5a2b84
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_device_sysfs.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/kobject.h>
+#include <linux/pci.h>
+#include <linux/sysfs.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_device_sysfs.h"
+#include "xe_pm.h"
+
+/**
+ * DOC: Xe device sysfs
+ * Xe driver requires exposing certain tunable knobs controlled by user space for
+ * each graphics device. Considering this, we need to add sysfs attributes at device
+ * level granularity.
+ * These sysfs attributes will be available under pci device kobj directory.
+ *
+ * vram_d3cold_threshold - Report/change the VRAM usage threshold (in MB) below
+ * which VRAM save/restore is permitted during runtime D3cold entry/exit.
+ */
+
+/* sysfs "show" handler: print the device's D3cold VRAM threshold into @buf. */
+static ssize_t
+vram_d3cold_threshold_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+
+	/* Bail out if no xe device could be looked up for this PCI device. */
+	if (!xe)
+		return -EINVAL;
+
+	/* d3cold.vram_threshold is a u32, so it must be printed unsigned. */
+	return sysfs_emit(buf, "%u\n", xe->d3cold.vram_threshold);
+}
+
+static ssize_t
+vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr,
+			    const char *buff, size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+	u32 vram_d3cold_threshold;
+	int ret;
+
+	if (!xe)
+		return -EINVAL;
+
+	ret = kstrtou32(buff, 0, &vram_d3cold_threshold);
+	if (ret)
+		return ret;
+
+	drm_dbg(&xe->drm, "vram_d3cold_threshold: %u\n", vram_d3cold_threshold);
+
+	ret = xe_pm_set_vram_threshold(xe, vram_d3cold_threshold);
+
+	return ret ?: count;
+}
+
+static DEVICE_ATTR_RW(vram_d3cold_threshold);
+
+static void xe_device_sysfs_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_device *xe = arg;
+
+	sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr);
+}
+
+void xe_device_sysfs_init(struct xe_device *xe)
+{
+	struct device *dev = xe->drm.dev;
+	int ret;
+
+	ret = sysfs_create_file(&dev->kobj, &dev_attr_vram_d3cold_threshold.attr);
+	if (ret) {
+		drm_warn(&xe->drm, "Failed to create sysfs file\n");
+		return;
+	}
+
+	ret = drmm_add_action_or_reset(&xe->drm, xe_device_sysfs_fini, xe);
+	if (ret)
+		drm_warn(&xe->drm, "Failed to add sysfs fini drm action\n");
+}
diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.h b/drivers/gpu/drm/xe/xe_device_sysfs.h
new file mode 100644
index 000000000000..38b240684bee
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_device_sysfs.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_DEVICE_SYSFS_H_
+#define _XE_DEVICE_SYSFS_H_
+
+struct xe_device;
+
+void xe_device_sysfs_init(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
new file mode 100644
index 000000000000..e8491979a6f2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -0,0 +1,537 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022-2023 Intel Corporation
+ */
+
+#ifndef _XE_DEVICE_TYPES_H_
+#define _XE_DEVICE_TYPES_H_
+
+#include <linux/pci.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/ttm/ttm_device.h>
+
+#include "xe_devcoredump_types.h"
+#include "xe_heci_gsc.h"
+#include "xe_gt_types.h"
+#include "xe_lmtt_types.h"
+#include "xe_platform_types.h"
+#include "xe_pt_types.h"
+#include "xe_sriov_types.h"
+#include "xe_step_types.h"
+
+#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
+#include "soc/intel_pch.h"
+#include "intel_display_core.h"
+#include "intel_display_device.h"
+#endif
+
+struct xe_ggtt;
+struct xe_pat_ops;
+
+#define XE_BO_INVALID_OFFSET	LONG_MAX
+
+#define GRAPHICS_VER(xe) ((xe)->info.graphics_verx100 / 100)
+#define MEDIA_VER(xe) ((xe)->info.media_verx100 / 100)
+#define GRAPHICS_VERx100(xe) ((xe)->info.graphics_verx100)
+#define MEDIA_VERx100(xe) ((xe)->info.media_verx100)
+#define IS_DGFX(xe) ((xe)->info.is_dgfx)
+#define HAS_HECI_GSCFI(xe) ((xe)->info.has_heci_gscfi)
+
+#define XE_VRAM_FLAGS_NEED64K		BIT(0)
+
+#define XE_GT0		0
+#define XE_GT1		1
+#define XE_MAX_TILES_PER_DEVICE	(XE_GT1 + 1)
+
+#define XE_MAX_ASID	(BIT(20))
+
+#define IS_PLATFORM_STEP(_xe, _platform, min_step, max_step)	\
+	((_xe)->info.platform == (_platform) &&			\
+	 (_xe)->info.step.graphics >= (min_step) &&		\
+	 (_xe)->info.step.graphics < (max_step))
+#define IS_SUBPLATFORM_STEP(_xe, _platform, sub, min_step, max_step)	\
+	((_xe)->info.platform == (_platform) &&				\
+	 (_xe)->info.subplatform == (sub) &&				\
+	 (_xe)->info.step.graphics >= (min_step) &&			\
+	 (_xe)->info.step.graphics < (max_step))
+
+#define tile_to_xe(tile__)								\
+	_Generic(tile__,								\
+		 const struct xe_tile * : (const struct xe_device *)((tile__)->xe),	\
+		 struct xe_tile * : (tile__)->xe)
+
+/**
+ * struct xe_mem_region - memory region structure
+ * This is used to describe a memory region in an xe
+ * device, such as HBM memory or CXL extension memory.
+ */
+struct xe_mem_region {
+	/** @io_start: IO start address of this VRAM instance */
+	resource_size_t io_start;
+	/**
+	 * @io_size: IO size of this VRAM instance
+	 *
+	 * This represents how much of this VRAM we can access
+	 * via the CPU through the VRAM BAR. This can be smaller
+	 * than @usable_size, in which case only part of VRAM is CPU
+	 * accessible (typically the first 256M). This
+	 * configuration is known as small-bar.
+	 */
+	resource_size_t io_size;
+	/** @dpa_base: This memory region's DPA (device physical address) base */
+	resource_size_t dpa_base;
+	/**
+	 * @usable_size: usable size of VRAM
+	 *
+	 * Usable size of VRAM excluding reserved portions
+	 * (e.g. stolen mem)
+	 */
+	resource_size_t usable_size;
+	/**
+	 * @actual_physical_size: Actual VRAM size
+	 *
+	 * Actual VRAM size including reserved portions
+	 * (e.g. stolen mem)
+	 */
+	resource_size_t actual_physical_size;
+	/** @mapping: pointer to VRAM mappable space */
+	void __iomem *mapping;
+};
+
+/**
+ * struct xe_tile - hardware tile structure
+ *
+ * From a driver perspective, a "tile" is effectively a complete GPU, containing
+ * an SGunit, 1-2 GTs, and (for discrete platforms) VRAM.
+ *
+ * Multi-tile platforms effectively bundle multiple GPUs behind a single PCI
+ * device and designate one "root" tile as being responsible for external PCI
+ * communication.  PCI BAR0 exposes the GGTT and MMIO register space for each
+ * tile in a stacked layout, and PCI BAR2 exposes the local memory associated
+ * with each tile similarly.  Device-wide interrupts can be enabled/disabled
+ * at the root tile, and the MSTR_TILE_INTR register will report which tiles
+ * have interrupts that need servicing.
+ */
+struct xe_tile {
+	/** @xe: Backpointer to tile's PCI device */
+	struct xe_device *xe;
+
+	/** @id: ID of the tile */
+	u8 id;
+
+	/**
+	 * @primary_gt: Primary GT
+	 */
+	struct xe_gt *primary_gt;
+
+	/**
+	 * @media_gt: Media GT
+	 *
+	 * Only present on devices with media version >= 13.
+	 */
+	struct xe_gt *media_gt;
+
+	/**
+	 * @mmio: MMIO info for a tile.
+	 *
+	 * Each tile has its own 16MB space in BAR0, laid out as:
+	 * * 0-4MB: registers
+	 * * 4MB-8MB: reserved
+	 * * 8MB-16MB: global GTT
+	 */
+	struct {
+		/** @size: size of tile's MMIO space */
+		size_t size;
+
+		/** @regs: pointer to tile's MMIO space (starting with registers) */
+		void __iomem *regs;
+	} mmio;
+
+	/**
+	 * @mmio_ext: MMIO-extension info for a tile.
+	 *
+	 * Each tile has its own additional 256MB (28-bit) MMIO-extension space.
+	 */
+	struct {
+		/** @size: size of tile's additional MMIO-extension space */
+		size_t size;
+
+		/** @regs: pointer to tile's additional MMIO-extension space */
+		void __iomem *regs;
+	} mmio_ext;
+
+	/** @mem: memory management info for tile */
+	struct {
+		/**
+		 * @vram: VRAM info for tile.
+		 *
+		 * Although VRAM is associated with a specific tile, it can
+		 * still be accessed by all tiles' GTs.
+		 */
+		struct xe_mem_region vram;
+
+		/** @vram_mgr: VRAM TTM manager */
+		struct xe_ttm_vram_mgr *vram_mgr;
+
+		/** @ggtt: Global graphics translation table */
+		struct xe_ggtt *ggtt;
+
+		/**
+		 * @kernel_bb_pool: Pool from which batchbuffers are allocated.
+		 *
+		 * Media GT shares a pool with its primary GT.
+		 */
+		struct xe_sa_manager *kernel_bb_pool;
+	} mem;
+
+	/** @sriov: tile level virtualization data */
+	union {
+		struct {
+			/** @sriov.pf.lmtt: Local Memory Translation Table. */
+			struct xe_lmtt lmtt;
+		} pf;
+	} sriov;
+
+	/** @migrate: Migration helper for vram blits and clearing */
+	struct xe_migrate *migrate;
+
+	/** @sysfs: sysfs' kobj used by xe_tile_sysfs */
+	struct kobject *sysfs;
+};
+
+/**
+ * struct xe_device - Top level struct of XE device
+ */
+struct xe_device {
+	/** @drm: drm device */
+	struct drm_device drm;
+
+	/** @devcoredump: device coredump */
+	struct xe_devcoredump devcoredump;
+
+	/** @info: device info */
+	struct intel_device_info {
+		/** @graphics_name: graphics IP name */
+		const char *graphics_name;
+		/** @media_name: media IP name */
+		const char *media_name;
+		/** @tile_mmio_ext_size: size of MMIO extension space, per-tile */
+		u32 tile_mmio_ext_size;
+		/** @graphics_verx100: graphics IP version */
+		u32 graphics_verx100;
+		/** @media_verx100: media IP version */
+		u32 media_verx100;
+		/** @mem_region_mask: mask of valid memory regions */
+		u32 mem_region_mask;
+		/** @platform: XE platform enum */
+		enum xe_platform platform;
+		/** @subplatform: XE subplatform enum */
+		enum xe_subplatform subplatform;
+		/** @devid: device ID */
+		u16 devid;
+		/** @revid: device revision */
+		u8 revid;
+		/** @step: stepping information for each IP */
+		struct xe_step_info step;
+		/** @dma_mask_size: DMA address bits */
+		u8 dma_mask_size;
+		/** @vram_flags: Vram flags */
+		u8 vram_flags;
+		/** @tile_count: Number of tiles */
+		u8 tile_count;
+		/** @gt_count: Total number of GTs for entire device */
+		u8 gt_count;
+		/** @vm_max_level: Max VM level */
+		u8 vm_max_level;
+		/** @va_bits: Maximum bits of a virtual address */
+		u8 va_bits;
+
+		/** @is_dgfx: is discrete device */
+		u8 is_dgfx:1;
+		/** @has_asid: Has address space ID */
+		u8 has_asid:1;
+		/** @force_execlist: Forced execlist submission */
+		u8 force_execlist:1;
+		/** @has_flat_ccs: Whether flat CCS metadata is used */
+		u8 has_flat_ccs:1;
+		/** @has_llc: Device has a shared CPU+GPU last level cache */
+		u8 has_llc:1;
+		/** @has_mmio_ext: Device has extra MMIO address range */
+		u8 has_mmio_ext:1;
+		/** @has_range_tlb_invalidation: Has range based TLB invalidations */
+		u8 has_range_tlb_invalidation:1;
+		/** @has_sriov: Supports SR-IOV */
+		u8 has_sriov:1;
+		/** @has_usm: Device has unified shared memory support */
+		u8 has_usm:1;
+		/** @enable_display: display enabled */
+		u8 enable_display:1;
+		/** @skip_mtcfg: skip Multi-Tile configuration from MTCFG register */
+		u8 skip_mtcfg:1;
+		/** @skip_pcode: skip access to PCODE uC */
+		u8 skip_pcode:1;
+		/** @has_heci_gscfi: device has heci gscfi */
+		u8 has_heci_gscfi:1;
+		/** @skip_guc_pc: Skip GuC based PM feature init */
+		u8 skip_guc_pc:1;
+
+#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
+		struct {
+			u32 rawclk_freq;
+		} i915_runtime;
+#endif
+	} info;
+
+	/** @irq: device interrupt state */
+	struct {
+		/** @lock: lock for processing irq's on this device */
+		spinlock_t lock;
+
+		/** @enabled: interrupts enabled on this device */
+		bool enabled;
+	} irq;
+
+	/** @ttm: ttm device */
+	struct ttm_device ttm;
+
+	/** @mmio: mmio info for device */
+	struct {
+		/** @size: size of MMIO space for device */
+		size_t size;
+		/** @regs: pointer to MMIO space for device */
+		void __iomem *regs;
+	} mmio;
+
+	/** @mem: memory info for device */
+	struct {
+		/** @vram: VRAM info for device */
+		struct xe_mem_region vram;
+		/** @sys_mgr: system TTM manager */
+		struct ttm_resource_manager sys_mgr;
+	} mem;
+
+	/** @sriov: device level virtualization data */
+	struct {
+		/** @sriov.__mode: SR-IOV mode (Don't access directly!) */
+		enum xe_sriov_mode __mode;
+	} sriov;
+
+	/** @clients: drm clients info */
+	struct {
+		/** @lock: Protects drm clients info */
+		spinlock_t lock;
+
+		/** @count: number of drm clients */
+		u64 count;
+	} clients;
+
+	/** @usm: unified memory state */
+	struct {
+		/** @asid_to_vm: convert an ASID to a VM */
+		struct xarray asid_to_vm;
+		/** @next_asid: next ASID, used to cyclically allocate ASIDs */
+		u32 next_asid;
+		/** @num_vm_in_fault_mode: number of VM in fault mode */
+		u32 num_vm_in_fault_mode;
+		/** @num_vm_in_non_fault_mode: number of VM in non-fault mode */
+		u32 num_vm_in_non_fault_mode;
+		/** @lock: protects UM state */
+		struct mutex lock;
+	} usm;
+
+	/** @pinned: pinned BO state */
+	struct {
+		/** @lock: protects pinned BO list state */
+		spinlock_t lock;
+		/** @kernel_bo_present: pinned kernel BOs that are present */
+		struct list_head kernel_bo_present;
+		/** @evicted: pinned BOs that have been evicted */
+		struct list_head evicted;
+		/** @external_vram: pinned external BOs in VRAM */
+		struct list_head external_vram;
+	} pinned;
+
+	/** @ufence_wq: user fence wait queue */
+	wait_queue_head_t ufence_wq;
+
+	/** @ordered_wq: used to serialize compute mode resume */
+	struct workqueue_struct *ordered_wq;
+
+	/** @unordered_wq: used for unordered work, mostly display */
+	struct workqueue_struct *unordered_wq;
+
+	/** @tiles: device tiles */
+	struct xe_tile tiles[XE_MAX_TILES_PER_DEVICE];
+
+	/**
+	 * @mem_access: keep track of memory access in the device, possibly
+	 * triggering additional actions when they occur.
+	 */
+	struct {
+		/** @ref: ref count of memory accesses */
+		atomic_t ref;
+	} mem_access;
+
+	/**
+	 * @pat: Encapsulate PAT related stuff
+	 */
+	struct {
+		/** @ops: Internal operations to abstract platforms */
+		const struct xe_pat_ops *ops;
+		/** @table: PAT table to program in the HW */
+		const struct xe_pat_table_entry *table;
+		/** @n_entries: Number of PAT entries */
+		int n_entries;
+		u32 idx[__XE_CACHE_LEVEL_COUNT];
+	} pat;
+
+	/** @d3cold: Encapsulate d3cold related stuff */
+	struct {
+		/** @capable: Indicates if root port is d3cold capable */
+		bool capable;
+
+		/** @allowed: Indicates if d3cold is a valid device state */
+		bool allowed;
+
+		/** @power_lost: Indicates if card has really lost power. */
+		bool power_lost;
+
+		/**
+		 * @vram_threshold:
+		 *
+		 * This represents the permissible threshold (in megabytes)
+		 * for VRAM save/restore. d3cold will be disallowed
+		 * when VRAM usage is above or equal to the threshold value,
+		 * to avoid the VRAM save/restore latency.
+		 * Default threshold value is 300 MB.
+		 */
+		u32 vram_threshold;
+		/** @lock: protect vram_threshold */
+		struct mutex lock;
+	} d3cold;
+
+	/**
+	 * @pm_callback_task: Track the active task that is running in either
+	 * the runtime_suspend or runtime_resume callbacks.
+	 */
+	struct task_struct *pm_callback_task;
+
+	/** @hwmon: hwmon subsystem integration */
+	struct xe_hwmon *hwmon;
+
+	/** @heci_gsc: graphics security controller */
+	struct xe_heci_gsc heci_gsc;
+
+	/** @needs_flr_on_fini: requests function-reset on fini */
+	bool needs_flr_on_fini;
+
+	/* private: */
+
+#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
+	/*
+	 * Any fields below this point are the ones used by display.
+	 * They are temporarily added here so xe_device can be disguised as
+	 * drm_i915_private during build. After cleanup these should go away,
+	 * migrating to the right sub-structs
+	 */
+	struct intel_display display;
+	enum intel_pch pch_type;
+	u16 pch_id;
+
+	struct dram_info {
+		bool wm_lv_0_adjust_needed;
+		u8 num_channels;
+		bool symmetric_memory;
+		enum intel_dram_type {
+			INTEL_DRAM_UNKNOWN,
+			INTEL_DRAM_DDR3,
+			INTEL_DRAM_DDR4,
+			INTEL_DRAM_LPDDR3,
+			INTEL_DRAM_LPDDR4,
+			INTEL_DRAM_DDR5,
+			INTEL_DRAM_LPDDR5,
+		} type;
+		u8 num_qgv_points;
+		u8 num_psf_gv_points;
+	} dram_info;
+
+	/*
+	 * edram size in MB.
+	 * Cannot be determined by PCIID. You must always read a register.
+	 */
+	u32 edram_size_mb;
+
+	/* To shut up runtime pm macros.. */
+	struct xe_runtime_pm {} runtime_pm;
+
+	/* For pcode */
+	struct mutex sb_lock;
+
+	/* Should be in struct intel_display */
+	u32 skl_preferred_vco_freq, max_dotclk_freq, hti_state;
+	u8 snps_phy_failed_calibration;
+	struct drm_atomic_state *modeset_restore_state;
+	struct list_head global_obj_list;
+
+	union {
+		/* only to allow build, not used functionally */
+		u32 irq_mask;
+		u32 de_irq_mask[I915_MAX_PIPES];
+	};
+	u32 pipestat_irq_mask[I915_MAX_PIPES];
+
+	bool display_irqs_enabled;
+	u32 enabled_irq_mask;
+
+	struct intel_uncore {
+		spinlock_t lock;
+	} uncore;
+
+	/* only to allow build, not used functionally */
+	struct {
+		unsigned int hpll_freq;
+		unsigned int czclk_freq;
+		unsigned int fsb_freq, mem_freq, is_ddr3;
+		u8 vblank_enabled;
+	};
+	struct {
+		const char *dmc_firmware_path;
+	} params;
+
+	void *pxp;
+#endif
+};
+
+/**
+ * struct xe_file - file handle for XE driver
+ */
+struct xe_file {
+	/** @xe: xe device */
+	struct xe_device *xe;
+
+	/** @drm: base DRM file */
+	struct drm_file *drm;
+
+	/** @vm: VM state for file */
+	struct {
+		/** @xa: xarray to store VMs */
+		struct xarray xa;
+		/** @lock: protects file VM state */
+		struct mutex lock;
+	} vm;
+
+	/** @exec_queue: Submission exec queue state for file */
+	struct {
+		/** @xa: xarray to store engines */
+		struct xarray xa;
+		/** @lock: protects file engine state */
+		struct mutex lock;
+	} exec_queue;
+
+	/** @client: drm client */
+	struct xe_drm_client *client;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_display.c b/drivers/gpu/drm/xe/xe_display.c
new file mode 100644
index 000000000000..e4db069f0db3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_display.c
@@ -0,0 +1,416 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_display.h"
+#include "regs/xe_regs.h"
+
+#include <linux/fb.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_managed.h>
+#include <drm/xe_drm.h>
+
+#include "soc/intel_dram.h"
+#include "i915_drv.h"		/* FIXME: HAS_DISPLAY() depends on this */
+#include "intel_acpi.h"
+#include "intel_audio.h"
+#include "intel_bw.h"
+#include "intel_display.h"
+#include "intel_display_driver.h"
+#include "intel_display_irq.h"
+#include "intel_display_types.h"
+#include "intel_dmc.h"
+#include "intel_dp.h"
+#include "intel_fbdev.h"
+#include "intel_hdcp.h"
+#include "intel_hotplug.h"
+#include "intel_opregion.h"
+#include "xe_module.h"
+
+/* Xe device functions */
+
+static bool has_display(struct xe_device *xe)
+{
+	return HAS_DISPLAY(xe);
+}
+
+/**
+ * xe_display_driver_probe_defer - Check whether probing has to wait for other
+ *				   drivers
+ * @pdev: PCI device
+ *
+ * Returns: true if probe needs to be deferred, false otherwise
+ */
+bool xe_display_driver_probe_defer(struct pci_dev *pdev)
+{
+	/* Only ask the display code when display is enabled by modparam. */
+	if (xe_modparam.enable_display)
+		return intel_display_driver_probe_defer(pdev);
+	return false;
+}
+
+static void xe_display_last_close(struct drm_device *dev)
+{
+	struct xe_device *xe = to_xe_device(dev);
+
+	if (xe->info.enable_display)
+		intel_fbdev_restore_mode(to_xe_device(dev));
+}
+
+/**
+ * xe_display_driver_set_hooks - Add driver flags and hooks for display
+ * @driver: DRM device driver
+ *
+ * Set features and function hooks in @driver that are needed for driving the
+ * display IP. This sets the driver's capability of driving display, regardless
+ * if the device has it enabled
+ */
+void xe_display_driver_set_hooks(struct drm_driver *driver)
+{
+	if (!xe_modparam.enable_display)
+		return;
+
+	driver->driver_features |= DRIVER_MODESET | DRIVER_ATOMIC;
+	driver->lastclose = xe_display_last_close;
+}
+
+static void unset_display_features(struct xe_device *xe)
+{
+	xe->drm.driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC);
+}
+
+static void display_destroy(struct drm_device *dev, void *dummy)
+{
+	struct xe_device *xe = to_xe_device(dev);
+
+	destroy_workqueue(xe->display.hotplug.dp_wq);
+}
+
+/**
+ * xe_display_create - create display struct
+ * @xe: XE device instance
+ *
+ * Initialize all fields used by the display part.
+ *
+ * TODO: once everything can be inside a single struct, make the struct opaque
+ * to the rest of xe and return it to be xe->display.
+ *
+ * Returns: 0 on success, -ENOMEM if the "xe-dp" workqueue cannot be allocated,
+ * or the error returned by drmm_add_action_or_reset().
+ */
+int xe_display_create(struct xe_device *xe)
+{
+	spin_lock_init(&xe->display.fb_tracking.lock);
+
+	/* display_destroy() destroys this unconditionally, so it must exist. */
+	xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0);
+	if (!xe->display.hotplug.dp_wq)
+		return -ENOMEM;
+
+	drmm_mutex_init(&xe->drm, &xe->sb_lock);
+	drmm_mutex_init(&xe->drm, &xe->display.backlight.lock);
+	drmm_mutex_init(&xe->drm, &xe->display.audio.mutex);
+	drmm_mutex_init(&xe->drm, &xe->display.wm.wm_mutex);
+	drmm_mutex_init(&xe->drm, &xe->display.pps.mutex);
+	drmm_mutex_init(&xe->drm, &xe->display.hdcp.hdcp_mutex);
+
+	xe->enabled_irq_mask = ~0;
+
+	/* On failure the action runs right away and frees the workqueue. */
+	return drmm_add_action_or_reset(&xe->drm, display_destroy, NULL);
+}
+
+static void xe_display_fini_nommio(struct drm_device *dev, void *dummy)
+{
+	struct xe_device *xe = to_xe_device(dev);
+
+	if (!xe->info.enable_display)
+		return;
+
+	intel_power_domains_cleanup(xe);
+}
+
+int xe_display_init_nommio(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return 0;
+
+	/* Fake uncore lock */
+	spin_lock_init(&xe->uncore.lock);
+
+	/* This must be called before any calls to HAS_PCH_* */
+	intel_detect_pch(xe);
+
+	return drmm_add_action_or_reset(&xe->drm, xe_display_fini_nommio, xe);
+}
+
+static void xe_display_fini_noirq(struct drm_device *dev, void *dummy)
+{
+	struct xe_device *xe = to_xe_device(dev);
+
+	if (!xe->info.enable_display)
+		return;
+
+	intel_display_driver_remove_noirq(xe);
+	intel_power_domains_driver_remove(xe);
+}
+
+int xe_display_init_noirq(struct xe_device *xe)
+{
+	int err;
+
+	if (!xe->info.enable_display)
+		return 0;
+
+	intel_display_driver_early_probe(xe);
+
+	/* Early display init.. */
+	intel_opregion_setup(xe);
+
+	/*
+	 * Fill the dram structure to get the system dram info. This will be
+	 * used for memory latency calculation.
+	 */
+	intel_dram_detect(xe);
+
+	intel_bw_init_hw(xe);
+
+	intel_display_device_info_runtime_init(xe);
+
+	err = intel_display_driver_probe_noirq(xe);
+	if (err)
+		return err;
+
+	return drmm_add_action_or_reset(&xe->drm, xe_display_fini_noirq, NULL);
+}
+
+static void xe_display_fini_noaccel(struct drm_device *dev, void *dummy)
+{
+	struct xe_device *xe = to_xe_device(dev);
+
+	if (!xe->info.enable_display)
+		return;
+
+	intel_display_driver_remove_nogem(xe);
+}
+
+int xe_display_init_noaccel(struct xe_device *xe)
+{
+	int err;
+
+	if (!xe->info.enable_display)
+		return 0;
+
+	err = intel_display_driver_probe_nogem(xe);
+	if (err)
+		return err;
+
+	return drmm_add_action_or_reset(&xe->drm, xe_display_fini_noaccel, NULL);
+}
+
+int xe_display_init(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return 0;
+
+	return intel_display_driver_probe(xe);
+}
+
+void xe_display_fini(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	/* poll work can call into fbdev, hence clean that up afterwards */
+	intel_hpd_poll_fini(xe);
+	intel_fbdev_fini(xe);
+
+	intel_hdcp_component_fini(xe);
+	intel_audio_deinit(xe);
+}
+
+void xe_display_register(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	intel_display_driver_register(xe);
+	intel_register_dsm_handler();
+	intel_power_domains_enable(xe);
+}
+
+void xe_display_unregister(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	intel_unregister_dsm_handler();
+	intel_power_domains_disable(xe);
+	intel_display_driver_unregister(xe);
+}
+
+void xe_display_driver_remove(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	intel_display_driver_remove(xe);
+
+	intel_display_device_remove(xe);
+}
+
+/* IRQ-related functions */
+
+void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	if (master_ctl & DISPLAY_IRQ)
+		gen11_display_irq_handler(xe);
+}
+
+void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	if (gu_misc_iir & GU_MISC_GSE)
+		intel_opregion_asle_intr(xe);
+}
+
+void xe_display_irq_reset(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	gen11_display_irq_reset(xe);
+}
+
+void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	if (gt->info.id == XE_GT0)
+		gen11_de_irq_postinstall(xe);
+}
+
+/* Call ->suspend() on every encoder that implements it, under modeset locks. */
+static void intel_suspend_encoders(struct xe_device *xe)
+{
+	struct intel_encoder *encoder;
+
+	if (!has_display(xe))
+		return;
+
+	drm_modeset_lock_all(&xe->drm);
+	for_each_intel_encoder(&xe->drm, encoder)
+		if (encoder->suspend)
+			encoder->suspend(encoder);
+	drm_modeset_unlock_all(&xe->drm);
+}
+
+static bool suspend_to_idle(void)
+{
+#if IS_ENABLED(CONFIG_ACPI_SLEEP)
+	if (acpi_target_system_state() < ACPI_STATE_S3)
+		return true;
+#endif
+	return false;
+}
+
+void xe_display_pm_suspend(struct xe_device *xe)
+{
+	bool s2idle = suspend_to_idle();
+	if (!xe->info.enable_display)
+		return;
+
+	/*
+	 * We do a lot of poking in a lot of registers, make sure they work
+	 * properly.
+	 */
+	intel_power_domains_disable(xe);
+	if (has_display(xe))
+		drm_kms_helper_poll_disable(&xe->drm);
+
+	intel_display_driver_suspend(xe);
+
+	intel_dp_mst_suspend(xe);
+
+	intel_hpd_cancel_work(xe);
+
+	intel_suspend_encoders(xe);
+
+	intel_opregion_suspend(xe, s2idle ? PCI_D1 : PCI_D3cold);
+
+	intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true);
+
+	intel_dmc_suspend(xe);
+}
+
+void xe_display_pm_suspend_late(struct xe_device *xe)
+{
+	bool s2idle = suspend_to_idle();
+	if (!xe->info.enable_display)
+		return;
+
+	intel_power_domains_suspend(xe, s2idle);
+
+	intel_display_power_suspend_late(xe);
+}
+
+void xe_display_pm_resume_early(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	intel_display_power_resume_early(xe);
+
+	intel_power_domains_resume(xe);
+}
+
+void xe_display_pm_resume(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	intel_dmc_resume(xe);
+
+	if (has_display(xe))
+		drm_mode_config_reset(&xe->drm);
+
+	intel_display_driver_init_hw(xe);
+	intel_hpd_init(xe);
+
+	/* MST sideband requires HPD interrupts enabled */
+	intel_dp_mst_resume(xe);
+	intel_display_driver_resume(xe);
+
+	intel_hpd_poll_disable(xe);
+	if (has_display(xe))
+		drm_kms_helper_poll_enable(&xe->drm);
+
+	intel_opregion_resume(xe);
+
+	intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_RUNNING, false);
+
+	intel_power_domains_enable(xe);
+}
+
+void xe_display_probe(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		goto no_display;
+
+	intel_display_device_probe(xe);
+
+	if (has_display(xe))
+		return;
+
+no_display:
+	xe->info.enable_display = false;
+	unset_display_features(xe);
+}
diff --git a/drivers/gpu/drm/xe/xe_display.h b/drivers/gpu/drm/xe/xe_display.h
new file mode 100644
index 000000000000..710e56180b52
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_display.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_DISPLAY_H_
+#define _XE_DISPLAY_H_
+
+#include "xe_device.h"
+
+struct drm_driver;
+
+#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
+
+bool xe_display_driver_probe_defer(struct pci_dev *pdev);
+void xe_display_driver_set_hooks(struct drm_driver *driver);
+void xe_display_driver_remove(struct xe_device *xe);
+
+int xe_display_create(struct xe_device *xe);
+
+void xe_display_probe(struct xe_device *xe);
+
+int xe_display_init_nommio(struct xe_device *xe);
+int xe_display_init_noirq(struct xe_device *xe);
+int xe_display_init_noaccel(struct xe_device *xe);
+int xe_display_init(struct xe_device *xe);
+void xe_display_fini(struct xe_device *xe);
+
+void xe_display_register(struct xe_device *xe);
+void xe_display_unregister(struct xe_device *xe);
+
+void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl);
+void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir);
+void xe_display_irq_reset(struct xe_device *xe);
+void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt);
+
+void xe_display_pm_suspend(struct xe_device *xe);
+void xe_display_pm_suspend_late(struct xe_device *xe);
+void xe_display_pm_resume_early(struct xe_device *xe);
+void xe_display_pm_resume(struct xe_device *xe);
+
+#else
+
+static inline bool xe_display_driver_probe_defer(struct pci_dev *pdev) { return false; }
+static inline void xe_display_driver_set_hooks(struct drm_driver *driver) { }
+static inline void xe_display_driver_remove(struct xe_device *xe) {}
+
+static inline int xe_display_create(struct xe_device *xe) { return 0; }
+
+static inline void xe_display_probe(struct xe_device *xe) { }
+
+static inline int xe_display_init_nommio(struct xe_device *xe) { return 0; }
+static inline int xe_display_init_noirq(struct xe_device *xe) { return 0; }
+static inline int xe_display_init_noaccel(struct xe_device *xe) { return 0; }
+static inline int xe_display_init(struct xe_device *xe) { return 0; }
+static inline void xe_display_fini(struct xe_device *xe) {}
+
+static inline void xe_display_register(struct xe_device *xe) {}
+static inline void xe_display_unregister(struct xe_device *xe) {}
+
+static inline void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl) {}
+static inline void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) {}
+static inline void xe_display_irq_reset(struct xe_device *xe) {}
+static inline void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) {}
+
+static inline void xe_display_pm_suspend(struct xe_device *xe) {}
+static inline void xe_display_pm_suspend_late(struct xe_device *xe) {}
+static inline void xe_display_pm_resume_early(struct xe_device *xe) {}
+static inline void xe_display_pm_resume(struct xe_device *xe) {}
+
+#endif /* CONFIG_DRM_XE_DISPLAY */
+#endif /* _XE_DISPLAY_H_ */
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
new file mode 100644
index 000000000000..da2627ed6ae7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -0,0 +1,322 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_dma_buf.h"
+
+#include <kunit/test.h>
+#include <linux/dma-buf.h>
+#include <linux/pci-p2pdma.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_prime.h>
+#include <drm/ttm/ttm_tt.h>
+
+#include "tests/xe_test.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_ttm_vram_mgr.h"
+#include "xe_vm.h"
+
+MODULE_IMPORT_NS(DMA_BUF);
+
+static int xe_dma_buf_attach(struct dma_buf *dmabuf,
+			     struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *gem = attach->dmabuf->priv;
+	struct pci_dev *exporter = to_pci_dev(gem->dev->dev);
+
+	if (attach->peer2peer && pci_p2pdma_distance(exporter, attach->dev, false) < 0)
+		attach->peer2peer = false;	/* P2P query failed: fall back to non-P2P */
+	/* A non-P2P attachment is refused unless the bo can migrate to XE_PL_TT. */
+	if (!(attach->peer2peer || xe_bo_can_migrate(gem_to_xe_bo(gem), XE_PL_TT)))
+		return -EOPNOTSUPP;
+
+	xe_device_mem_access_get(to_xe_device(gem->dev));
+	return 0;
+}
+
+static void xe_dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *gem = attach->dmabuf->priv;
+	struct xe_device *xe = to_xe_device(gem->dev);
+
+	xe_device_mem_access_put(xe);	/* pairs with the get in xe_dma_buf_attach() */
+}
+
+static int xe_dma_buf_pin(struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+	struct xe_bo *bo = gem_to_xe_bo(obj);
+	struct xe_device *xe = xe_bo_device(bo);
+	int ret;
+
+	/*
+	 * For now only support pinning in TT memory, for two reasons:
+	 * 1) Avoid pinning in a placement not accessible to some importers.
+	 * 2) Pinning in VRAM requires PIN accounting which is a to-do.
+	 * Compare the resource's mem_type (an XE_PL_* id), not its placement flags.
+	 */
+	if (xe_bo_is_pinned(bo) && bo->ttm.resource->mem_type != XE_PL_TT) {
+		drm_dbg(&xe->drm, "Can't migrate pinned bo for dma-buf pin.\n");
+		return -EINVAL;
+	}
+
+	ret = xe_bo_migrate(bo, XE_PL_TT);
+	if (ret) {
+		if (ret != -EINTR && ret != -ERESTARTSYS)
+			drm_dbg(&xe->drm, "Failed migrating dma-buf to TT memory: %pe\n",
+				ERR_PTR(ret));
+		return ret;
+	}
+
+	ret = xe_bo_pin_external(bo);
+	xe_assert(xe, !ret);	/* NOTE(review): pin failure is asserted, not returned */
+
+	return 0;
+}
+
+static void xe_dma_buf_unpin(struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *gem = attach->dmabuf->priv;
+
+	/* Undo the xe_bo_pin_external() done by xe_dma_buf_pin(). */
+	xe_bo_unpin_external(gem_to_xe_bo(gem));
+}
+
+static struct sg_table *xe_dma_buf_map(struct dma_buf_attachment *attach,
+				       enum dma_data_direction dir)
+{
+	struct dma_buf *dma_buf = attach->dmabuf;
+	struct drm_gem_object *obj = dma_buf->priv;
+	struct xe_bo *bo = gem_to_xe_bo(obj);
+	struct sg_table *sgt;
+	int r = 0;
+
+	if (!attach->peer2peer && !xe_bo_can_migrate(bo, XE_PL_TT))
+		return ERR_PTR(-EOPNOTSUPP);
+	/* An unpinned bo is migrated to TT (non-P2P) or validated (P2P) first. */
+	if (!xe_bo_is_pinned(bo)) {
+		if (!attach->peer2peer)
+			r = xe_bo_migrate(bo, XE_PL_TT);
+		else
+			r = xe_bo_validate(bo, NULL, false);
+		if (r)
+			return ERR_PTR(r);
+	}
+	/* Build the table according to where the bo's resource now lives. */
+	switch (bo->ttm.resource->mem_type) {
+	case XE_PL_TT:
+		sgt = drm_prime_pages_to_sg(obj->dev,
+					    bo->ttm.ttm->pages,
+					    bo->ttm.ttm->num_pages);
+		if (IS_ERR(sgt))
+			return sgt;
+
+		if (dma_map_sgtable(attach->dev, sgt, dir,
+				    DMA_ATTR_SKIP_CPU_SYNC))
+			goto error_free;
+		break;
+
+	case XE_PL_VRAM0:
+	case XE_PL_VRAM1:
+		r = xe_ttm_vram_mgr_alloc_sgt(xe_bo_device(bo),
+					      bo->ttm.resource, 0,
+					      bo->ttm.base.size, attach->dev,
+					      dir, &sgt);
+		if (r)
+			return ERR_PTR(r);
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+
+	return sgt;
+
+error_free:
+	sg_free_table(sgt);
+	kfree(sgt);
+	return ERR_PTR(-EBUSY);	/* any dma_map_sgtable() failure is reported as -EBUSY */
+}
+
+static void xe_dma_buf_unmap(struct dma_buf_attachment *attach,
+			     struct sg_table *sgt,
+			     enum dma_data_direction dir)
+{
+	struct xe_bo *bo = gem_to_xe_bo(attach->dmabuf->priv);
+
+	/* A bo now in VRAM is assumed to have been mapped by the VRAM manager. */
+	if (xe_bo_is_vram(bo)) {
+		xe_ttm_vram_mgr_free_sgt(attach->dev, dir, sgt);
+		return;
+	}
+	dma_unmap_sgtable(attach->dev, sgt, dir, 0);
+	sg_free_table(sgt);
+	kfree(sgt);
+}
+
+static int xe_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
+				       enum dma_data_direction direction)
+{
+	struct drm_gem_object *gem = dma_buf->priv;
+	struct xe_bo *bo = gem_to_xe_bo(gem);
+
+	/* Nothing to do unless the direction includes DMA_FROM_DEVICE. */
+	if (direction != DMA_BIDIRECTIONAL && direction != DMA_FROM_DEVICE)
+		return 0;
+
+	/* Best effort: try to move the bo to TT; a failed migration is ignored. */
+	/* Can we do interruptible lock here? */
+	xe_bo_lock(bo, false);
+	(void)xe_bo_migrate(bo, XE_PL_TT);
+	xe_bo_unlock(bo);
+
+	return 0;
+}
+
+static const struct dma_buf_ops xe_dmabuf_ops = {	/* set by xe_gem_prime_export() */
+	.attach = xe_dma_buf_attach,
+	.detach = xe_dma_buf_detach,
+	.pin = xe_dma_buf_pin,
+	.unpin = xe_dma_buf_unpin,
+	.map_dma_buf = xe_dma_buf_map,
+	.unmap_dma_buf = xe_dma_buf_unmap,
+	.release = drm_gem_dmabuf_release,	/* drm_gem_dmabuf_*: not defined in this file */
+	.begin_cpu_access = xe_dma_buf_begin_cpu_access,
+	.mmap = drm_gem_dmabuf_mmap,
+	.vmap = drm_gem_dmabuf_vmap,
+	.vunmap = drm_gem_dmabuf_vunmap,
+};
+
+struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags)
+{
+	struct dma_buf *dmabuf;
+
+	/* A bo with ->vm set is refused for export. */
+	if (gem_to_xe_bo(obj)->vm)
+		return ERR_PTR(-EPERM);
+
+	dmabuf = drm_gem_prime_export(obj, flags);
+	if (IS_ERR(dmabuf))
+		return dmabuf;
+	dmabuf->ops = &xe_dmabuf_ops;	/* switch the new dma-buf over to xe's ops */
+	return dmabuf;
+}
+
+static struct drm_gem_object *
+xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
+		    struct dma_buf *dma_buf)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct dma_resv *resv = dma_buf->resv;
+	struct xe_bo *imported;
+
+	/*
+	 * Create the importer-side bo on top of @storage, using the
+	 * dma-buf's reservation object, which is locked across the creation
+	 * call and released on both the success and the failure path.
+	 */
+	dma_resv_lock(resv, NULL);
+	imported = ___xe_bo_create_locked(xe, storage, NULL, resv, NULL,
+					  dma_buf->size,
+					  0, /* Will require 1way or 2way for vm_bind */
+					  ttm_bo_type_sg, XE_BO_CREATE_SYSTEM_BIT);
+	dma_resv_unlock(resv);
+
+	if (IS_ERR(imported))
+		return ERR_CAST(imported);
+
+	return &imported->ttm.base;
+}
+
+static void xe_dma_buf_move_notify(struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *gem = attach->importer_priv;
+
+	/* Importer-side move_notify hook: evict the importing bo, warn on failure. */
+	XE_WARN_ON(xe_bo_evict(gem_to_xe_bo(gem), false));
+}
+
+static const struct dma_buf_attach_ops xe_dma_buf_attach_ops = {
+	.allow_peer2peer = true,	/* default ops used by xe_gem_prime_import() */
+	.move_notify = xe_dma_buf_move_notify	/* evicts the importing bo */
+};
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+
+struct dma_buf_test_params {
+	struct xe_test_priv base;	/* embedded; to_dma_buf_test_params() recovers the outer struct */
+	const struct dma_buf_attach_ops *attach_ops;	/* replaces xe_dma_buf_attach_ops on import */
+	bool force_different_devices;	/* disables the same-device import shortcut */
+	u32 mem_mask;	/* presumably consumed by tests/xe_dma_buf.c - TODO confirm */
+};
+
+#define to_dma_buf_test_params(_priv) \
+	container_of(_priv, struct dma_buf_test_params, base)
+#endif
+
+struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev,
+					   struct dma_buf *dma_buf)
+{
+	XE_TEST_DECLARE(struct dma_buf_test_params *test =
+			to_dma_buf_test_params
+			(xe_cur_kunit_priv(XE_TEST_LIVE_DMA_BUF));)
+	const struct dma_buf_attach_ops *attach_ops;
+	struct dma_buf_attachment *attach;
+	struct drm_gem_object *obj;
+	struct xe_bo *bo;
+
+	if (dma_buf->ops == &xe_dmabuf_ops) {
+		obj = dma_buf->priv;
+		if (obj->dev == dev &&
+		    !XE_TEST_ONLY(test && test->force_different_devices)) {
+			/*
+			 * Importing a dmabuf exported from our own gem increases
+			 * refcount on gem itself instead of f_count of dmabuf.
+			 */
+			drm_gem_object_get(obj);
+			return obj;
+		}
+	}
+
+	/*
+	 * Don't publish the bo until we have a valid attachment, and a
+	 * valid attachment needs the bo address. So pre-create a bo before
+	 * creating the attachment and publish.
+	 */
+	bo = xe_bo_alloc();
+	if (IS_ERR(bo))
+		return ERR_CAST(bo);
+
+	attach_ops = &xe_dma_buf_attach_ops;
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+	if (test)
+		attach_ops = test->attach_ops;
+#endif
+
+	attach = dma_buf_dynamic_attach(dma_buf, dev->dev, attach_ops, &bo->ttm.base);
+	if (IS_ERR(attach)) {
+		obj = ERR_CAST(attach);
+		goto out_err;
+	}
+
+	/* Errors here will take care of freeing the bo. */
+	obj = xe_dma_buf_init_obj(dev, bo, dma_buf);
+	if (IS_ERR(obj))
+		return obj;	/* NOTE(review): @attach is not detached here - confirm */
+
+	/* The imported object takes its own dma-buf reference and records @attach. */
+	get_dma_buf(dma_buf);
+	obj->import_attach = attach;
+	return obj;
+
+out_err:
+	xe_bo_free(bo);
+
+	return obj;
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_dma_buf.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.h b/drivers/gpu/drm/xe/xe_dma_buf.h
new file mode 100644
index 000000000000..861dd28a862c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_dma_buf.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_DMA_BUF_H_
+#define _XE_DMA_BUF_H_
+
+#include <drm/drm_gem.h>
+
+struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags);
+struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev,
+					   struct dma_buf *dma_buf);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
new file mode 100644
index 000000000000..6040e4d22b28
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_print.h>
+#include <drm/xe_drm.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include "xe_bo.h"
+#include "xe_bo_types.h"
+#include "xe_device_types.h"
+#include "xe_drm_client.h"
+#include "xe_trace.h"
+
+/**
+ * xe_drm_client_alloc() - Allocate drm client
+ *
+ * Allocate a zeroed drm client struct with its reference count
+ * initialised; it tracks the client's memory for as long as the client
+ * lives. Call this API whenever a new client has opened the xe device and
+ * release the result with xe_drm_client_put().
+ *
+ * Return: pointer to client struct or NULL if can't allocate
+ */
+struct xe_drm_client *xe_drm_client_alloc(void)
+{
+	struct xe_drm_client *client = kzalloc(sizeof(*client), GFP_KERNEL);
+
+	if (client) {
+		kref_init(&client->kref);
+
+		/* The bo list is only compiled in with procfs support. */
+#ifdef CONFIG_PROC_FS
+		spin_lock_init(&client->bos_lock);
+		INIT_LIST_HEAD(&client->bos_list);
+#endif
+	}
+
+	return client;
+}
+
+/**
+ * __xe_drm_client_free() - Release callback for a client's kref
+ * @kref: The kref embedded in the client being freed
+ *
+ * Frees the client struct containing @kref. Not meant to be called
+ * directly: xe_drm_client_put() passes it to kref_put().
+ *
+ * Return: void
+ */
+void __xe_drm_client_free(struct kref *kref)
+{
+	struct xe_drm_client *victim;
+
+	victim = container_of(kref, struct xe_drm_client, kref);
+	kfree(victim);
+}
+
+#ifdef CONFIG_PROC_FS
+/**
+ * xe_drm_client_add_bo() - Add BO for tracking client mem usage
+ * @client: The drm client ptr
+ * @bo: The xe BO ptr
+ *
+ * Link @bo into @client's bos_list and store a new client reference in
+ * @bo->client. Warns if @bo is already linked or already has a client.
+ *
+ * Return: void
+ */
+void xe_drm_client_add_bo(struct xe_drm_client *client,
+			  struct xe_bo *bo)
+{
+	XE_WARN_ON(bo->client);
+	XE_WARN_ON(!list_empty(&bo->client_link));
+
+	spin_lock(&client->bos_lock);
+	bo->client = xe_drm_client_get(client);	/* dropped in xe_drm_client_remove_bo() */
+	list_add_tail_rcu(&bo->client_link, &client->bos_list);
+	spin_unlock(&client->bos_lock);
+}
+
+/**
+ * xe_drm_client_remove_bo() - Remove BO for tracking client mem usage
+ * @bo: The xe BO ptr
+ *
+ * Unlink @bo from the bos_list of the client stored in @bo->client and drop
+ * the client reference taken by xe_drm_client_add_bo().
+ *
+ * Return: void
+ */
+void xe_drm_client_remove_bo(struct xe_bo *bo)
+{
+	struct xe_drm_client *client = bo->client;
+
+	spin_lock(&client->bos_lock);
+	list_del_rcu(&bo->client_link);
+	spin_unlock(&client->bos_lock);
+
+	xe_drm_client_put(client);	/* NOTE(review): bo->client is left pointing at @client */
+}
+
+static void bo_meminfo(struct xe_bo *bo,
+		       struct drm_memory_stats stats[TTM_NUM_MEM_TYPES])
+{
+	/* Account against the first requested placement, or TT if none is set. */
+	u32 type = bo->placement.placement ?
+		bo->placement.placement->mem_type : XE_PL_TT;
+	struct drm_memory_stats *entry = &stats[type];
+	u64 bytes = bo->size;
+
+	/* More than one GEM handle counts as shared, otherwise private. */
+	if (bo->ttm.base.handle_count > 1)
+		entry->shared += bytes;
+	else
+		entry->private += bytes;
+
+	if (!xe_bo_has_pages(bo))
+		return;
+
+	entry->resident += bytes;
+
+	/* Unsignaled fences mean active; only idle system bos are purgeable. */
+	if (!dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_BOOKKEEP))
+		entry->active += bytes;
+	else if (type == XE_PL_SYSTEM)
+		entry->purgeable += bytes;
+}
+
+/* Print per-memory-type usage of @file's GEM handles and its client's bos_list. */
+static void show_meminfo(struct drm_printer *p, struct drm_file *file)
+{
+	struct drm_memory_stats stats[TTM_NUM_MEM_TYPES] = {};
+	struct xe_file *xef = file->driver_priv;
+	struct ttm_device *bdev = &xef->xe->ttm;
+	struct xe_drm_client *client = xef->client;
+	struct ttm_resource_manager *man;
+	struct drm_gem_object *obj;
+	struct xe_bo *bo;
+	unsigned int id;
+	u32 mem_type;
+
+	/* Public objects. */
+	spin_lock(&file->table_lock);
+	idr_for_each_entry(&file->object_idr, obj, id)
+		bo_meminfo(gem_to_xe_bo(obj), stats);
+	spin_unlock(&file->table_lock);
+
+	/*
+	 * Internal objects. Skip bos whose refcount already hit zero.
+	 * NOTE(review): xe_bo_put() runs with bos_lock held; confirm that
+	 * dropping the last reference here can never sleep.
+	 */
+	spin_lock(&client->bos_lock);
+	list_for_each_entry_rcu(bo, &client->bos_list, client_link) {
+		if (!kref_get_unless_zero(&bo->ttm.base.refcount))
+			continue;
+		bo_meminfo(bo, stats);
+		xe_bo_put(bo);
+	}
+	spin_unlock(&client->bos_lock);
+
+	/* Only report types that have both a name and a resource manager. */
+	for (mem_type = XE_PL_SYSTEM; mem_type < TTM_NUM_MEM_TYPES; ++mem_type) {
+		if (!xe_mem_type_to_name[mem_type])
+			continue;
+
+		man = ttm_manager_type(bdev, mem_type);
+		if (man) {
+			drm_print_memory_stats(p,
+					       &stats[mem_type],
+					       DRM_GEM_OBJECT_RESIDENT |
+					       (mem_type != XE_PL_SYSTEM ? 0 :
+					       DRM_GEM_OBJECT_PURGEABLE),
+					       xe_mem_type_to_name[mem_type]);
+		}
+	}
+}
+
+/**
+ * xe_drm_client_fdinfo() - Callback for fdinfo interface
+ * @p: The drm_printer ptr
+ * @file: The drm_file ptr
+ *
+ * This is the callback for the drm fdinfo interface; it prints the memory
+ * stats gathered by show_meminfo(). Register it as show_fdinfo in the driver ops.
+ *
+ * Return: void
+ */
+void xe_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file)
+{
+	show_meminfo(p, file);
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_drm_client.h b/drivers/gpu/drm/xe/xe_drm_client.h
new file mode 100644
index 000000000000..a9649aa36011
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_drm_client.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_DRM_CLIENT_H_
+#define _XE_DRM_CLIENT_H_
+
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/pid.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+
+struct drm_file;
+struct drm_printer;
+struct xe_bo;
+
+struct xe_drm_client {
+	struct kref kref;	/* refcount; the last put calls __xe_drm_client_free() */
+	unsigned int id;	/* NOTE(review): no writer visible in this chunk - confirm use */
+#ifdef CONFIG_PROC_FS
+	/**
+	 * @bos_lock: lock protecting @bos_list
+	 */
+	spinlock_t bos_lock;
+	/**
+	 * @bos_list: list of bos created by this client
+	 *
+	 * Protected by @bos_lock.
+	 */
+	struct list_head bos_list;
+#endif
+};
+
+static inline struct xe_drm_client *
+xe_drm_client_get(struct xe_drm_client *client)
+{
+	kref_get(&client->kref);	/* dropped again via xe_drm_client_put() */
+	return client;			/* same pointer, handy for assignments */
+}
+
+void __xe_drm_client_free(struct kref *kref);
+
+static inline void xe_drm_client_put(struct xe_drm_client *client)
+{
+	kref_put(&client->kref, __xe_drm_client_free);	/* release callback frees the client */
+}
+
+struct xe_drm_client *xe_drm_client_alloc(void);
+static inline struct xe_drm_client *
+xe_drm_client_get(struct xe_drm_client *client);
+static inline void xe_drm_client_put(struct xe_drm_client *client);
+#ifdef CONFIG_PROC_FS
+void xe_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file);
+void xe_drm_client_add_bo(struct xe_drm_client *client,
+			  struct xe_bo *bo);
+void xe_drm_client_remove_bo(struct xe_bo *bo);
+#else
+static inline void xe_drm_client_add_bo(struct xe_drm_client *client,
+					struct xe_bo *bo)
+{
+}
+
+static inline void xe_drm_client_remove_bo(struct xe_bo *bo)
+{
+}
+#endif
+#endif
diff --git a/drivers/gpu/drm/xe/xe_drv.h b/drivers/gpu/drm/xe/xe_drv.h
new file mode 100644
index 000000000000..d45b71426cc8
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_drv.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_DRV_H_
+#define _XE_DRV_H_
+
+#include <drm/drm_drv.h>
+
+#define DRIVER_NAME		"xe"
+#define DRIVER_DESC		"Intel Xe Graphics"
+#define DRIVER_DATE		"20201103"
+
+/* Interface history:
+ *
+ * 1.1: Original.
+ */
+#define DRIVER_MAJOR		1
+#define DRIVER_MINOR		1
+#define DRIVER_PATCHLEVEL	0
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
new file mode 100644
index 000000000000..17f26952e665
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -0,0 +1,359 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_exec.h"
+
+#include <drm/drm_device.h>
+#include <drm/drm_exec.h>
+#include <drm/drm_file.h>
+#include <drm/xe_drm.h>
+#include <linux/delay.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_exec_queue.h"
+#include "xe_macros.h"
+#include "xe_ring_ops_types.h"
+#include "xe_sched_job.h"
+#include "xe_sync.h"
+#include "xe_vm.h"
+
+/**
+ * DOC: Execbuf (User GPU command submission)
+ *
+ * Execs have historically been rather complicated in DRM drivers (at least in
+ * the i915) because of a few things:
+ *
+ * - Passing in a list of BOs which are read / written to, creating implicit syncs
+ * - Binding at exec time
+ * - Flow controlling the ring at exec time
+ *
+ * In XE we avoid all of this complication by not allowing a BO list to be
+ * passed into an exec, using the dma-buf implicit sync uAPI, have binds as
+ * separate operations, and using the DRM scheduler to flow control the ring.
+ * Let's deep dive on each of these.
+ *
+ * We can get away from a BO list by forcing the user to use in / out fences on
+ * every exec rather than the kernel tracking dependencies of BO (e.g. if the
+ * user knows an exec writes to a BO and reads from the BO in the next exec, it
+ * is the user's responsibility to pass in / out fence between the two execs).
+ *
+ * Implicit dependencies for external BOs are handled by using the dma-buf
+ * implicit dependency uAPI (TODO: add link). To make this work each exec must
+ * install the job's fence into the DMA_RESV_USAGE_WRITE slot of every external
+ * BO mapped in the VM.
+ *
+ * We do not allow a user to trigger a bind at exec time rather we have a VM
+ * bind IOCTL which uses the same in / out fence interface as exec. In that
+ * sense, a VM bind is basically the same operation as an exec from the user
+ * perspective. e.g. If an exec depends on a VM bind use the in / out fence
+ * interface (struct drm_xe_sync) to synchronize like syncing between two
+ * dependent execs.
+ *
+ * Although a user cannot trigger a bind, we still have to rebind userptrs in
+ * the VM that have been invalidated since the last exec, likewise we also have
+ * to rebind BOs that have been evicted by the kernel. We schedule these rebinds
+ * behind any pending kernel operations on any external BOs in VM or any BOs
+ * private to the VM. This is accomplished by the rebinds waiting on BOs
+ * DMA_RESV_USAGE_KERNEL slot (kernel ops) and kernel ops waiting on all BOs
+ * slots (inflight execs are in the DMA_RESV_USAGE_BOOKKEEP for private BOs and
+ * in DMA_RESV_USAGE_WRITE for external BOs).
+ *
+ * Rebinds / dma-resv usage applies to non-compute mode VMs only as for compute
+ * mode VMs we use preempt fences and a rebind worker (TODO: add link).
+ *
+ * There is no need to flow control the ring in the exec as we write the ring at
+ * submission time and set the DRM scheduler max job limit SIZE_OF_RING /
+ * MAX_JOB_SIZE. The DRM scheduler will then hold all jobs until space in the
+ * ring is available.
+ *
+ * All of this results in a rather simple exec implementation.
+ *
+ * Flow
+ * ~~~~
+ *
+ * .. code-block::
+ *
+ *	Parse input arguments
+ *	Wait for any async VM bind passed as in-fences to start
+ *	<----------------------------------------------------------------------|
+ *	Lock global VM lock in read mode                                       |
+ *	Pin userptrs (also finds userptr invalidated since last exec)          |
+ *	Lock exec (VM dma-resv lock, external BOs dma-resv locks)              |
+ *	Validate BOs that have been evicted                                    |
+ *	Create job                                                             |
+ *	Rebind invalidated userptrs + evicted BOs (non-compute-mode)           |
+ *	Add rebind fence dependency to job                                     |
+ *	Add job VM dma-resv bookkeeping slot (non-compute mode)                |
+ *	Add job to external BOs dma-resv write slots (non-compute mode)        |
+ *	Check if any userptrs invalidated since pin ------ Drop locks ---------|
+ *	Install in / out fences for job
+ *	Submit job
+ *	Unlock all
+ */
+
+static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec)	/* .extra.fn of xe_exec_ioctl() */
+{
+	return drm_gpuvm_validate(vm_exec->vm, &vm_exec->exec);
+}
+
+int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_exec *args = data;
+	struct drm_xe_sync __user *syncs_user = u64_to_user_ptr(args->syncs);
+	u64 __user *addresses_user = u64_to_user_ptr(args->address);
+	struct xe_exec_queue *q;
+	struct xe_sync_entry *syncs = NULL;
+	u64 addresses[XE_HW_ENGINE_MAX_INSTANCE];
+	struct drm_gpuvm_exec vm_exec = {.extra.fn = xe_exec_fn};
+	struct drm_exec *exec = &vm_exec.exec;
+	u32 i, num_syncs = 0, num_ufence = 0;
+	struct xe_sched_job *job;
+	struct dma_fence *rebind_fence;
+	struct xe_vm *vm;
+	bool write_locked, skip_retry = false;
+	ktime_t end = 0;
+	int err = 0;
+
+	if (XE_IOCTL_DBG(xe, args->extensions) ||
+	    XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+	/* The looked-up queue is put at err_exec_queue: no early return past here. */
+	q = xe_exec_queue_lookup(xef, args->exec_queue_id);
+	if (XE_IOCTL_DBG(xe, !q))
+		return -ENOENT;
+
+	if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM) ||
+	    XE_IOCTL_DBG(xe, args->num_batch_buffer &&
+			 q->width != args->num_batch_buffer)) {
+		err = -EINVAL;
+		goto err_exec_queue;
+	}
+
+	if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_BANNED)) {
+		err = -ECANCELED;
+		goto err_exec_queue;
+	}
+
+	if (args->num_syncs) {
+		syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
+		if (!syncs) {
+			err = -ENOMEM;
+			goto err_exec_queue;
+		}
+	}
+
+	vm = q->vm;
+
+	for (i = 0; i < args->num_syncs; i++) {
+		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++],
+					  &syncs_user[i], SYNC_PARSE_FLAG_EXEC |
+					  (xe_vm_in_lr_mode(vm) ?
+					   SYNC_PARSE_FLAG_LR_MODE : 0));
+		if (err)
+			goto err_syncs;
+
+		if (xe_sync_is_ufence(&syncs[i]))
+			num_ufence++;
+	}
+
+	if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
+		err = -EINVAL;
+		goto err_syncs;
+	}
+
+	if (xe_exec_queue_is_parallel(q)) {
+		err = copy_from_user(addresses, addresses_user, sizeof(u64) *
+				     q->width);
+		if (err) {
+			err = -EFAULT;
+			goto err_syncs;
+		}
+	}
+
+retry:
+	if (!xe_vm_in_lr_mode(vm) && xe_vm_userptr_check_repin(vm)) {
+		err = down_write_killable(&vm->lock);
+		write_locked = true;
+	} else {
+		/* We don't allow execs while the VM is in error state */
+		err = down_read_interruptible(&vm->lock);
+		write_locked = false;
+	}
+	if (err)
+		goto err_syncs;
+
+	if (write_locked) {
+		err = xe_vm_userptr_pin(vm);
+		downgrade_write(&vm->lock);
+		write_locked = false;
+		if (err)
+			goto err_unlock_list;
+	}
+
+	vm_exec.vm = &vm->gpuvm;
+	vm_exec.num_fences = 1 + vm->xe->info.tile_count;
+	vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT;
+	if (xe_vm_in_lr_mode(vm)) {
+		drm_exec_init(exec, vm_exec.flags, 0);
+	} else {
+		err = drm_gpuvm_exec_lock(&vm_exec);
+		if (err) {
+			if (xe_vm_validate_should_retry(exec, err, &end))
+				err = -EAGAIN;
+			goto err_unlock_list;
+		}
+	}
+
+	if (xe_vm_is_closed_or_banned(q->vm)) {
+		drm_warn(&xe->drm, "Trying to schedule after vm is closed or banned\n");
+		err = -ECANCELED;
+		goto err_exec;
+	}
+
+	if (!args->num_batch_buffer) {
+		if (!xe_vm_in_lr_mode(vm)) {
+			struct dma_fence *fence;
+
+			fence = xe_sync_in_fence_get(syncs, num_syncs, q, vm);
+			if (IS_ERR(fence)) {
+				err = PTR_ERR(fence);
+				goto err_exec;
+			}
+			for (i = 0; i < num_syncs; i++)
+				xe_sync_entry_signal(&syncs[i], NULL, fence);
+			xe_exec_queue_last_fence_set(q, vm, fence);
+			dma_fence_put(fence);
+		}
+
+		goto err_exec;
+	}
+
+	if (xe_exec_queue_is_lr(q) && xe_exec_queue_ring_full(q)) {
+		err = -EWOULDBLOCK;	/* Aliased to -EAGAIN */
+		skip_retry = true;
+		goto err_exec;
+	}
+
+	job = xe_sched_job_create(q, xe_exec_queue_is_parallel(q) ?
+				  addresses : &args->address);
+	if (IS_ERR(job)) {
+		err = PTR_ERR(job);
+		goto err_exec;
+	}
+
+	/*
+	 * Rebind any invalidated userptr or evicted BOs in the VM, non-compute
+	 * VM mode only.
+	 */
+	rebind_fence = xe_vm_rebind(vm, false);
+	if (IS_ERR(rebind_fence)) {
+		err = PTR_ERR(rebind_fence);
+		goto err_put_job;
+	}
+
+	/*
+	 * We store the rebind_fence in the VM so subsequent execs don't get
+	 * scheduled before the rebinds of userptrs / evicted BOs is complete.
+	 */
+	if (rebind_fence) {
+		dma_fence_put(vm->rebind_fence);
+		vm->rebind_fence = rebind_fence;
+	}
+	if (vm->rebind_fence) {
+		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+			     &vm->rebind_fence->flags)) {
+			dma_fence_put(vm->rebind_fence);
+			vm->rebind_fence = NULL;
+		} else {
+			dma_fence_get(vm->rebind_fence);
+			err = drm_sched_job_add_dependency(&job->drm,
+							   vm->rebind_fence);
+			if (err)
+				goto err_put_job;
+		}
+	}
+
+	/* Wait behind munmap style rebinds */
+	if (!xe_vm_in_lr_mode(vm)) {
+		err = drm_sched_job_add_resv_dependencies(&job->drm,
+							  xe_vm_resv(vm),
+							  DMA_RESV_USAGE_KERNEL);
+		if (err)
+			goto err_put_job;
+	}
+
+	for (i = 0; i < num_syncs && !err; i++)
+		err = xe_sync_entry_add_deps(&syncs[i], job);
+	if (err)
+		goto err_put_job;
+
+	if (!xe_vm_in_lr_mode(vm)) {
+		err = xe_sched_job_last_fence_add_dep(job, vm);
+		if (err)
+			goto err_put_job;
+
+		err = down_read_interruptible(&vm->userptr.notifier_lock);
+		if (err)
+			goto err_put_job;
+
+		err = __xe_vm_userptr_needs_repin(vm);
+		if (err)
+			goto err_repin;
+	}
+
+	/*
+	 * Point of no return, if we error after this point just set an error on
+	 * the job and let the DRM scheduler / backend clean up the job.
+	 */
+	xe_sched_job_arm(job);
+	if (!xe_vm_in_lr_mode(vm))
+		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, &job->drm.s_fence->finished,
+					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE);
+
+	for (i = 0; i < num_syncs; i++)
+		xe_sync_entry_signal(&syncs[i], job,
+				     &job->drm.s_fence->finished);
+
+	if (xe_exec_queue_is_lr(q))
+		q->ring_ops->emit_job(job);
+	if (!xe_vm_in_lr_mode(vm))
+		xe_exec_queue_last_fence_set(q, vm, &job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+	xe_vm_reactivate_rebind(vm);
+
+	if (!err && !xe_vm_in_lr_mode(vm)) {
+		spin_lock(&xe->ttm.lru_lock);
+		ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
+		spin_unlock(&xe->ttm.lru_lock);
+	}
+
+err_repin:
+	if (!xe_vm_in_lr_mode(vm))
+		up_read(&vm->userptr.notifier_lock);
+err_put_job:
+	if (err)
+		xe_sched_job_put(job);
+err_exec:
+	drm_exec_fini(exec);
+err_unlock_list:
+	if (write_locked)
+		up_write(&vm->lock);
+	else
+		up_read(&vm->lock);
+	if (err == -EAGAIN && !skip_retry)
+		goto retry;
+err_syncs:
+	for (i = 0; i < num_syncs; i++)
+		xe_sync_entry_cleanup(&syncs[i]);
+	kfree(syncs);
+err_exec_queue:
+	xe_exec_queue_put(q);
+
+	return err;
+}
diff --git a/drivers/gpu/drm/xe/xe_exec.h b/drivers/gpu/drm/xe/xe_exec.h
new file mode 100644
index 000000000000..e4932494cea3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_exec.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_EXEC_H_
+#define _XE_EXEC_H_
+
+struct drm_device;
+struct drm_file;
+
+int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
new file mode 100644
index 000000000000..49223026c89f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -0,0 +1,860 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_exec_queue.h"
+
+#include <linux/nospec.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/xe_drm.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_hw_engine_class_sysfs.h"
+#include "xe_hw_fence.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_migrate.h"
+#include "xe_pm.h"
+#include "xe_ring_ops_types.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+
+enum xe_exec_queue_sched_prop {	/* selects a min/max pair in xe_exec_queue_get_prop_minmax() */
+	XE_EXEC_QUEUE_JOB_TIMEOUT = 0,		/* job_timeout_min / job_timeout_max */
+	XE_EXEC_QUEUE_TIMESLICE = 1,		/* timeslice_min / timeslice_max */
+	XE_EXEC_QUEUE_PREEMPT_TIMEOUT = 2,	/* preempt_timeout_min / preempt_timeout_max */
+	XE_EXEC_QUEUE_SCHED_PROP_MAX = 3,	/* one past the last selector above */
+};
+
+/* Allocate and set up an exec queue with @width LRCs; returns it or an ERR_PTR(). */
+static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
+						    struct xe_vm *vm, u32 logical_mask,
+						    u16 width, struct xe_hw_engine *hwe,
+						    u32 flags)
+{
+	struct xe_exec_queue *q;
+	struct xe_gt *gt = hwe->gt;
+	int err, i;
+
+	/* only kernel queues can be permanent */
+	XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL));
+
+	q = kzalloc(sizeof(*q) + sizeof(struct xe_lrc) * width, GFP_KERNEL);
+	if (!q)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&q->refcount);
+	q->flags = flags;
+	q->hwe = hwe;
+	q->gt = gt;
+	if (vm)
+		q->vm = xe_vm_get(vm);
+	q->class = hwe->class;
+	q->width = width;
+	q->logical_mask = logical_mask;
+	q->fence_irq = &gt->fence_irq[hwe->class];
+	q->ring_ops = gt->ring_ops[hwe->class];
+	q->ops = gt->exec_queue_ops;
+	INIT_LIST_HEAD(&q->compute.link);
+	INIT_LIST_HEAD(&q->multi_gt_link);
+
+	q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us;
+	q->sched_props.preempt_timeout_us = hwe->eclass->sched_props.preempt_timeout_us;
+	if (q->flags & EXEC_QUEUE_FLAG_KERNEL &&
+	    q->flags & EXEC_QUEUE_FLAG_HIGH_PRIORITY)
+		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_KERNEL;
+	else
+		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;
+
+	if (xe_exec_queue_is_parallel(q)) {
+		q->parallel.composite_fence_ctx = dma_fence_context_alloc(1);
+		q->parallel.composite_fence_seqno = XE_FENCE_INITIAL_SEQNO;
+	}
+	if (q->flags & EXEC_QUEUE_FLAG_VM) {
+		q->bind.fence_ctx = dma_fence_context_alloc(1);
+		q->bind.fence_seqno = XE_FENCE_INITIAL_SEQNO;
+	}
+
+	for (i = 0; i < width; ++i) {
+		err = xe_lrc_init(q->lrc + i, hwe, q, vm, SZ_16K);
+		if (err)
+			goto err_lrc;
+	}
+
+	err = q->ops->init(q);
+	if (err)
+		goto err_lrc;
+
+	/*
+	 * Normally the user vm holds an rpm ref to keep the device
+	 * awake, and the context holds a ref for the vm, however for
+	 * some engines we use the kernels migrate vm underneath which offers no
+	 * such rpm ref, or we lack a vm. Make sure we keep a ref here, so we
+	 * can perform GuC CT actions when needed. Caller is expected to have
+	 * already grabbed the rpm ref outside any sensitive locks.
+	 */
+	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !vm))
+		drm_WARN_ON(&xe->drm, !xe_device_mem_access_get_if_ongoing(xe));
+
+	return q;
+
+err_lrc:
+	for (i = i - 1; i >= 0; --i)
+		xe_lrc_finish(q->lrc + i);
+	if (q->vm)
+		xe_vm_put(q->vm);	/* drop the reference taken by xe_vm_get() above */
+	kfree(q);
+	return ERR_PTR(err);
+}
+
+/* Create an exec queue; when @vm is given, creation runs under xe_vm_lock(). */
+struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
+					   u32 logical_mask, u16 width,
+					   struct xe_hw_engine *hwe, u32 flags)
+{
+	struct xe_exec_queue *queue;
+	int ret;
+
+	if (!vm)
+		return __xe_exec_queue_create(xe, vm, logical_mask, width, hwe, flags);
+
+	ret = xe_vm_lock(vm, true);
+	if (ret)
+		return ERR_PTR(ret);
+	queue = __xe_exec_queue_create(xe, vm, logical_mask, width, hwe, flags);
+	xe_vm_unlock(vm);
+	return queue;
+}
+
+/* Create a width-1 queue spanning every non-reserved engine of @class on @gt. */
+struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
+						 struct xe_vm *vm,
+						 enum xe_engine_class class, u32 flags)
+{
+	struct xe_hw_engine *engine, *first = NULL;
+	enum xe_hw_engine_id id;
+	u32 mask = 0;
+
+	for_each_hw_engine(engine, gt, id) {
+		/* Skip reserved engines and engines of another class */
+		if (xe_hw_engine_is_reserved(engine) || engine->class != class)
+			continue;
+
+		mask |= BIT(engine->logical_instance);
+		if (!first)
+			first = engine;
+	}
+
+	if (!mask)
+		return ERR_PTR(-ENODEV);
+
+	return xe_exec_queue_create(xe, vm, mask, 1, first, flags);
+}
+
+/* kref release callback: drop the last fence and child queues, then call ops->fini(). */
+void xe_exec_queue_destroy(struct kref *ref)
+{
+	struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount);
+	bool is_child = q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD;
+	struct xe_exec_queue *child, *tmp;
+
+	xe_exec_queue_last_fence_put_unlocked(q);
+	if (!is_child)	/* the list walk is skipped for bind-engine children */
+		list_for_each_entry_safe(child, tmp, &q->multi_gt_list, multi_gt_link)
+			xe_exec_queue_put(child);
+
+	q->ops->fini(q);
+}
+
+/* Finish every LRC, then release the mem-access ref (if held), the VM ref and @q. */
+void xe_exec_queue_fini(struct xe_exec_queue *q)
+{
+	int n;
+
+	for (n = 0; n < q->width; n++)
+		xe_lrc_finish(&q->lrc[n]);
+	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && ((q->flags & EXEC_QUEUE_FLAG_VM) || !q->vm))
+		xe_device_mem_access_put(gt_to_xe(q->gt));
+	if (q->vm)
+		xe_vm_put(q->vm);
+	kfree(q);
+}
+
+void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance)
+{
+	switch (q->class) {	/* "<class prefix><instance>", bounded to sizeof(q->name) */
+	case XE_ENGINE_CLASS_RENDER:
+		snprintf(q->name, sizeof(q->name), "rcs%d", instance);
+		break;
+	case XE_ENGINE_CLASS_VIDEO_DECODE:
+		snprintf(q->name, sizeof(q->name), "vcs%d", instance);
+		break;
+	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+		snprintf(q->name, sizeof(q->name), "vecs%d", instance);
+		break;
+	case XE_ENGINE_CLASS_COPY:
+		snprintf(q->name, sizeof(q->name), "bcs%d", instance);
+		break;
+	case XE_ENGINE_CLASS_COMPUTE:
+		snprintf(q->name, sizeof(q->name), "ccs%d", instance);
+		break;
+	case XE_ENGINE_CLASS_OTHER:
+		snprintf(q->name, sizeof(q->name), "gsccs%d", instance);
+		break;
+	default:
+		XE_WARN_ON(q->class);	/* unknown class: warn and leave q->name untouched */
+	}
+}
+
+/* Look up @id in @xef's exec-queue xarray; a hit is returned with an extra reference. */
+struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id)
+{
+	struct xe_exec_queue *found;
+
+	mutex_lock(&xef->exec_queue.lock);
+	found = xa_load(&xef->exec_queue.xa, id);
+	if (found)
+		xe_exec_queue_get(found);
+	mutex_unlock(&xef->exec_queue.lock);
+	return found;
+}
+
+enum xe_exec_queue_priority xe_exec_queue_device_get_max_priority(struct xe_device *xe)
+{
+	if (!capable(CAP_SYS_NICE))	/* without CAP_SYS_NICE the ceiling is NORMAL */
+		return XE_EXEC_QUEUE_PRIORITY_NORMAL;
+	return XE_EXEC_QUEUE_PRIORITY_HIGH;
+}
+
+static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q,
+				   u64 value, bool create)	/* @create is not used here */
+{
+	if (XE_IOCTL_DBG(xe, value > XE_EXEC_QUEUE_PRIORITY_HIGH))
+		return -EINVAL;	/* HIGH is the largest value accepted from user space */
+
+	if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe)))
+		return -EPERM;	/* in range, but above what this caller may request */
+
+	return q->ops->set_priority(q, value);	/* the backend applies it and supplies the result */
+}
+
+static bool xe_exec_queue_enforce_schedule_limit(void)
+{
+#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT)
+	return true;	/* with the Kconfig option set, limits apply to every caller */
+#else
+	return !capable(CAP_SYS_NICE);	/* otherwise only callers lacking CAP_SYS_NICE are limited */
+#endif
+}
+
+static void
+xe_exec_queue_get_prop_minmax(struct xe_hw_engine_class_intf *eclass,
+			      enum xe_exec_queue_sched_prop prop,
+			      u32 *min, u32 *max)
+{
+	switch (prop) {	/* start from the bounds stored in @eclass->sched_props */
+	case XE_EXEC_QUEUE_JOB_TIMEOUT:
+		*min = eclass->sched_props.job_timeout_min;
+		*max = eclass->sched_props.job_timeout_max;
+		break;
+	case XE_EXEC_QUEUE_TIMESLICE:
+		*min = eclass->sched_props.timeslice_min;
+		*max = eclass->sched_props.timeslice_max;
+		break;
+	case XE_EXEC_QUEUE_PREEMPT_TIMEOUT:
+		*min = eclass->sched_props.preempt_timeout_min;
+		*max = eclass->sched_props.preempt_timeout_max;
+		break;
+	default:
+		break;	/* unknown @prop: *min and *max are left untouched */
+	}
+#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT)
+	if (capable(CAP_SYS_NICE)) {	/* CAP_SYS_NICE callers get the XE_HW_ENGINE_* bounds instead */
+		switch (prop) {
+		case XE_EXEC_QUEUE_JOB_TIMEOUT:
+			*min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
+			*max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
+			break;
+		case XE_EXEC_QUEUE_TIMESLICE:
+			*min = XE_HW_ENGINE_TIMESLICE_MIN;
+			*max = XE_HW_ENGINE_TIMESLICE_MAX;
+			break;
+		case XE_EXEC_QUEUE_PREEMPT_TIMEOUT:
+			*min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
+			*max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
+			break;
+		default:
+			break;	/* unknown @prop: nothing to override */
+		}
+	}
+#endif
+}
+
+static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *q,
+				    u64 value, bool create)	/* @xe and @create are not used here */
+{
+	u32 min = 0, max = 0;	/* stay 0 if the helper below does not fill them in */
+
+	xe_exec_queue_get_prop_minmax(q->hwe->eclass,
+				      XE_EXEC_QUEUE_TIMESLICE, &min, &max);
+
+	if (xe_exec_queue_enforce_schedule_limit() &&
+	    !xe_hw_engine_timeout_in_range(value, min, max))
+		return -EINVAL;	/* range check failed while limits are enforced */
+
+	return q->ops->set_timeslice(q, value);	/* NOTE(review): u64 @value goes to a u32 parameter */
+}
+
+typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
+					     struct xe_exec_queue *q,
+					     u64 value, bool create);
+
+static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {	/* indexed by ext.property */
+	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority,
+	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice,
+};
+
+/* Copy one set-property extension from user space and dispatch it to its handler. */
+static int exec_queue_user_ext_set_property(struct xe_device *xe, struct xe_exec_queue *q,
+					    u64 extension, bool create)
+{
+	u64 __user *address = u64_to_user_ptr(extension);
+	struct drm_xe_ext_set_property ext;
+	int err;
+	u32 idx;
+
+	/* @extension is a raw user value: use the copy routine that checks the range */
+	err = copy_from_user(&ext, address, sizeof(ext));
+	if (XE_IOCTL_DBG(xe, err))
+		return -EFAULT;
+
+	if (XE_IOCTL_DBG(xe, ext.property >=
+			 ARRAY_SIZE(exec_queue_set_property_funcs)) ||
+	    XE_IOCTL_DBG(xe, ext.pad) ||
+	    XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY &&
+			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE))
+		return -EINVAL;
+
+	idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs));
+	if (!exec_queue_set_property_funcs[idx])
+		return -EINVAL;
+
+	return exec_queue_set_property_funcs[idx](xe, q, ext.value, create);
+}
+
+typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe,
+					       struct xe_exec_queue *q,
+					       u64 extension,
+					       bool create);
+/* Extension handlers, indexed by the extension name read from user space. */
+static const xe_exec_queue_user_extension_fn exec_queue_user_extension_funcs[] = {
+	[DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property,
+};
+
+#define MAX_USER_EXTENSIONS	16
+/* Apply the user extension at @extensions, then follow next_extension (bounded depth). */
+static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
+				      u64 extensions, int ext_number, bool create)
+{
+	u64 __user *address = u64_to_user_ptr(extensions);
+	struct drm_xe_user_extension ext;
+	int err;
+	u32 idx;
+
+	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
+		return -E2BIG;
+
+	err = copy_from_user(&ext, address, sizeof(ext));	/* range-checked, unlike __copy_from_user() */
+	if (XE_IOCTL_DBG(xe, err))
+		return -EFAULT;
+
+	if (XE_IOCTL_DBG(xe, ext.pad) ||
+	    XE_IOCTL_DBG(xe, ext.name >=
+			 ARRAY_SIZE(exec_queue_user_extension_funcs)))
+		return -EINVAL;
+
+	idx = array_index_nospec(ext.name, ARRAY_SIZE(exec_queue_user_extension_funcs));
+	err = exec_queue_user_extension_funcs[idx](xe, q, extensions, create);
+	if (XE_IOCTL_DBG(xe, err))
+		return err;
+
+	if (ext.next_extension)
+		return exec_queue_user_extensions(xe, q, ext.next_extension,
+					      ++ext_number, create);
+
+	return 0;
+}
+
+static const enum xe_engine_class user_to_xe_engine_class[] = {	/* uAPI class -> driver class */
+	[DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
+	[DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
+	[DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
+	[DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+	[DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
+};
+
+/* Resolve a user class/instance/GT triple; NULL when the class or GT id is out of range. */
+static struct xe_hw_engine *
+find_hw_engine(struct xe_device *xe, struct drm_xe_engine_class_instance eci)
+{
+	u32 idx;
+
+	/* ">=": a class equal to the table size is already past its last entry */
+	if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
+		return NULL;
+	if (eci.gt_id >= xe->info.gt_count)
+		return NULL;
+
+	idx = array_index_nospec(eci.engine_class,
+				 ARRAY_SIZE(user_to_xe_engine_class));
+
+	return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id),
+			       user_to_xe_engine_class[idx],
+			       eci.engine_instance, true);
+}
+
+static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt,
+					struct drm_xe_engine_class_instance *eci,
+					u16 width, u16 num_placements)
+{
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	u32 logical_mask = 0;	/* a return value of 0 is what callers treat as failure */
+
+	if (XE_IOCTL_DBG(xe, width != 1))
+		return 0;
+	if (XE_IOCTL_DBG(xe, num_placements != 1))
+		return 0;
+	if (XE_IOCTL_DBG(xe, eci[0].engine_instance != 0))
+		return 0;
+
+	eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY;	/* rewrites the caller's entry in place */
+
+	for_each_hw_engine(hwe, gt, id) {
+		if (xe_hw_engine_is_reserved(hwe))
+			continue;
+
+		if (hwe->class ==
+		    user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY])
+			logical_mask |= BIT(hwe->logical_instance);	/* every non-reserved copy engine */
+	}
+
+	return logical_mask;
+}
+
+static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
+				      struct drm_xe_engine_class_instance *eci,
+				      u16 width, u16 num_placements)
+{
+	int len = width * num_placements;	/* number of eci[] entries examined below */
+	int i, j, n;
+	u16 class;	/* written on every pass; only compared once n != 0 */
+	u16 gt_id;	/* same pattern as @class */
+	u32 return_mask = 0, prev_mask;	/* prev_mask is only read once i != 0 */
+
+	if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe) &&
+			 len > 1))
+		return 0;
+
+	for (i = 0; i < width; ++i) {
+		u32 current_mask = 0;	/* instances named for slot i across all placements */
+
+		for (j = 0; j < num_placements; ++j) {
+			struct xe_hw_engine *hwe;
+
+			n = j * width + i;	/* flat eci[] index of (placement j, slot i) */
+
+			hwe = find_hw_engine(xe, eci[n]);
+			if (XE_IOCTL_DBG(xe, !hwe))
+				return 0;
+
+			if (XE_IOCTL_DBG(xe, xe_hw_engine_is_reserved(hwe)))
+				return 0;
+
+			if (XE_IOCTL_DBG(xe, n && eci[n].gt_id != gt_id) ||
+			    XE_IOCTL_DBG(xe, n && eci[n].engine_class != class))
+				return 0;
+
+			class = eci[n].engine_class;
+			gt_id = eci[n].gt_id;
+
+			if (width == 1 || !i)	/* only slot 0 (or a width-1 queue) feeds the result */
+				return_mask |= BIT(eci[n].engine_instance);
+			current_mask |= BIT(eci[n].engine_instance);
+		}
+
+		/* Parallel submissions must be logically contiguous */
+		if (i && XE_IOCTL_DBG(xe, current_mask != prev_mask << 1))
+			return 0;
+
+		prev_mask = current_mask;
+	}
+
+	return return_mask;	/* 0 on any validation failure above */
+}
+
+/* Exec-queue create ioctl: build the queue(s), apply extensions, publish the new id. */
+int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_exec_queue_create *args = data;
+	struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE];
+	struct drm_xe_engine_class_instance __user *user_eci =
+		u64_to_user_ptr(args->instances);
+	struct xe_hw_engine *hwe;
+	struct xe_vm *vm, *migrate_vm;
+	struct xe_gt *gt;
+	struct xe_exec_queue *q = NULL;
+	u32 logical_mask, id, len;
+	int err;
+
+	if (XE_IOCTL_DBG(xe, args->flags) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
+	len = args->width * args->num_placements;
+	if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE))
+		return -EINVAL;
+
+	/* args->instances is a raw user value: use the copy routine that checks the range */
+	err = copy_from_user(eci, user_eci,
+			     sizeof(struct drm_xe_engine_class_instance) *
+			     len);
+	if (XE_IOCTL_DBG(xe, err))
+		return -EFAULT;
+
+	if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count))
+		return -EINVAL;
+
+	if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
+		for_each_gt(gt, xe, id) {
+			struct xe_exec_queue *new;
+
+			if (xe_gt_is_media_type(gt))
+				continue;
+
+			eci[0].gt_id = gt->info.id;
+			logical_mask = bind_exec_queue_logical_mask(xe, gt, eci,
+								    args->width,
+								    args->num_placements);
+			err = -EINVAL;	/* shared by both checks; exit drops any earlier queue */
+			if (XE_IOCTL_DBG(xe, !logical_mask))
+				goto put_exec_queue;
+
+			hwe = find_hw_engine(xe, eci[0]);
+			if (XE_IOCTL_DBG(xe, !hwe))
+				goto put_exec_queue;
+
+			/* The migration vm doesn't hold rpm ref */
+			xe_device_mem_access_get(xe);
+
+			migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate);
+			new = xe_exec_queue_create(xe, migrate_vm, logical_mask,
+						   args->width, hwe,
+						   EXEC_QUEUE_FLAG_PERSISTENT |
+						   EXEC_QUEUE_FLAG_VM |
+						   (id ?
+						    EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD :
+						    0));
+
+			xe_device_mem_access_put(xe); /* now held by engine */
+
+			xe_vm_put(migrate_vm);
+			if (IS_ERR(new)) {
+				err = PTR_ERR(new);
+				goto put_exec_queue;
+			}
+			if (id == 0)
+				q = new;
+			else
+				list_add_tail(&new->multi_gt_list,
+					      &q->multi_gt_link);
+		}
+	} else {
+		gt = xe_device_get_gt(xe, eci[0].gt_id);
+		logical_mask = calc_validate_logical_mask(xe, gt, eci,
+							  args->width,
+							  args->num_placements);
+		if (XE_IOCTL_DBG(xe, !logical_mask))
+			return -EINVAL;
+
+		hwe = find_hw_engine(xe, eci[0]);
+		if (XE_IOCTL_DBG(xe, !hwe))
+			return -EINVAL;
+
+		vm = xe_vm_lookup(xef, args->vm_id);
+		if (XE_IOCTL_DBG(xe, !vm))
+			return -ENOENT;
+
+		err = down_read_interruptible(&vm->lock);
+		if (err) {
+			xe_vm_put(vm);
+			return err;
+		}
+
+		if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
+			up_read(&vm->lock);
+			xe_vm_put(vm);
+			return -ENOENT;
+		}
+
+		q = xe_exec_queue_create(xe, vm, logical_mask,
+					 args->width, hwe, 0);
+		up_read(&vm->lock);
+		xe_vm_put(vm);
+		if (IS_ERR(q))
+			return PTR_ERR(q);
+
+		if (xe_vm_in_preempt_fence_mode(vm)) {
+			q->compute.context = dma_fence_context_alloc(1);
+			spin_lock_init(&q->compute.lock);
+
+			err = xe_vm_add_compute_exec_queue(vm, q);
+			if (XE_IOCTL_DBG(xe, err))
+				goto put_exec_queue;
+		}
+	}
+
+	if (args->extensions) {
+		err = exec_queue_user_extensions(xe, q, args->extensions, 0, true);
+		if (XE_IOCTL_DBG(xe, err))
+			goto kill_exec_queue;
+	}
+
+	mutex_lock(&xef->exec_queue.lock);
+	err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
+	mutex_unlock(&xef->exec_queue.lock);
+	if (err)
+		goto kill_exec_queue;
+
+	args->exec_queue_id = id;
+
+	return 0;
+
+kill_exec_queue:
+	xe_exec_queue_kill(q);
+put_exec_queue:
+	if (q)	/* still NULL when the first bind queue was never created */
+		xe_exec_queue_put(q);
+	return err;
+}
+
+int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
+				     struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_exec_queue_get_property *args = data;
+	struct xe_exec_queue *q;
+	int ret;
+
+	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;	/* reserved fields must be zero */
+
+	q = xe_exec_queue_lookup(xef, args->exec_queue_id);	/* takes a reference on a hit */
+	if (XE_IOCTL_DBG(xe, !q))
+		return -ENOENT;
+
+	switch (args->property) {
+	case DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN:
+		args->value = !!(q->flags & EXEC_QUEUE_FLAG_BANNED);	/* 1 if banned, else 0 */
+		ret = 0;
+		break;
+	default:
+		ret = -EINVAL;	/* BAN is the only property handled here */
+	}
+
+	xe_exec_queue_put(q);	/* balance the reference taken by the lookup */
+
+	return ret;
+}
+
+/**
+ * xe_exec_queue_is_lr() - Check whether an exec queue is long-running
+ * @q: The exec queue to test
+ *
+ * Return: true when @q has a VM in LR mode and is not a VM (bind) queue.
+ */
+bool xe_exec_queue_is_lr(struct xe_exec_queue *q)
+{
+	if (!q->vm || (q->flags & EXEC_QUEUE_FLAG_VM))
+		return false;
+	return xe_vm_in_lr_mode(q->vm);
+}
+
+static s32 xe_exec_queue_num_job_inflight(struct xe_exec_queue *q)
+{
+	return q->lrc->fence_ctx.next_seqno - xe_lrc_seqno(q->lrc) - 1;	/* first LRC only */
+}
+
+/**
+ * xe_exec_queue_ring_full() - Check whether an exec queue's ring is full
+ * @q: The exec queue to inspect
+ *
+ * Return: true once in-flight jobs reach ring.size / MAX_JOB_SIZE_BYTES.
+ */
+bool xe_exec_queue_ring_full(struct xe_exec_queue *q)
+{
+	s32 capacity = q->lrc->ring.size / MAX_JOB_SIZE_BYTES;
+	s32 inflight = xe_exec_queue_num_job_inflight(q);
+
+	return inflight >= capacity;
+}
+
+/**
+ * xe_exec_queue_is_idle() - Check whether an exec queue has no work pending
+ * @q: The exec queue to inspect
+ *
+ * A queue counts as idle when the seqno of each of its LRCs equals that
+ * LRC's fence_ctx.next_seqno - 1.
+ *
+ * FIXME: The result is only an advisory snapshot in time until a short-lived
+ * timeline lock is chosen to protect the seqno from racing submissions on
+ * the same exec queue. Typically that would be vm->resv, but user-created
+ * timeline locks use the migrate vm and never grab the migrate vm->resv.
+ *
+ * Return: true if the exec queue is idle, false otherwise.
+ */
+bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
+{
+	int n;
+
+	if (!xe_exec_queue_is_parallel(q))
+		return xe_lrc_seqno(&q->lrc[0]) == q->lrc[0].fence_ctx.next_seqno - 1;
+
+	/* Parallel queue: every LRC has to pass the same check */
+	for (n = 0; n < q->width; n++) {
+		struct xe_lrc *lrc = &q->lrc[n];
+
+		if (xe_lrc_seqno(lrc) != lrc->fence_ctx.next_seqno - 1)
+			return false;
+	}
+
+	return true;
+}
+
+/* Kill @q and every child queue on its multi-GT list, detaching each from q->vm. */
+void xe_exec_queue_kill(struct xe_exec_queue *q)
+{
+	struct xe_exec_queue *eq, *next;
+
+	/* Anchor the walk on @q: the list head must not be read through the cursor */
+	list_for_each_entry_safe(eq, next, &q->multi_gt_list, multi_gt_link) {
+		q->ops->kill(eq);
+		xe_vm_remove_compute_exec_queue(q->vm, eq);
+	}
+	q->ops->kill(q);
+	xe_vm_remove_compute_exec_queue(q->vm, q);
+}
+
+int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_exec_queue_destroy *args = data;
+	struct xe_exec_queue *q;
+
+	if (XE_IOCTL_DBG(xe, args->pad) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;	/* padding and reserved fields must be zero */
+
+	mutex_lock(&xef->exec_queue.lock);
+	q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id);	/* unpublish the id */
+	mutex_unlock(&xef->exec_queue.lock);
+	if (XE_IOCTL_DBG(xe, !q))
+		return -ENOENT;	/* nothing was stored under that id */
+
+	xe_exec_queue_kill(q);
+
+	trace_xe_exec_queue_close(q);
+	xe_exec_queue_put(q);	/* NOTE(review): presumably the ref the xarray entry held - confirm */
+
+	return 0;
+}
+
+/* Lock check for q->last_fence: vm->lock for VM queues, xe_vm_assert_held() otherwise. */
+static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q, struct xe_vm *vm)
+{
+	if (!(q->flags & EXEC_QUEUE_FLAG_VM)) {
+		xe_vm_assert_held(vm);
+		return;
+	}
+	lockdep_assert_held(&vm->lock);
+}
+
+/**
+ * xe_exec_queue_last_fence_put() - Release the tracked last fence
+ * @q: The exec queue
+ * @vm: The VM the queue does a bind or exec for
+ */
+void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm)
+{
+	xe_exec_queue_last_fence_lockdep_assert(q, vm);
+
+	if (!q->last_fence)
+		return;
+	dma_fence_put(q->last_fence);
+	q->last_fence = NULL;
+}
+
+/**
+ * xe_exec_queue_last_fence_put_unlocked() - Release the last fence, no lock check
+ * @q: The exec queue
+ *
+ * Only safe to be called from xe_exec_queue_destroy().
+ */
+void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *q)
+{
+	if (!q->last_fence)
+		return;
+	dma_fence_put(q->last_fence);
+	q->last_fence = NULL;
+}
+
+/**
+ * xe_exec_queue_last_fence_get() - Get last fence
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind or exec for
+ *
+ * Drops the tracked fence first if it has signaled. Exactly one reference is
+ * taken on the returned fence; release it with dma_fence_put().
+ * Returns: last fence if not signaled, dma fence stub if signaled or unset
+ */
+struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q,
+					       struct xe_vm *vm)
+{
+	struct dma_fence *fence;
+
+	xe_exec_queue_last_fence_lockdep_assert(q, vm);
+
+	if (q->last_fence &&
+	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
+		xe_exec_queue_last_fence_put(q, vm);
+
+	/* dma_fence_get_stub() already returns a reference; only ref our own fence */
+	fence = q->last_fence ? dma_fence_get(q->last_fence) : dma_fence_get_stub();
+	return fence;
+}
+
+/**
+ * xe_exec_queue_last_fence_set() - Set last fence
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind or exec for
+ * @fence: The fence
+ *
+ * Set the last fence for the engine. Takes a reference on @fence; when closing
+ * the engine, xe_exec_queue_last_fence_put() should be called to drop it.
+ */
+void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm,
+				  struct dma_fence *fence)
+{
+	xe_exec_queue_last_fence_lockdep_assert(q, vm);
+	/* Release whatever was tracked before, then hold a reference on the new fence */
+	xe_exec_queue_last_fence_put(q, vm);
+	q->last_fence = dma_fence_get(fence);
+}
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h
new file mode 100644
index 000000000000..d959cc4a1a82
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_exec_queue.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_EXEC_QUEUE_H_
+#define _XE_EXEC_QUEUE_H_
+
+#include "xe_exec_queue_types.h"
+#include "xe_vm_types.h"
+
+struct drm_device;
+struct drm_file;
+struct xe_device;
+struct xe_file;
+
+struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
+					   u32 logical_mask, u16 width,
+					   struct xe_hw_engine *hw_engine, u32 flags);
+struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
+						 struct xe_vm *vm,
+						 enum xe_engine_class class, u32 flags);
+
+void xe_exec_queue_fini(struct xe_exec_queue *q);
+void xe_exec_queue_destroy(struct kref *ref);
+void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance);
+
+struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id);
+
+static inline struct xe_exec_queue *xe_exec_queue_get(struct xe_exec_queue *q)
+{
+	kref_get(&q->refcount);	/* take one more reference on @q */
+	return q;		/* hand @q back so the call can be used in an expression */
+}
+
+static inline void xe_exec_queue_put(struct xe_exec_queue *q)
+{
+	kref_put(&q->refcount, xe_exec_queue_destroy);	/* xe_exec_queue_destroy() is the release callback */
+}
+
+static inline bool xe_exec_queue_is_parallel(struct xe_exec_queue *q)
+{
+	return q->width > 1;	/* a width above 1 is what marks a queue as parallel */
+}
+
+bool xe_exec_queue_is_lr(struct xe_exec_queue *q);
+
+bool xe_exec_queue_ring_full(struct xe_exec_queue *q);
+
+bool xe_exec_queue_is_idle(struct xe_exec_queue *q);
+
+void xe_exec_queue_kill(struct xe_exec_queue *q);
+
+int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file);
+int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *file);
+int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
+				     struct drm_file *file);
+enum xe_exec_queue_priority xe_exec_queue_device_get_max_priority(struct xe_device *xe);
+
+void xe_exec_queue_last_fence_put(struct xe_exec_queue *e, struct xe_vm *vm);
+void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *e);
+struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *e,
+					       struct xe_vm *vm);
+void xe_exec_queue_last_fence_set(struct xe_exec_queue *e, struct xe_vm *vm,
+				  struct dma_fence *fence);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
new file mode 100644
index 000000000000..36f4901d8d7e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -0,0 +1,204 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_EXEC_QUEUE_TYPES_H_
+#define _XE_EXEC_QUEUE_TYPES_H_
+
+#include <linux/kref.h>
+
+#include <drm/gpu_scheduler.h>
+
+#include "xe_gpu_scheduler_types.h"
+#include "xe_hw_engine_types.h"
+#include "xe_hw_fence_types.h"
+#include "xe_lrc_types.h"
+
+struct xe_execlist_exec_queue;
+struct xe_gt;
+struct xe_guc_exec_queue;
+struct xe_hw_engine;
+struct xe_vm;
+
+enum xe_exec_queue_priority {
+	XE_EXEC_QUEUE_PRIORITY_UNSET = -2, /* For execlist usage only */
+	XE_EXEC_QUEUE_PRIORITY_LOW = 0,
+	XE_EXEC_QUEUE_PRIORITY_NORMAL,
+	XE_EXEC_QUEUE_PRIORITY_HIGH,
+	XE_EXEC_QUEUE_PRIORITY_KERNEL,
+
+	XE_EXEC_QUEUE_PRIORITY_COUNT
+};
+
+/**
+ * struct xe_exec_queue - Execution queue
+ *
+ * Contains all state necessary for submissions. Can either be a user object or
+ * a kernel object.
+ */
+struct xe_exec_queue {
+	/** @gt: graphics tile this exec queue can submit to */
+	struct xe_gt *gt;
+	/**
+	 * @hwe: A hardware engine of the same class. May (physical engine) or may
+	 * not (virtual engine) be where jobs actually end up running. Should never
+	 * really be used for submissions.
+	 */
+	struct xe_hw_engine *hwe;
+	/** @refcount: ref count of this exec queue */
+	struct kref refcount;
+	/** @vm: VM (address space) for this exec queue */
+	struct xe_vm *vm;
+	/** @class: class of this exec queue */
+	enum xe_engine_class class;
+	/**
+	 * @logical_mask: logical mask of where job submitted to exec queue can run
+	 */
+	u32 logical_mask;
+	/** @name: name of this exec queue */
+	char name[MAX_FENCE_NAME_LEN];
+	/** @width: width (number BB submitted per exec) of this exec queue */
+	u16 width;
+	/** @fence_irq: fence IRQ used to signal job completion */
+	struct xe_hw_fence_irq *fence_irq;
+
+	/**
+	 * @last_fence: last fence on exec queue, protected by vm->lock in write
+	 * mode if bind exec queue, protected by dma resv lock if non-bind exec
+	 * queue
+	 */
+	struct dma_fence *last_fence;
+
+/* queue no longer allowed to submit */
+#define EXEC_QUEUE_FLAG_BANNED			BIT(0)
+/* queue used for kernel submission only */
+#define EXEC_QUEUE_FLAG_KERNEL			BIT(1)
+/* kernel engine only destroyed at driver unload */
+#define EXEC_QUEUE_FLAG_PERMANENT		BIT(2)
+/* queue keeps running pending jobs after destroy ioctl */
+#define EXEC_QUEUE_FLAG_PERSISTENT		BIT(3)
+/* for VM jobs. Caller needs to hold rpm ref when creating queue with this flag */
+#define EXEC_QUEUE_FLAG_VM			BIT(4)
+/* child of VM queue for multi-tile VM jobs */
+#define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD	BIT(5)
+/* kernel exec_queue only, set priority to highest level */
+#define EXEC_QUEUE_FLAG_HIGH_PRIORITY		BIT(6)
+
+	/**
+	 * @flags: flags for this exec queue, should be statically set up aside
+	 * from the ban bit
+	 */
+	unsigned long flags;
+
+	union {
+		/** @multi_gt_list: list head for VM bind engines if multi-GT */
+		struct list_head multi_gt_list;
+		/** @multi_gt_link: link for VM bind engines if multi-GT */
+		struct list_head multi_gt_link;
+	};
+
+	union {
+		/** @execlist: execlist backend specific state for exec queue */
+		struct xe_execlist_exec_queue *execlist;
+		/** @guc: GuC backend specific state for exec queue */
+		struct xe_guc_exec_queue *guc;
+	};
+
+	union {
+		/**
+		 * @parallel: parallel submission state
+		 */
+		struct {
+			/** @composite_fence_ctx: context composite fence */
+			u64 composite_fence_ctx;
+			/** @composite_fence_seqno: seqno for composite fence */
+			u32 composite_fence_seqno;
+		} parallel;
+		/**
+		 * @bind: bind submission state
+		 */
+		struct {
+			/** @fence_ctx: context bind fence */
+			u64 fence_ctx;
+			/** @fence_seqno: seqno for bind fence */
+			u32 fence_seqno;
+		} bind;
+	};
+
+	/** @sched_props: scheduling properties */
+	struct {
+		/** @timeslice_us: timeslice period in micro-seconds */
+		u32 timeslice_us;
+		/** @preempt_timeout_us: preemption timeout in micro-seconds */
+		u32 preempt_timeout_us;
+		/** @priority: priority of this exec queue */
+		enum xe_exec_queue_priority priority;
+	} sched_props;
+
+	/** @compute: compute exec queue state */
+	struct {
+		/** @pfence: preemption fence */
+		struct dma_fence *pfence;
+		/** @context: preemption fence context */
+		u64 context;
+		/** @seqno: preemption fence seqno */
+		u32 seqno;
+		/** @link: link into VM's list of exec queues */
+		struct list_head link;
+		/** @lock: preemption fences lock */
+		spinlock_t lock;
+	} compute;
+
+	/** @ops: submission backend exec queue operations */
+	const struct xe_exec_queue_ops *ops;
+
+	/** @ring_ops: ring operations for this exec queue */
+	const struct xe_ring_ops *ring_ops;
+	/** @entity: DRM sched entity for this exec queue (1 to 1 relationship) */
+	struct drm_sched_entity *entity;
+	/** @lrc: logical ring context for this exec queue */
+	struct xe_lrc lrc[];
+};
+
+/**
+ * struct xe_exec_queue_ops - Submission backend exec queue operations
+ */
+struct xe_exec_queue_ops {
+	/** @init: Initialize exec queue for submission backend */
+	int (*init)(struct xe_exec_queue *q);
+	/** @kill: Kill inflight submissions for backend */
+	void (*kill)(struct xe_exec_queue *q);
+	/** @fini: Fini exec queue for submission backend */
+	void (*fini)(struct xe_exec_queue *q);
+	/** @set_priority: Set priority for exec queue */
+	int (*set_priority)(struct xe_exec_queue *q,
+			    enum xe_exec_queue_priority priority);
+	/** @set_timeslice: Set timeslice for exec queue */
+	int (*set_timeslice)(struct xe_exec_queue *q, u32 timeslice_us);
+	/** @set_preempt_timeout: Set preemption timeout for exec queue */
+	int (*set_preempt_timeout)(struct xe_exec_queue *q, u32 preempt_timeout_us);
+	/** @set_job_timeout: Set job timeout for exec queue */
+	int (*set_job_timeout)(struct xe_exec_queue *q, u32 job_timeout_ms);
+	/**
+	 * @suspend: Suspend exec queue from executing, allowed to be called
+	 * multiple times in a row before resume with the caveat that
+	 * suspend_wait returns before calling suspend again.
+	 */
+	int (*suspend)(struct xe_exec_queue *q);
+	/**
+	 * @suspend_wait: Wait for an exec queue to suspend executing, should be
+	 * called after suspend.
+	 */
+	void (*suspend_wait)(struct xe_exec_queue *q);
+	/**
+	 * @resume: Resume exec queue execution, exec queue must be in a suspended
+	 * state and dma fence returned from most recent suspend call must be
+	 * signalled when this function is called.
+	 */
+	void (*resume)(struct xe_exec_queue *q);
+	/** @reset_status: check exec queue reset status */
+	bool (*reset_status)(struct xe_exec_queue *q);
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
new file mode 100644
index 000000000000..acb4d9f38fd7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -0,0 +1,472 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_execlist.h"
+
+#include <drm/drm_managed.h>
+
+#include "instructions/xe_mi_commands.h"
+#include "regs/xe_engine_regs.h"
+#include "regs/xe_gpu_commands.h"
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_lrc_layout.h"
+#include "xe_assert.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_exec_queue.h"
+#include "xe_gt.h"
+#include "xe_hw_fence.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_mmio.h"
+#include "xe_mocs.h"
+#include "xe_ring_ops_types.h"
+#include "xe_sched_job.h"
+
+#define XE_EXECLIST_HANG_LIMIT 1
+
+#define SW_CTX_ID_SHIFT 37
+#define SW_CTX_ID_WIDTH 11
+#define XEHP_SW_CTX_ID_SHIFT  39
+#define XEHP_SW_CTX_ID_WIDTH  16
+
+#define SW_CTX_ID \
+	GENMASK_ULL(SW_CTX_ID_WIDTH + SW_CTX_ID_SHIFT - 1, \
+		    SW_CTX_ID_SHIFT)
+
+#define XEHP_SW_CTX_ID \
+	GENMASK_ULL(XEHP_SW_CTX_ID_WIDTH + XEHP_SW_CTX_ID_SHIFT - 1, \
+		    XEHP_SW_CTX_ID_SHIFT)
+
+
+static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
+			u32 ctx_id)
+{
+	struct xe_gt *gt = hwe->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	u64 lrc_desc;
+
+	lrc_desc = xe_lrc_descriptor(lrc);
+
+	if (GRAPHICS_VERx100(xe) >= 1250) {
+		xe_gt_assert(hwe->gt, FIELD_FIT(XEHP_SW_CTX_ID, ctx_id));
+		lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id);
+	} else {
+		xe_gt_assert(hwe->gt, FIELD_FIT(SW_CTX_ID, ctx_id));
+		lrc_desc |= FIELD_PREP(SW_CTX_ID, ctx_id);
+	}
+
+	if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
+		xe_mmio_write32(hwe->gt, RCU_MODE,
+				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));
+
+	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
+	lrc->ring.old_tail = lrc->ring.tail;
+
+	/*
+	 * Make sure the context image is complete before we submit it to HW.
+	 *
+	 * Ostensibly, writes (including the WCB) should be flushed prior to
+	 * an uncached write such as our mmio register access, the empirical
+	 * evidence (esp. on Braswell) suggests that the WC write into memory
+	 * may not be visible to the HW prior to the completion of the UC
+	 * register write and that we may begin execution from the context
+	 * before its image is complete leading to invalid PD chasing.
+	 */
+	wmb();
+
+	xe_mmio_write32(gt, RING_HWS_PGA(hwe->mmio_base),
+			xe_bo_ggtt_addr(hwe->hwsp));
+	xe_mmio_read32(gt, RING_HWS_PGA(hwe->mmio_base));
+	xe_mmio_write32(gt, RING_MODE(hwe->mmio_base),
+			_MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
+
+	xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base),
+			lower_32_bits(lrc_desc));
+	xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base),
+			upper_32_bits(lrc_desc));
+	xe_mmio_write32(gt, RING_EXECLIST_CONTROL(hwe->mmio_base),
+			EL_CTRL_LOAD);
+}
+
+static void __xe_execlist_port_start(struct xe_execlist_port *port,
+				     struct xe_execlist_exec_queue *exl)
+{
+	struct xe_device *xe = gt_to_xe(port->hwe->gt);
+	int max_ctx = FIELD_MAX(SW_CTX_ID);
+
+	if (GRAPHICS_VERx100(xe) >= 1250)
+		max_ctx = FIELD_MAX(XEHP_SW_CTX_ID);
+
+	xe_execlist_port_assert_held(port);
+
+	if (port->running_exl != exl || !exl->has_run) {
+		port->last_ctx_id++;
+
+		/* 0 is reserved for the kernel context */
+		if (port->last_ctx_id > max_ctx)
+			port->last_ctx_id = 1;
+	}
+
+	__start_lrc(port->hwe, exl->q->lrc, port->last_ctx_id);
+	port->running_exl = exl;
+	exl->has_run = true;
+}
+
+static void __xe_execlist_port_idle(struct xe_execlist_port *port)
+{
+	u32 noop[2] = { MI_NOOP, MI_NOOP };
+
+	xe_execlist_port_assert_held(port);
+
+	if (!port->running_exl)
+		return;
+
+	xe_lrc_write_ring(&port->hwe->kernel_lrc, noop, sizeof(noop));
+	__start_lrc(port->hwe, &port->hwe->kernel_lrc, 0);
+	port->running_exl = NULL;
+}
+
+static bool xe_execlist_is_idle(struct xe_execlist_exec_queue *exl)
+{
+	struct xe_lrc *lrc = exl->q->lrc;
+
+	return lrc->ring.tail == lrc->ring.old_tail;
+}
+
+static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port)
+{
+	struct xe_execlist_exec_queue *exl = NULL;
+	int i;
+
+	xe_execlist_port_assert_held(port);
+
+	for (i = ARRAY_SIZE(port->active) - 1; i >= 0; i--) {
+		while (!list_empty(&port->active[i])) {
+			exl = list_first_entry(&port->active[i],
+					       struct xe_execlist_exec_queue,
+					       active_link);
+			list_del(&exl->active_link);
+
+			if (xe_execlist_is_idle(exl)) {
+				exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
+				continue;
+			}
+
+			list_add_tail(&exl->active_link, &port->active[i]);
+			__xe_execlist_port_start(port, exl);
+			return;
+		}
+	}
+
+	__xe_execlist_port_idle(port);
+}
+
+static u64 read_execlist_status(struct xe_hw_engine *hwe)
+{
+	struct xe_gt *gt = hwe->gt;
+	u32 hi, lo;
+
+	lo = xe_mmio_read32(gt, RING_EXECLIST_STATUS_LO(hwe->mmio_base));
+	hi = xe_mmio_read32(gt, RING_EXECLIST_STATUS_HI(hwe->mmio_base));
+
+	return lo | (u64)hi << 32;
+}
+
+static void xe_execlist_port_irq_handler_locked(struct xe_execlist_port *port)
+{
+	u64 status;
+
+	xe_execlist_port_assert_held(port);
+
+	status = read_execlist_status(port->hwe);
+	if (status & BIT(7))
+		return;
+
+	__xe_execlist_port_start_next_active(port);
+}
+
+static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe,
+					 u16 intr_vec)
+{
+	struct xe_execlist_port *port = hwe->exl_port;
+
+	spin_lock(&port->lock);
+	xe_execlist_port_irq_handler_locked(port);
+	spin_unlock(&port->lock);
+}
+
+static void xe_execlist_port_wake_locked(struct xe_execlist_port *port,
+					 enum xe_exec_queue_priority priority)
+{
+	xe_execlist_port_assert_held(port);
+
+	if (port->running_exl && port->running_exl->active_priority >= priority)
+		return;
+
+	__xe_execlist_port_start_next_active(port);
+}
+
+static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl)
+{
+	struct xe_execlist_port *port = exl->port;
+	enum xe_exec_queue_priority priority = exl->q->sched_props.priority;
+
+	XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET);
+	XE_WARN_ON(priority < 0);
+	XE_WARN_ON(priority >= ARRAY_SIZE(exl->port->active));
+
+	spin_lock_irq(&port->lock);
+
+	if (exl->active_priority != priority &&
+	    exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET) {
+		/* Priority changed, move it to the right list */
+		list_del(&exl->active_link);
+		exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
+	}
+
+	if (exl->active_priority == XE_EXEC_QUEUE_PRIORITY_UNSET) {
+		exl->active_priority = priority;
+		list_add_tail(&exl->active_link, &port->active[priority]);
+	}
+
+	xe_execlist_port_wake_locked(exl->port, priority);
+
+	spin_unlock_irq(&port->lock);
+}
+
+static void xe_execlist_port_irq_fail_timer(struct timer_list *timer)
+{
+	struct xe_execlist_port *port =
+		container_of(timer, struct xe_execlist_port, irq_fail);
+
+	spin_lock_irq(&port->lock);
+	xe_execlist_port_irq_handler_locked(port);
+	spin_unlock_irq(&port->lock);
+	/* Re-arm unconditionally: the handler is re-run every 1000 ms */
+	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
+	add_timer(&port->irq_fail);
+}
+
+struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
+						 struct xe_hw_engine *hwe)
+{
+	struct drm_device *drm = &xe->drm;
+	struct xe_execlist_port *port;
+	int i;
+
+	port = drmm_kzalloc(drm, sizeof(*port), GFP_KERNEL);
+	if (!port)
+		return ERR_PTR(-ENOMEM);
+
+	port->hwe = hwe;
+
+	spin_lock_init(&port->lock);
+	for (i = 0; i < ARRAY_SIZE(port->active); i++)
+		INIT_LIST_HEAD(&port->active[i]);
+
+	port->last_ctx_id = 1;
+	port->running_exl = NULL;
+
+	hwe->irq_handler = xe_execlist_port_irq_handler;
+
+	/* TODO: Fix the interrupt code so it doesn't race like mad */
+	timer_setup(&port->irq_fail, xe_execlist_port_irq_fail_timer, 0);
+	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
+	add_timer(&port->irq_fail);
+
+	return port;
+}
+
+void xe_execlist_port_destroy(struct xe_execlist_port *port)
+{
+	/* The callback re-arms itself; shut down so it can never fire again */
+	timer_shutdown_sync(&port->irq_fail);
+	/* Prevent an interrupt while we're destroying */
+	spin_lock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);
+	port->hwe->irq_handler = NULL;
+	spin_unlock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);
+}
+
+static struct dma_fence *
+execlist_run_job(struct drm_sched_job *drm_job)
+{
+	struct xe_sched_job *job = to_xe_sched_job(drm_job);
+	struct xe_exec_queue *q = job->q;
+	struct xe_execlist_exec_queue *exl = job->q->execlist;
+
+	q->ring_ops->emit_job(job);
+	xe_execlist_make_active(exl);
+
+	return dma_fence_get(job->fence);
+}
+
+static void execlist_job_free(struct drm_sched_job *drm_job)
+{
+	struct xe_sched_job *job = to_xe_sched_job(drm_job);
+
+	xe_sched_job_put(job);
+}
+
+static const struct drm_sched_backend_ops drm_sched_ops = {
+	.run_job = execlist_run_job,
+	.free_job = execlist_job_free,
+};
+
+static int execlist_exec_queue_init(struct xe_exec_queue *q)
+{
+	struct drm_gpu_scheduler *sched;
+	struct xe_execlist_exec_queue *exl;
+	struct xe_device *xe = gt_to_xe(q->gt);
+	int err;
+
+	xe_assert(xe, !xe_device_uc_enabled(xe));
+
+	drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n");
+
+	exl = kzalloc(sizeof(*exl), GFP_KERNEL);
+	if (!exl)
+		return -ENOMEM;
+
+	exl->q = q;
+
+	err = drm_sched_init(&exl->sched, &drm_sched_ops, NULL, 1,
+			     q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES,
+			     XE_SCHED_HANG_LIMIT, XE_SCHED_JOB_TIMEOUT,
+			     NULL, NULL, q->hwe->name,
+			     gt_to_xe(q->gt)->drm.dev);
+	if (err)
+		goto err_free;
+
+	sched = &exl->sched;
+	err = drm_sched_entity_init(&exl->entity, 0, &sched, 1, NULL);
+	if (err)
+		goto err_sched;
+
+	exl->port = q->hwe->exl_port;
+	exl->has_run = false;
+	exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
+	q->execlist = exl;
+	q->entity = &exl->entity;
+
+	xe_exec_queue_assign_name(q, ffs(q->logical_mask) - 1);
+
+	return 0;
+
+err_sched:
+	drm_sched_fini(&exl->sched);
+err_free:
+	kfree(exl);
+	return err;
+}
+
+static void execlist_exec_queue_fini_async(struct work_struct *w)
+{
+	struct xe_execlist_exec_queue *ee =
+		container_of(w, struct xe_execlist_exec_queue, fini_async);
+	struct xe_exec_queue *q = ee->q;
+	struct xe_execlist_exec_queue *exl = q->execlist;
+	struct xe_device *xe = gt_to_xe(q->gt);
+	unsigned long flags;
+
+	xe_assert(xe, !xe_device_uc_enabled(xe));
+
+	spin_lock_irqsave(&exl->port->lock, flags);
+	if (WARN_ON(exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET))
+		list_del(&exl->active_link);
+	spin_unlock_irqrestore(&exl->port->lock, flags);
+
+	drm_sched_entity_fini(&exl->entity);
+	drm_sched_fini(&exl->sched);
+	kfree(exl);
+
+	xe_exec_queue_fini(q);
+}
+
+static void execlist_exec_queue_kill(struct xe_exec_queue *q)
+{
+	/* NIY */
+}
+
+static void execlist_exec_queue_fini(struct xe_exec_queue *q)
+{
+	INIT_WORK(&q->execlist->fini_async, execlist_exec_queue_fini_async);
+	queue_work(system_unbound_wq, &q->execlist->fini_async);
+}
+
+static int execlist_exec_queue_set_priority(struct xe_exec_queue *q,
+					    enum xe_exec_queue_priority priority)
+{
+	/* NIY */
+	return 0;
+}
+
+static int execlist_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
+{
+	/* NIY */
+	return 0;
+}
+
+static int execlist_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
+						   u32 preempt_timeout_us)
+{
+	/* NIY */
+	return 0;
+}
+
+static int execlist_exec_queue_set_job_timeout(struct xe_exec_queue *q,
+					       u32 job_timeout_ms)
+{
+	/* NIY */
+	return 0;
+}
+
+static int execlist_exec_queue_suspend(struct xe_exec_queue *q)
+{
+	/* NIY */
+	return 0;
+}
+
+static void execlist_exec_queue_suspend_wait(struct xe_exec_queue *q)
+
+{
+	/* NIY */
+}
+
+static void execlist_exec_queue_resume(struct xe_exec_queue *q)
+{
+	/* NIY */
+}
+
+static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q)
+{
+	/* NIY */
+	return false;
+}
+
+static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
+	.init = execlist_exec_queue_init,
+	.kill = execlist_exec_queue_kill,
+	.fini = execlist_exec_queue_fini,
+	.set_priority = execlist_exec_queue_set_priority,
+	.set_timeslice = execlist_exec_queue_set_timeslice,
+	.set_preempt_timeout = execlist_exec_queue_set_preempt_timeout,
+	.set_job_timeout = execlist_exec_queue_set_job_timeout,
+	.suspend = execlist_exec_queue_suspend,
+	.suspend_wait = execlist_exec_queue_suspend_wait,
+	.resume = execlist_exec_queue_resume,
+	.reset_status = execlist_exec_queue_reset_status,
+};
+
+int xe_execlist_init(struct xe_gt *gt)
+{
+	/* GuC submission enabled, nothing to do */
+	if (xe_device_uc_enabled(gt_to_xe(gt)))
+		return 0;
+
+	gt->exec_queue_ops = &execlist_exec_queue_ops;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_execlist.h b/drivers/gpu/drm/xe/xe_execlist.h
new file mode 100644
index 000000000000..26f600ac8552
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_execlist.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_EXECLIST_H_
+#define _XE_EXECLIST_H_
+
+#include "xe_execlist_types.h"
+
+struct xe_device;
+struct xe_gt;
+
+#define xe_execlist_port_assert_held(port) lockdep_assert_held(&(port)->lock)
+
+int xe_execlist_init(struct xe_gt *gt);
+struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
+						 struct xe_hw_engine *hwe);
+void xe_execlist_port_destroy(struct xe_execlist_port *port);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_execlist_types.h b/drivers/gpu/drm/xe/xe_execlist_types.h
new file mode 100644
index 000000000000..f94bbf4c53e4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_execlist_types.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_EXECLIST_TYPES_H_
+#define _XE_EXECLIST_TYPES_H_
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
+#include "xe_exec_queue_types.h"
+
+struct xe_hw_engine;
+struct xe_execlist_exec_queue;
+
+struct xe_execlist_port {
+	struct xe_hw_engine *hwe;
+
+	spinlock_t lock;
+
+	struct list_head active[XE_EXEC_QUEUE_PRIORITY_COUNT];
+
+	u32 last_ctx_id;
+
+	struct xe_execlist_exec_queue *running_exl;
+
+	struct timer_list irq_fail;
+};
+
+struct xe_execlist_exec_queue {
+	struct xe_exec_queue *q;
+
+	struct drm_gpu_scheduler sched;
+
+	struct drm_sched_entity entity;
+
+	struct xe_execlist_port *port;
+
+	bool has_run;
+
+	struct work_struct fini_async;
+
+	enum xe_exec_queue_priority active_priority;
+	struct list_head active_link;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
new file mode 100644
index 000000000000..9bbe8a5040da
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_force_wake.h"
+
+#include <drm/drm_util.h>
+
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_reg_defs.h"
+#include "xe_gt.h"
+#include "xe_mmio.h"
+
+#define XE_FORCE_WAKE_ACK_TIMEOUT_MS	50
+
+static struct xe_gt *
+fw_to_gt(struct xe_force_wake *fw)
+{
+	return fw->gt;
+}
+
+static struct xe_device *
+fw_to_xe(struct xe_force_wake *fw)
+{
+	return gt_to_xe(fw_to_gt(fw));
+}
+
+static void domain_init(struct xe_force_wake_domain *domain,
+			enum xe_force_wake_domain_id id,
+			struct xe_reg reg, struct xe_reg ack, u32 val, u32 mask)
+{
+	domain->id = id;
+	domain->reg_ctl = reg;
+	domain->reg_ack = ack;
+	domain->val = val;
+	domain->mask = mask;
+}
+
+void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	fw->gt = gt;
+	spin_lock_init(&fw->lock);
+
+	/* Assuming gen11+ so assert this assumption is correct */
+	xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11);
+
+	if (xe->info.graphics_verx100 >= 1270) {
+		domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT],
+			    XE_FW_DOMAIN_ID_GT,
+			    FORCEWAKE_GT,
+			    FORCEWAKE_ACK_GT_MTL,
+			    BIT(0), BIT(16));
+	} else {
+		domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT],
+			    XE_FW_DOMAIN_ID_GT,
+			    FORCEWAKE_GT,
+			    FORCEWAKE_ACK_GT,
+			    BIT(0), BIT(16));
+	}
+}
+
+void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
+{
+	int i, j;
+
+	/* Assuming gen11+ so assert this assumption is correct */
+	xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11);
+
+	if (!xe_gt_is_media_type(gt))
+		domain_init(&fw->domains[XE_FW_DOMAIN_ID_RENDER],
+			    XE_FW_DOMAIN_ID_RENDER,
+			    FORCEWAKE_RENDER,
+			    FORCEWAKE_ACK_RENDER,
+			    BIT(0), BIT(16));
+
+	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
+		if (!(gt->info.engine_mask & BIT(i)))
+			continue;
+
+		domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j],
+			    XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j,
+			    FORCEWAKE_MEDIA_VDBOX(j),
+			    FORCEWAKE_ACK_MEDIA_VDBOX(j),
+			    BIT(0), BIT(16));
+	}
+
+	for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) {
+		if (!(gt->info.engine_mask & BIT(i)))
+			continue;
+
+		domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j],
+			    XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j,
+			    FORCEWAKE_MEDIA_VEBOX(j),
+			    FORCEWAKE_ACK_MEDIA_VEBOX(j),
+			    BIT(0), BIT(16));
+	}
+
+	if (gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0))
+		domain_init(&fw->domains[XE_FW_DOMAIN_ID_GSC],
+			    XE_FW_DOMAIN_ID_GSC,
+			    FORCEWAKE_GSC,
+			    FORCEWAKE_ACK_GSC,
+			    BIT(0), BIT(16));
+}
+
+static void domain_wake(struct xe_gt *gt, struct xe_force_wake_domain *domain)
+{
+	xe_mmio_write32(gt, domain->reg_ctl, domain->mask | domain->val);
+}
+
+static int domain_wake_wait(struct xe_gt *gt,
+			    struct xe_force_wake_domain *domain)
+{
+	return xe_mmio_wait32(gt, domain->reg_ack, domain->val, domain->val,
+			      XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC,
+			      NULL, true);
+}
+
+static void domain_sleep(struct xe_gt *gt, struct xe_force_wake_domain *domain)
+{
+	xe_mmio_write32(gt, domain->reg_ctl, domain->mask);
+}
+
+static int domain_sleep_wait(struct xe_gt *gt,
+			     struct xe_force_wake_domain *domain)
+{
+	return xe_mmio_wait32(gt, domain->reg_ack, domain->val, 0,
+			      XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC,
+			      NULL, true);
+}
+
+#define for_each_fw_domain_masked(domain__, mask__, fw__, tmp__) \
+	for (tmp__ = (mask__); tmp__; tmp__ &= ~BIT(ffs(tmp__) - 1)) \
+		for_each_if((domain__ = ((fw__)->domains + \
+					 (ffs(tmp__) - 1))) && \
+					 domain__->reg_ctl.addr)
+
+int xe_force_wake_get(struct xe_force_wake *fw,
+		      enum xe_force_wake_domains domains)
+{
+	struct xe_device *xe = fw_to_xe(fw);
+	struct xe_gt *gt = fw_to_gt(fw);
+	struct xe_force_wake_domain *domain;
+	enum xe_force_wake_domains tmp, woken = 0;
+	unsigned long flags;
+	int ret, ret2 = 0;
+
+	spin_lock_irqsave(&fw->lock, flags);
+	for_each_fw_domain_masked(domain, domains, fw, tmp) {
+		if (!domain->ref++) {
+			woken |= BIT(domain->id);
+			domain_wake(gt, domain);
+		}
+	}
+	for_each_fw_domain_masked(domain, woken, fw, tmp) {
+		ret = domain_wake_wait(gt, domain);
+		ret2 |= ret;
+		if (ret)
+			drm_notice(&xe->drm, "Force wake domain (%d) failed to ack wake, ret=%d\n",
+				   domain->id, ret);
+	}
+	fw->awake_domains |= woken;
+	spin_unlock_irqrestore(&fw->lock, flags);
+
+	return ret2;
+}
+
+int xe_force_wake_put(struct xe_force_wake *fw,
+		      enum xe_force_wake_domains domains)
+{
+	struct xe_device *xe = fw_to_xe(fw);
+	struct xe_gt *gt = fw_to_gt(fw);
+	struct xe_force_wake_domain *domain;
+	enum xe_force_wake_domains tmp, sleep = 0;
+	unsigned long flags;
+	int ret, ret2 = 0;
+
+	spin_lock_irqsave(&fw->lock, flags);
+	for_each_fw_domain_masked(domain, domains, fw, tmp) {
+		if (!--domain->ref) {
+			sleep |= BIT(domain->id);
+			domain_sleep(gt, domain);
+		}
+	}
+	for_each_fw_domain_masked(domain, sleep, fw, tmp) {
+		ret = domain_sleep_wait(gt, domain);
+		ret2 |= ret;
+		if (ret)
+			drm_notice(&xe->drm, "Force wake domain (%d) failed to ack sleep, ret=%d\n",
+				   domain->id, ret);
+	}
+	fw->awake_domains &= ~sleep;
+	spin_unlock_irqrestore(&fw->lock, flags);
+
+	return ret2;
+}
diff --git a/drivers/gpu/drm/xe/xe_force_wake.h b/drivers/gpu/drm/xe/xe_force_wake.h
new file mode 100644
index 000000000000..83cb157da7cc
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_force_wake.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_FORCE_WAKE_H_
+#define _XE_FORCE_WAKE_H_
+
+#include "xe_assert.h"
+#include "xe_force_wake_types.h"
+
+struct xe_gt;
+
+void xe_force_wake_init_gt(struct xe_gt *gt,
+			   struct xe_force_wake *fw);
+void xe_force_wake_init_engines(struct xe_gt *gt,
+				struct xe_force_wake *fw);
+int xe_force_wake_get(struct xe_force_wake *fw,
+		      enum xe_force_wake_domains domains);
+int xe_force_wake_put(struct xe_force_wake *fw,
+		      enum xe_force_wake_domains domains);
+
+static inline int
+xe_force_wake_ref(struct xe_force_wake *fw,
+		  enum xe_force_wake_domains domain)
+{
+	xe_gt_assert(fw->gt, domain);
+	return fw->domains[ffs(domain) - 1].ref;
+}
+
+static inline void
+xe_force_wake_assert_held(struct xe_force_wake *fw,
+			  enum xe_force_wake_domains domain)
+{
+	xe_gt_assert(fw->gt, fw->awake_domains & domain);
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_force_wake_types.h b/drivers/gpu/drm/xe/xe_force_wake_types.h
new file mode 100644
index 000000000000..ed0edc2cdf9f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_force_wake_types.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_FORCE_WAKE_TYPES_H_
+#define _XE_FORCE_WAKE_TYPES_H_
+
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+#include "regs/xe_reg_defs.h"
+
+enum xe_force_wake_domain_id {
+	XE_FW_DOMAIN_ID_GT = 0,
+	XE_FW_DOMAIN_ID_RENDER,
+	XE_FW_DOMAIN_ID_MEDIA,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX0,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX1,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX2,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX3,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX4,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX5,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX6,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX7,
+	XE_FW_DOMAIN_ID_MEDIA_VEBOX0,
+	XE_FW_DOMAIN_ID_MEDIA_VEBOX1,
+	XE_FW_DOMAIN_ID_MEDIA_VEBOX2,
+	XE_FW_DOMAIN_ID_MEDIA_VEBOX3,
+	XE_FW_DOMAIN_ID_GSC,
+	XE_FW_DOMAIN_ID_COUNT
+};
+
+enum xe_force_wake_domains {
+	XE_FW_GT		= BIT(XE_FW_DOMAIN_ID_GT),
+	XE_FW_RENDER		= BIT(XE_FW_DOMAIN_ID_RENDER),
+	XE_FW_MEDIA		= BIT(XE_FW_DOMAIN_ID_MEDIA),
+	XE_FW_MEDIA_VDBOX0	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX0),
+	XE_FW_MEDIA_VDBOX1	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX1),
+	XE_FW_MEDIA_VDBOX2	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX2),
+	XE_FW_MEDIA_VDBOX3	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX3),
+	XE_FW_MEDIA_VDBOX4	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX4),
+	XE_FW_MEDIA_VDBOX5	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX5),
+	XE_FW_MEDIA_VDBOX6	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX6),
+	XE_FW_MEDIA_VDBOX7	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX7),
+	XE_FW_MEDIA_VEBOX0	= BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX0),
+	XE_FW_MEDIA_VEBOX1	= BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX1),
+	XE_FW_MEDIA_VEBOX2	= BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX2),
+	XE_FW_MEDIA_VEBOX3	= BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX3),
+	XE_FW_GSC		= BIT(XE_FW_DOMAIN_ID_GSC),
+	XE_FORCEWAKE_ALL	= BIT(XE_FW_DOMAIN_ID_COUNT) - 1
+};
+
+/**
+ * struct xe_force_wake_domain - XE force wake domains
+ */
+struct xe_force_wake_domain {
+	/** @id: domain force wake id */
+	enum xe_force_wake_domain_id id;
+	/** @reg_ctl: domain wake control register address */
+	struct xe_reg reg_ctl;
+	/** @reg_ack: domain ack register address */
+	struct xe_reg reg_ack;
+	/** @val: domain wake write value */
+	u32 val;
+	/** @mask: domain mask */
+	u32 mask;
+	/** @ref: domain reference */
+	u32 ref;
+};
+
+/**
+ * struct xe_force_wake - XE force wake
+ */
+struct xe_force_wake {
+	/** @gt: back pointer to GT */
+	struct xe_gt *gt;
+	/** @lock: protects everything in the force wake struct */
+	spinlock_t lock;
+	/** @awake_domains: mask of all domains awake */
+	enum xe_force_wake_domains awake_domains;
+	/** @domains: force wake domains */
+	struct xe_force_wake_domain domains[XE_FW_DOMAIN_ID_COUNT];
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gen_wa_oob.c b/drivers/gpu/drm/xe/xe_gen_wa_oob.c
new file mode 100644
index 000000000000..106ee2b027f0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gen_wa_oob.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+
+#define HEADER \
+	"// SPDX-License-Identifier: MIT\n" \
+	"\n" \
+	"/*\n" \
+	" * DO NOT MODIFY.\n" \
+	" *\n" \
+	" * This file was generated from rules: %s\n" \
+	" */\n" \
+	"#ifndef _GENERATED_XE_WA_OOB_\n" \
+	"#define _GENERATED_XE_WA_OOB_\n" \
+	"\n" \
+	"enum {\n"
+
+#define FOOTER \
+	"};\n" \
+	"\n" \
+	"#endif\n"
+
+static void print_usage(FILE *f)
+{
+	fprintf(f, "usage: %s <input-rule-file> <generated-c-source-file> <generated-c-header-file>\n",
+		program_invocation_short_name);
+}
+
+static void print_parse_error(const char *err_msg, const char *line,
+			      unsigned int lineno)
+{
+	fprintf(stderr, "ERROR: %s\nERROR: %u: %.60s\n",
+		err_msg, lineno, line);
+}
+
+static char *strip(char *line, size_t linelen)
+{
+	while (linelen && isspace((unsigned char)line[linelen - 1]))
+		linelen--;
+	/* Terminate right after the last non-whitespace character */
+	line[linelen] = '\0';
+
+	return line + strspn(line, " \f\n\r\t\v");
+}
+
+#define MAX_LINE_LEN 4096
+static int parse(FILE *input, FILE *csource, FILE *cheader)
+{
+	char line[MAX_LINE_LEN + 1];
+	char *name, *prev_name = NULL, *rules;
+	unsigned int lineno = 0, idx = 0;
+
+	while (fgets(line, sizeof(line), input)) {
+		size_t linelen;
+		bool is_continuation;
+
+		if (line[0] == '\0' || line[0] == '#' || line[0] == '\n') {
+			lineno++;
+			continue;
+		}
+
+		linelen = strlen(line);
+		if (linelen == MAX_LINE_LEN) {
+			print_parse_error("line too long", line, lineno);
+			return -EINVAL;
+		}
+
+		is_continuation = isspace((unsigned char)line[0]);
+		name = strip(line, linelen);
+
+		if (!is_continuation) {
+			name = strtok(name, " \t");
+			rules = strtok(NULL, "");
+		} else {
+			if (!prev_name) {
+				print_parse_error("invalid rule continuation", line, lineno);
+				return -EINVAL;
+			}
+
+			rules = name;
+			name = NULL;
+		}
+
+		/* strtok() returns NULL when a rule name has nothing after it */
+		if (!rules || rules[0] == '\0') {
+			print_parse_error("invalid empty rule\n", line, lineno);
+			return -EINVAL;
+		}
+
+		if (name) {
+			fprintf(cheader, "\tXE_WA_OOB_%s = %u,\n", name, idx);
+			fprintf(csource, "{ XE_RTP_NAME(\"%s\"), XE_RTP_RULES(%s) },\n",
+				name, rules);
+		} else {
+			fprintf(csource, "{ XE_RTP_NAME(NULL), XE_RTP_RULES(%s) },\n",
+				rules);
+		}
+
+		idx++;
+		lineno++;
+		if (!is_continuation)
+			prev_name = name;
+	}
+
+	fprintf(cheader, "\t_XE_WA_OOB_COUNT = %u\n", idx);
+
+	return 0;
+}
+
+int main(int argc, const char *argv[])
+{
+	enum {
+		ARGS_INPUT,
+		ARGS_CSOURCE,
+		ARGS_CHEADER,
+		_ARGS_COUNT
+	};
+	struct {
+		const char *fn;
+		const char *mode;
+		FILE *f;
+	} args[] = {
+		[ARGS_INPUT] = { .fn = argv[1], .mode = "r" },
+		[ARGS_CSOURCE] = { .fn = argv[2], .mode = "w" },
+		[ARGS_CHEADER] = { .fn = argv[3], .mode = "w" },
+	};
+	int ret = 1;
+
+	/* argv[0] plus one file name per args[] entry are required */
+	if (argc < 1 + _ARGS_COUNT) {
+		fprintf(stderr, "ERROR: wrong arguments\n");
+		print_usage(stderr);
+		return 1;
+	}
+
+	for (int i = 0; i < _ARGS_COUNT; i++) {
+		args[i].f = fopen(args[i].fn, args[i].mode);
+		if (!args[i].f) {
+			fprintf(stderr, "ERROR: Can't open %s: %m\n", args[i].fn);
+			goto err;
+		}
+	}
+
+	fprintf(args[ARGS_CHEADER].f, HEADER, args[ARGS_INPUT].fn);
+	ret = parse(args[ARGS_INPUT].f, args[ARGS_CSOURCE].f,
+		    args[ARGS_CHEADER].f);
+	if (!ret)
+		fprintf(args[ARGS_CHEADER].f, FOOTER);
+
+err:
+	for (int i = 0; i < _ARGS_COUNT; i++) {
+		if (args[i].f)
+			fclose(args[i].f);
+	}
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
new file mode 100644
index 000000000000..3efd2d066bf7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -0,0 +1,428 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_ggtt.h"
+
+#include <linux/sizes.h>
+
+#include <drm/drm_managed.h>
+#include <drm/i915_drm.h>
+
+#include "regs/xe_gt_regs.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_tlb_invalidation.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_wopcm.h"
+
+#define XELPG_GGTT_PTE_PAT0	BIT_ULL(52)
+#define XELPG_GGTT_PTE_PAT1	BIT_ULL(53)
+
+/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */
+#define GUC_GGTT_TOP	0xFEE00000
+
+static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
+				   u16 pat_index)
+{
+	u64 pte;
+
+	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
+	pte |= XE_PAGE_PRESENT;
+
+	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
+		pte |= XE_GGTT_PTE_DM;
+
+	return pte;
+}
+
+static u64 xelpg_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
+				    u16 pat_index)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	u64 pte;
+
+	pte = xelp_ggtt_pte_encode_bo(bo, bo_offset, pat_index);
+
+	xe_assert(xe, pat_index <= 3);
+
+	if (pat_index & BIT(0))
+		pte |= XELPG_GGTT_PTE_PAT0;
+
+	if (pat_index & BIT(1))
+		pte |= XELPG_GGTT_PTE_PAT1;
+
+	return pte;
+}
+
+static unsigned int probe_gsm_size(struct pci_dev *pdev)
+{
+	u16 gmch_ctl, ggms;
+
+	pci_read_config_word(pdev, SNB_GMCH_CTRL, &gmch_ctl);
+	ggms = (gmch_ctl >> BDW_GMCH_GGMS_SHIFT) & BDW_GMCH_GGMS_MASK;
+	return ggms ? SZ_1M << ggms : 0;
+}
+
+void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte)
+{
+	xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK));
+	xe_tile_assert(ggtt->tile, addr < ggtt->size);
+
+	writeq(pte, &ggtt->gsm[addr >> XE_PTE_SHIFT]);
+}
+
+static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
+{
+	u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB];
+	u64 end = start + size - 1;
+	u64 scratch_pte;
+
+	xe_tile_assert(ggtt->tile, start < end);
+
+	if (ggtt->scratch)
+		scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0,
+							  pat_index);
+	else
+		scratch_pte = 0;
+
+	while (start < end) {
+		xe_ggtt_set_pte(ggtt, start, scratch_pte);
+		start += XE_PAGE_SIZE;
+	}
+}
+
+static void ggtt_fini_early(struct drm_device *drm, void *arg)
+{
+	struct xe_ggtt *ggtt = arg;
+
+	mutex_destroy(&ggtt->lock);
+	drm_mm_takedown(&ggtt->mm);
+}
+
+static void ggtt_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_ggtt *ggtt = arg;
+
+	ggtt->scratch = NULL;
+}
+
+static void primelockdep(struct xe_ggtt *ggtt)
+{
+	if (!IS_ENABLED(CONFIG_LOCKDEP))
+		return;
+
+	fs_reclaim_acquire(GFP_KERNEL);
+	might_lock(&ggtt->lock);
+	fs_reclaim_release(GFP_KERNEL);
+}
+
+static const struct xe_ggtt_pt_ops xelp_pt_ops = {
+	.pte_encode_bo = xelp_ggtt_pte_encode_bo,
+};
+
+static const struct xe_ggtt_pt_ops xelpg_pt_ops = {
+	.pte_encode_bo = xelpg_ggtt_pte_encode_bo,
+};
+
+/*
+ * Early GGTT initialization, which allows to create new mappings usable by the
+ * GuC.
+ * Mappings are not usable by the HW engines, as it doesn't have scratch /
+ * initial clear done to it yet. That will happen in the regular, non-early
+ * GGTT init.
+ */
+int xe_ggtt_init_early(struct xe_ggtt *ggtt)
+{
+	struct xe_device *xe = tile_to_xe(ggtt->tile);
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	unsigned int gsm_size;
+
+	gsm_size = probe_gsm_size(pdev);
+	if (gsm_size == 0) {
+		drm_err(&xe->drm, "Hardware reported no preallocated GSM\n");
+		return -ENOMEM;
+	}
+
+	ggtt->gsm = ggtt->tile->mmio.regs + SZ_8M;
+	ggtt->size = (gsm_size / 8) * (u64) XE_PAGE_SIZE;
+
+	if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
+		ggtt->flags |= XE_GGTT_FLAGS_64K;
+
+	/*
+	 * 8B per entry, each points to a 4KB page.
+	 *
+	 * The GuC address space is limited on both ends of the GGTT, because
+	 * the GuC shim HW redirects accesses to those addresses to other HW
+	 * areas instead of going through the GGTT. On the bottom end, the GuC
+	 * can't access offsets below the WOPCM size, while on the top side the
+	 * limit is fixed at GUC_GGTT_TOP. To keep things simple, instead of
+	 * checking each object to see if they are accessed by GuC or not, we
+	 * just exclude those areas from the allocator. Additionally, to
+	 * simplify the driver load, we use the maximum WOPCM size in this logic
+	 * instead of the programmed one, so we don't need to wait until the
+	 * actual size to be programmed is determined (which requires FW fetch)
+	 * before initializing the GGTT. These simplifications might waste space
+	 * in the GGTT (about 20-25 MBs depending on the platform) but we can
+	 * live with this.
+	 *
+	 * Another benefit of this is the GuC bootrom can't access anything
+	 * below the WOPCM max size so anything the bootrom needs to access (e.g.
+	 * an RSA key) needs to be placed in the GGTT above the WOPCM max size.
+	 * Starting the GGTT allocations above the WOPCM max gives us the correct
+	 * placement for free.
+	 */
+	if (ggtt->size > GUC_GGTT_TOP)
+		ggtt->size = GUC_GGTT_TOP;
+
+	if (GRAPHICS_VERx100(xe) >= 1270)
+		ggtt->pt_ops = &xelpg_pt_ops;
+	else
+		ggtt->pt_ops = &xelp_pt_ops;
+
+	drm_mm_init(&ggtt->mm, xe_wopcm_size(xe),
+		    ggtt->size - xe_wopcm_size(xe));
+	mutex_init(&ggtt->lock);
+	primelockdep(ggtt);
+
+	return drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt);
+}
+
+static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt)
+{
+	struct drm_mm_node *hole;
+	u64 start, end;
+
+	/* Display may have allocated inside ggtt, so be careful with clearing here */
+	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
+	mutex_lock(&ggtt->lock);
+	drm_mm_for_each_hole(hole, &ggtt->mm, start, end)
+		xe_ggtt_clear(ggtt, start, end - start);
+
+	xe_ggtt_invalidate(ggtt);
+	mutex_unlock(&ggtt->lock);
+	xe_device_mem_access_put(tile_to_xe(ggtt->tile));
+}
+
+int xe_ggtt_init(struct xe_ggtt *ggtt)
+{
+	struct xe_device *xe = tile_to_xe(ggtt->tile);
+	unsigned int flags;
+	int err;
+
+	/*
+	 * So we don't need to worry about 64K GGTT layout when dealing with
+	 * scratch entries, rather keep the scratch page in system memory on
+	 * platforms where 64K pages are needed for VRAM.
+	 */
+	flags = XE_BO_CREATE_PINNED_BIT;
+	if (ggtt->flags & XE_GGTT_FLAGS_64K)
+		flags |= XE_BO_CREATE_SYSTEM_BIT;
+	else
+		flags |= XE_BO_CREATE_VRAM_IF_DGFX(ggtt->tile);
+
+	ggtt->scratch = xe_managed_bo_create_pin_map(xe, ggtt->tile, XE_PAGE_SIZE, flags);
+	if (IS_ERR(ggtt->scratch)) {
+		err = PTR_ERR(ggtt->scratch);
+		goto err;
+	}
+
+	xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, ggtt->scratch->size);
+
+	xe_ggtt_initial_clear(ggtt);
+
+	return drmm_add_action_or_reset(&xe->drm, ggtt_fini, ggtt);
+err:
+	ggtt->scratch = NULL;
+	return err;
+}
+
+#define GUC_TLB_INV_CR				XE_REG(0xcee8)
+#define   GUC_TLB_INV_CR_INVALIDATE		REG_BIT(0)
+#define PVC_GUC_TLB_INV_DESC0			XE_REG(0xcf7c)
+#define   PVC_GUC_TLB_INV_DESC0_VALID		REG_BIT(0)
+#define PVC_GUC_TLB_INV_DESC1			XE_REG(0xcf80)
+#define   PVC_GUC_TLB_INV_DESC1_INVALIDATE	REG_BIT(6)
+
+static void ggtt_invalidate_gt_tlb(struct xe_gt *gt)
+{
+	if (!gt)
+		return;
+
+	/*
+	 * Invalidation can happen when there's no in-flight work keeping the
+	 * GT awake.  We need to explicitly grab forcewake to ensure the GT
+	 * and GuC are accessible.
+	 */
+	xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+
+	/* TODO: vfunc for GuC vs. non-GuC */
+
+	if (gt->uc.guc.submission_state.enabled) {
+		int seqno;
+
+		seqno = xe_gt_tlb_invalidation_guc(gt);
+		xe_gt_assert(gt, seqno > 0);
+		if (seqno > 0)
+			xe_gt_tlb_invalidation_wait(gt, seqno);
+	} else if (xe_device_uc_enabled(gt_to_xe(gt))) {
+		struct xe_device *xe = gt_to_xe(gt);
+
+		if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
+			xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1,
+					PVC_GUC_TLB_INV_DESC1_INVALIDATE);
+			xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0,
+					PVC_GUC_TLB_INV_DESC0_VALID);
+		} else
+			xe_mmio_write32(gt, GUC_TLB_INV_CR,
+					GUC_TLB_INV_CR_INVALIDATE);
+	}
+
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
+void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
+{
+	/* Each GT in a tile has its own TLB to cache GGTT lookups */
+	ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt);
+	ggtt_invalidate_gt_tlb(ggtt->tile->media_gt);
+}
+
+void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix)
+{
+	u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB];
+	u64 addr, scratch_pte;
+
+	scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0, pat_index);
+
+	printk("%sGlobal GTT:", prefix);
+	for (addr = 0; addr < ggtt->size; addr += XE_PAGE_SIZE) {
+		unsigned int i = addr / XE_PAGE_SIZE;
+
+		xe_tile_assert(ggtt->tile, addr <= U32_MAX);
+		if (ggtt->gsm[i] == scratch_pte)
+			continue;
+
+		printk("%s    ggtt[0x%08x] = 0x%016llx",
+		       prefix, (u32)addr, ggtt->gsm[i]);
+	}
+}
+
+int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node *node,
+				       u32 size, u32 align, u32 mm_flags)
+{
+	return drm_mm_insert_node_generic(&ggtt->mm, node, size, align, 0,
+					  mm_flags);
+}
+
+int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
+				u32 size, u32 align)
+{
+	int ret;
+
+	mutex_lock(&ggtt->lock);
+	ret = xe_ggtt_insert_special_node_locked(ggtt, node, size,
+						 align, DRM_MM_INSERT_HIGH);
+	mutex_unlock(&ggtt->lock);
+
+	return ret;
+}
+
+void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
+{
+	u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB];
+	u64 start = bo->ggtt_node.start;
+	u64 offset, pte;
+
+	for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) {
+		pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index);
+		xe_ggtt_set_pte(ggtt, start + offset, pte);
+	}
+
+	xe_ggtt_invalidate(ggtt);
+}
+
+static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
+				  u64 start, u64 end)
+{
+	int err;
+	u64 alignment = XE_PAGE_SIZE;
+
+	if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
+		alignment = SZ_64K;
+
+	if (XE_WARN_ON(bo->ggtt_node.size)) {
+		/* Someone's already inserted this BO in the GGTT */
+		xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size);
+		return 0;
+	}
+
+	err = xe_bo_validate(bo, NULL, false);
+	if (err)
+		return err;
+
+	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
+	mutex_lock(&ggtt->lock);
+	err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size,
+					  alignment, 0, start, end, 0);
+	if (!err)
+		xe_ggtt_map_bo(ggtt, bo);
+	mutex_unlock(&ggtt->lock);
+	xe_device_mem_access_put(tile_to_xe(ggtt->tile));
+
+	return err;
+}
+
+int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
+			 u64 start, u64 end)
+{
+	return __xe_ggtt_insert_bo_at(ggtt, bo, start, end);
+}
+
+int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
+{
+	return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX);
+}
+
+void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node)
+{
+	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
+	mutex_lock(&ggtt->lock);
+
+	xe_ggtt_clear(ggtt, node->start, node->size);
+	drm_mm_remove_node(node);
+	node->size = 0;
+
+	xe_ggtt_invalidate(ggtt);
+
+	mutex_unlock(&ggtt->lock);
+	xe_device_mem_access_put(tile_to_xe(ggtt->tile));
+}
+
+void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
+{
+	/* A zero-sized node means this BO is not currently in the GGTT */
+	if (XE_WARN_ON(!bo->ggtt_node.size))
+		return;
+
+	xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size);
+
+	xe_ggtt_remove_node(ggtt, &bo->ggtt_node);
+}
+
+int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p)
+{
+	int err;
+
+	err = mutex_lock_interruptible(&ggtt->lock);
+	if (err)
+		return err;
+
+	drm_mm_print(&ggtt->mm, p);
+	mutex_unlock(&ggtt->lock);
+	return err;
+}
diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h
new file mode 100644
index 000000000000..a09c166dff70
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ggtt.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_GGTT_H_
+#define _XE_GGTT_H_
+
+#include "xe_ggtt_types.h"
+
+struct drm_printer;
+
+void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte);
+void xe_ggtt_invalidate(struct xe_ggtt *ggtt);
+int xe_ggtt_init_early(struct xe_ggtt *ggtt);
+int xe_ggtt_init(struct xe_ggtt *ggtt);
+void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix);
+
+int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
+				u32 size, u32 align);
+int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt,
+				       struct drm_mm_node *node,
+				       u32 size, u32 align, u32 mm_flags);
+void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node);
+void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
+int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
+int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
+			 u64 start, u64 end);
+void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
+
+int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h
new file mode 100644
index 000000000000..d8c584d9a8c3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ggtt_types.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GGTT_TYPES_H_
+#define _XE_GGTT_TYPES_H_
+
+#include <drm/drm_mm.h>
+
+#include "xe_pt_types.h"
+
+struct xe_bo;
+struct xe_gt;
+
+struct xe_ggtt_pt_ops {
+	u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index);
+};
+
+struct xe_ggtt {
+	struct xe_tile *tile;
+
+	u64 size;
+
+#define XE_GGTT_FLAGS_64K BIT(0)
+	unsigned int flags;
+
+	struct xe_bo *scratch;
+
+	struct mutex lock;
+
+	u64 __iomem *gsm;
+
+	const struct xe_ggtt_pt_ops *pt_ops;
+
+	struct drm_mm mm;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c
new file mode 100644
index 000000000000..e4ad1d6ce1d5
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_gpu_scheduler.h"
+
+static void xe_sched_process_msg_queue(struct xe_gpu_scheduler *sched)
+{
+	if (!READ_ONCE(sched->base.pause_submit))
+		queue_work(sched->base.submit_wq, &sched->work_process_msg);
+}
+
+static void xe_sched_process_msg_queue_if_ready(struct xe_gpu_scheduler *sched)
+{
+	struct xe_sched_msg *msg;
+
+	spin_lock(&sched->base.job_list_lock);
+	msg = list_first_entry_or_null(&sched->msgs, struct xe_sched_msg, link);
+	if (msg)
+		xe_sched_process_msg_queue(sched);
+	spin_unlock(&sched->base.job_list_lock);
+}
+
+static struct xe_sched_msg *
+xe_sched_get_msg(struct xe_gpu_scheduler *sched)
+{
+	struct xe_sched_msg *msg;
+
+	spin_lock(&sched->base.job_list_lock);
+	msg = list_first_entry_or_null(&sched->msgs,
+				       struct xe_sched_msg, link);
+	if (msg)
+		list_del(&msg->link);
+	spin_unlock(&sched->base.job_list_lock);
+
+	return msg;
+}
+
+static void xe_sched_process_msg_work(struct work_struct *w)
+{
+	struct xe_gpu_scheduler *sched =
+		container_of(w, struct xe_gpu_scheduler, work_process_msg);
+	struct xe_sched_msg *msg;
+
+	if (READ_ONCE(sched->base.pause_submit))
+		return;
+
+	msg = xe_sched_get_msg(sched);
+	if (msg) {
+		sched->ops->process_msg(msg);
+
+		xe_sched_process_msg_queue_if_ready(sched);
+	}
+}
+
+int xe_sched_init(struct xe_gpu_scheduler *sched,
+		  const struct drm_sched_backend_ops *ops,
+		  const struct xe_sched_backend_ops *xe_ops,
+		  struct workqueue_struct *submit_wq,
+		  uint32_t hw_submission, unsigned hang_limit,
+		  long timeout, struct workqueue_struct *timeout_wq,
+		  atomic_t *score, const char *name,
+		  struct device *dev)
+{
+	sched->ops = xe_ops;
+	INIT_LIST_HEAD(&sched->msgs);
+	INIT_WORK(&sched->work_process_msg, xe_sched_process_msg_work);
+
+	return drm_sched_init(&sched->base, ops, submit_wq, 1, hw_submission,
+			      hang_limit, timeout, timeout_wq, score, name,
+			      dev);
+}
+
+void xe_sched_fini(struct xe_gpu_scheduler *sched)
+{
+	xe_sched_submission_stop(sched);
+	drm_sched_fini(&sched->base);
+}
+
+void xe_sched_submission_start(struct xe_gpu_scheduler *sched)
+{
+	drm_sched_wqueue_start(&sched->base);
+	queue_work(sched->base.submit_wq, &sched->work_process_msg);
+}
+
+void xe_sched_submission_stop(struct xe_gpu_scheduler *sched)
+{
+	drm_sched_wqueue_stop(&sched->base);
+	cancel_work_sync(&sched->work_process_msg);
+}
+
+void xe_sched_add_msg(struct xe_gpu_scheduler *sched,
+		      struct xe_sched_msg *msg)
+{
+	spin_lock(&sched->base.job_list_lock);
+	list_add_tail(&msg->link, &sched->msgs);
+	spin_unlock(&sched->base.job_list_lock);
+
+	xe_sched_process_msg_queue(sched);
+}
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
new file mode 100644
index 000000000000..10c6bb9c9386
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GPU_SCHEDULER_H_
+#define _XE_GPU_SCHEDULER_H_
+
+#include "xe_gpu_scheduler_types.h"
+#include "xe_sched_job_types.h"
+
+int xe_sched_init(struct xe_gpu_scheduler *sched,
+		  const struct drm_sched_backend_ops *ops,
+		  const struct xe_sched_backend_ops *xe_ops,
+		  struct workqueue_struct *submit_wq,
+		  uint32_t hw_submission, unsigned hang_limit,
+		  long timeout, struct workqueue_struct *timeout_wq,
+		  atomic_t *score, const char *name,
+		  struct device *dev);
+void xe_sched_fini(struct xe_gpu_scheduler *sched);
+
+void xe_sched_submission_start(struct xe_gpu_scheduler *sched);
+void xe_sched_submission_stop(struct xe_gpu_scheduler *sched);
+
+void xe_sched_add_msg(struct xe_gpu_scheduler *sched,
+		      struct xe_sched_msg *msg);
+
+static inline void xe_sched_stop(struct xe_gpu_scheduler *sched)
+{
+	drm_sched_stop(&sched->base, NULL);
+}
+
+static inline void xe_sched_tdr_queue_imm(struct xe_gpu_scheduler *sched)
+{
+	drm_sched_tdr_queue_imm(&sched->base);
+}
+
+static inline void xe_sched_resubmit_jobs(struct xe_gpu_scheduler *sched)
+{
+	drm_sched_resubmit_jobs(&sched->base);
+}
+
+static inline bool
+xe_sched_invalidate_job(struct xe_sched_job *job, int threshold)
+{
+	return drm_sched_invalidate_job(&job->drm, threshold);
+}
+
+static inline void xe_sched_add_pending_job(struct xe_gpu_scheduler *sched,
+					    struct xe_sched_job *job)
+{
+	list_add(&job->drm.list, &sched->base.pending_list);
+}
+
+static inline
+struct xe_sched_job *xe_sched_first_pending_job(struct xe_gpu_scheduler *sched)
+{
+	return list_first_entry_or_null(&sched->base.pending_list,
+					struct xe_sched_job, drm.list);
+}
+
+static inline int
+xe_sched_entity_init(struct xe_sched_entity *entity,
+		     struct xe_gpu_scheduler *sched)
+{
+	return drm_sched_entity_init(entity, 0,
+				     (struct drm_gpu_scheduler **)&sched,
+				     1, NULL);
+}
+
+#define xe_sched_entity_fini drm_sched_entity_fini
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler_types.h b/drivers/gpu/drm/xe/xe_gpu_scheduler_types.h
new file mode 100644
index 000000000000..6731b13da8bb
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler_types.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GPU_SCHEDULER_TYPES_H_
+#define _XE_GPU_SCHEDULER_TYPES_H_
+
+#include <drm/gpu_scheduler.h>
+
+/**
+ * struct xe_sched_msg - an in-band (relative to GPU scheduler run queue)
+ * message
+ *
+ * Generic enough for backend defined messages, backend can expand if needed.
+ */
+struct xe_sched_msg {
+	/** @link: list link into the gpu scheduler list of messages */
+	struct list_head		link;
+	/**
+	 * @private_data: opaque pointer to message private data (backend defined)
+	 */
+	void				*private_data;
+	/** @opcode: opcode of message (backend defined) */
+	unsigned int			opcode;
+};
+
+/**
+ * struct xe_sched_backend_ops - Define the backend operations called by the
+ * scheduler
+ */
+struct xe_sched_backend_ops {
+	/**
+	 * @process_msg: Process a message. Allowed to block, it is this
+	 * function's responsibility to free message if dynamically allocated.
+	 */
+	void (*process_msg)(struct xe_sched_msg *msg);
+};
+
+/**
+ * struct xe_gpu_scheduler - Xe GPU scheduler
+ */
+struct xe_gpu_scheduler {
+	/** @base: DRM GPU scheduler */
+	struct drm_gpu_scheduler		base;
+	/** @ops: Xe scheduler ops */
+	const struct xe_sched_backend_ops	*ops;
+	/** @msgs: list of messages to be processed in @work_process_msg */
+	struct list_head			msgs;
+	/** @work_process_msg: processes messages */
+	struct work_struct		work_process_msg;
+};
+
+#define xe_sched_entity		drm_sched_entity
+#define xe_sched_policy		drm_sched_policy
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
new file mode 100644
index 000000000000..a8a895cf4b44
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -0,0 +1,438 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_gsc.h"
+
+#include <drm/drm_managed.h>
+
+#include "abi/gsc_mkhi_commands_abi.h"
+#include "generated/xe_wa_oob.h"
+#include "xe_bb.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_exec_queue.h"
+#include "xe_gsc_submit.h"
+#include "xe_gt.h"
+#include "xe_gt_printk.h"
+#include "xe_huc.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_sched_job.h"
+#include "xe_uc_fw.h"
+#include "xe_wa.h"
+#include "instructions/xe_gsc_commands.h"
+#include "regs/xe_gsc_regs.h"
+
+static struct xe_gt *
+gsc_to_gt(struct xe_gsc *gsc)
+{
+	return container_of(gsc, struct xe_gt, uc.gsc);
+}
+
+static int memcpy_fw(struct xe_gsc *gsc)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 fw_size = gsc->fw.size;
+	void *storage;
+
+	/*
+	 * FIXME: xe_migrate_copy does not work with stolen mem yet, so we use
+	 * a memcpy for now.
+	 */
+	storage = kmalloc(fw_size, GFP_KERNEL);
+	if (!storage)
+		return -ENOMEM;
+
+	xe_map_memcpy_from(xe, storage, &gsc->fw.bo->vmap, 0, fw_size);
+	xe_map_memcpy_to(xe, &gsc->private->vmap, 0, storage, fw_size);
+	xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, gsc->private->size - fw_size);
+
+	kfree(storage);
+
+	return 0;
+}
+
+static int emit_gsc_upload(struct xe_gsc *gsc)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	u64 offset = xe_bo_ggtt_addr(gsc->private);
+	struct xe_bb *bb;
+	struct xe_sched_job *job;
+	struct dma_fence *fence;
+	long timeout;
+
+	bb = xe_bb_new(gt, 4, false);
+	if (IS_ERR(bb))
+		return PTR_ERR(bb);
+
+	bb->cs[bb->len++] = GSC_FW_LOAD;
+	bb->cs[bb->len++] = lower_32_bits(offset);
+	bb->cs[bb->len++] = upper_32_bits(offset);
+	bb->cs[bb->len++] = (gsc->private->size / SZ_4K) | GSC_FW_LOAD_LIMIT_VALID;
+
+	job = xe_bb_create_job(gsc->q, bb);
+	if (IS_ERR(job)) {
+		xe_bb_free(bb, NULL);
+		return PTR_ERR(job);
+	}
+
+	xe_sched_job_arm(job);
+	fence = dma_fence_get(&job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+
+	timeout = dma_fence_wait_timeout(fence, false, HZ);
+	dma_fence_put(fence);
+	xe_bb_free(bb, NULL);
+	if (timeout < 0)
+		return timeout;
+	else if (!timeout)
+		return -ETIME;
+
+	return 0;
+}
+
+#define version_query_wr(xe_, map_, offset_, field_, val_) \
+	xe_map_wr_field(xe_, map_, offset_, struct gsc_get_compatibility_version_in, field_, val_)
+#define version_query_rd(xe_, map_, offset_, field_) \
+	xe_map_rd_field(xe_, map_, offset_, struct gsc_get_compatibility_version_out, field_)
+
+static u32 emit_version_query_msg(struct xe_device *xe, struct iosys_map *map, u32 wr_offset)
+{
+	xe_map_memset(xe, map, wr_offset, 0, sizeof(struct gsc_get_compatibility_version_in));
+
+	version_query_wr(xe, map, wr_offset, header.group_id, MKHI_GROUP_ID_GFX_SRV);
+	version_query_wr(xe, map, wr_offset, header.command,
+			 MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION);
+
+	return wr_offset + sizeof(struct gsc_get_compatibility_version_in);
+}
+
+#define GSC_VER_PKT_SZ SZ_4K /* 4K each for input and output */
+static int query_compatibility_version(struct xe_gsc *gsc)
+{
+	struct xe_uc_fw_version *compat = &gsc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY];
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_bo *bo;
+	u32 wr_offset;
+	u32 rd_offset;
+	u64 ggtt_offset;
+	int err;
+
+	/* one BO holds both packets: request at 0, reply at GSC_VER_PKT_SZ */
+	bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2,
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo)) {
+		xe_gt_err(gt, "failed to allocate bo for GSC version query\n");
+		return PTR_ERR(bo);
+	}
+
+	ggtt_offset = xe_bo_ggtt_addr(bo);
+
+	wr_offset = xe_gsc_emit_header(xe, &bo->vmap, 0, HECI_MEADDRESS_MKHI, 0,
+				       sizeof(struct gsc_get_compatibility_version_in));
+	wr_offset = emit_version_query_msg(xe, &bo->vmap, wr_offset);
+
+	err = xe_gsc_pkt_submit_kernel(gsc, ggtt_offset, wr_offset,
+				       ggtt_offset + GSC_VER_PKT_SZ,
+				       GSC_VER_PKT_SZ);
+	if (err) {
+		xe_gt_err(gt, "failed to submit GSC request for compatibility version: %d\n",
+			  err);
+		goto out_bo;
+	}
+
+	err = xe_gsc_read_out_header(xe, &bo->vmap, GSC_VER_PKT_SZ,
+				     sizeof(struct gsc_get_compatibility_version_out),
+				     &rd_offset);
+	if (err) {
+		xe_gt_err(gt, "HuC: invalid GSC reply for version query (err=%d)\n", err);
+		/* the query BO must be released on this path too */
+		goto out_bo;
+	}
+
+	compat->major = version_query_rd(xe, &bo->vmap, rd_offset, compat_major);
+	compat->minor = version_query_rd(xe, &bo->vmap, rd_offset, compat_minor);
+
+	xe_gt_info(gt, "found GSC cv%u.%u\n", compat->major, compat->minor);
+
+out_bo:
+	xe_bo_unpin_map_no_vm(bo);
+	return err;
+}
+
+static int gsc_fw_is_loaded(struct xe_gt *gt)
+{
+	return xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) &
+			      HECI1_FWSTS1_INIT_COMPLETE;
+}
+
+static int gsc_fw_wait(struct xe_gt *gt)
+{
+	/*
+	 * GSC load can take up to 250ms from the moment the instruction is
+	 * executed by the GSCCS. To account for possible submission delays or
+	 * other issues, we use a 500ms timeout in the wait here.
+	 */
+	return xe_mmio_wait32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE),
+			      HECI1_FWSTS1_INIT_COMPLETE,
+			      HECI1_FWSTS1_INIT_COMPLETE,
+			      500 * USEC_PER_MSEC, NULL, false);
+}
+
+static int gsc_upload(struct xe_gsc *gsc)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_device *xe = gt_to_xe(gt);
+	int err;
+
+	/* we should only be here if the init step were successful */
+	xe_assert(xe, xe_uc_fw_is_loadable(&gsc->fw) && gsc->q);
+
+	if (gsc_fw_is_loaded(gt)) {
+		xe_gt_err(gt, "GSC already loaded at upload time\n");
+		return -EEXIST;
+	}
+
+	err = memcpy_fw(gsc);
+	if (err) {
+		xe_gt_err(gt, "Failed to memcpy GSC FW\n");
+		return err;
+	}
+
+	/*
+	 * GSC is only killed by an FLR, so we need to trigger one on unload to
+	 * make sure we stop it. This is because we assign a chunk of memory to
+	 * the GSC as part of the FW load, so we need to make sure it stops
+	 * using it when we release it to the system on driver unload. Note that
+	 * this is not a problem of the unload per-se, because the GSC will not
+	 * touch that memory unless there are requests for it coming from the
+	 * driver; therefore, no accesses will happen while Xe is not loaded,
+	 * but if we re-load the driver then the GSC might wake up and try to
+	 * access that old memory location again.
+	 * Given that an FLR is a very disruptive action (see the FLR function
+	 * for details), we want to do it as the last action before releasing
+	 * the access to the MMIO bar, which means we need to do it as part of
+	 * mmio cleanup.
+	 */
+	xe->needs_flr_on_fini = true;
+
+	err = emit_gsc_upload(gsc);
+	if (err) {
+		xe_gt_err(gt, "Failed to emit GSC FW upload (%pe)\n", ERR_PTR(err));
+		return err;
+	}
+
+	err = gsc_fw_wait(gt);
+	if (err) {
+		xe_gt_err(gt, "Failed to wait for GSC load (%pe)\n", ERR_PTR(err));
+		return err;
+	}
+
+	err = query_compatibility_version(gsc);
+	if (err)
+		return err;
+
+	err = xe_uc_fw_check_version_requirements(&gsc->fw);
+	if (err)
+		return err;
+
+	xe_gt_dbg(gt, "GSC FW async load completed\n");
+
+	return 0;
+}
+
+static void gsc_work(struct work_struct *work)
+{
+	struct xe_gsc *gsc = container_of(work, typeof(*gsc), work);
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_device *xe = gt_to_xe(gt);
+	int ret;
+
+	xe_device_mem_access_get(xe);
+	xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
+
+	ret = gsc_upload(gsc);
+	if (ret && ret != -EEXIST) {
+		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
+		goto out;
+	}
+
+	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
+
+	/* HuC auth failure is not fatal */
+	if (xe_huc_is_authenticated(&gt->uc.huc, XE_HUC_AUTH_VIA_GUC))
+		xe_huc_auth(&gt->uc.huc, XE_HUC_AUTH_VIA_GSC);
+
+out:
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
+	xe_device_mem_access_put(xe);
+}
+
+int xe_gsc_init(struct xe_gsc *gsc)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_tile *tile = gt_to_tile(gt);
+	int ret;
+
+	gsc->fw.type = XE_UC_FW_TYPE_GSC;
+	INIT_WORK(&gsc->work, gsc_work);
+
+	/* The GSC uC is only available on the media GT */
+	if (tile->media_gt && (gt != tile->media_gt)) {
+		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_NOT_SUPPORTED);
+		return 0;
+	}
+
+	/*
+	 * Some platforms can have GuC but not GSC. That would cause
+	 * xe_uc_fw_init(gsc) to return a "not supported" failure code and abort
+	 * all firmware loading. So check for GSC being enabled before
+	 * propagating the failure back up. That way the higher level will keep
+	 * going and load GuC as appropriate.
+	 */
+	ret = xe_uc_fw_init(&gsc->fw);
+	if (!xe_uc_fw_is_enabled(&gsc->fw))
+		return 0;
+	else if (ret)
+		goto out;
+
+	return 0;
+
+out:
+	xe_gt_err(gt, "GSC init failed with %d\n", ret);
+	return ret;
+}
+
+static void free_resources(struct drm_device *drm, void *arg)
+{
+	struct xe_gsc *gsc = arg;
+
+	if (gsc->wq) {
+		destroy_workqueue(gsc->wq);
+		gsc->wq = NULL;
+	}
+
+	if (gsc->q) {
+		xe_exec_queue_put(gsc->q);
+		gsc->q = NULL;
+	}
+
+	if (gsc->private) {
+		xe_bo_unpin_map_no_vm(gsc->private);
+		gsc->private = NULL;
+	}
+}
+
+int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_OTHER, 0, true);
+	struct xe_exec_queue *q;
+	struct workqueue_struct *wq;
+	struct xe_bo *bo;
+	int err;
+
+	if (!xe_uc_fw_is_available(&gsc->fw))
+		return 0;
+
+	if (!hwe)
+		return -ENODEV;
+
+	bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4M,
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_STOLEN_BIT |
+				  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	q = xe_exec_queue_create(xe, NULL,
+				 BIT(hwe->logical_instance), 1, hwe,
+				 EXEC_QUEUE_FLAG_KERNEL |
+				 EXEC_QUEUE_FLAG_PERMANENT);
+	if (IS_ERR(q)) {
+		xe_gt_err(gt, "Failed to create queue for GSC submission\n");
+		err = PTR_ERR(q);
+		goto out_bo;
+	}
+
+	wq = alloc_ordered_workqueue("gsc-ordered-wq", 0);
+	if (!wq) {
+		err = -ENOMEM;
+		goto out_q;
+	}
+
+	gsc->private = bo;
+	gsc->q = q;
+	gsc->wq = wq;
+
+	err = drmm_add_action_or_reset(&xe->drm, free_resources, gsc);
+	if (err)
+		return err;
+
+	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOADABLE);
+
+	return 0;
+
+out_q:
+	xe_exec_queue_put(q);
+out_bo:
+	xe_bo_unpin_map_no_vm(bo);
+	return err;
+}
+
+void xe_gsc_load_start(struct xe_gsc *gsc)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+
+	if (!xe_uc_fw_is_loadable(&gsc->fw) || !gsc->q)
+		return;
+
+	/* GSC FW survives GT reset and D3Hot */
+	if (gsc_fw_is_loaded(gt)) {
+		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
+		return;
+	}
+
+	queue_work(gsc->wq, &gsc->work);
+}
+
+void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc)
+{
+	if (xe_uc_fw_is_loadable(&gsc->fw) && gsc->wq)
+		flush_work(&gsc->work);
+}
+
+/*
+ * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a
+ * GSC engine reset by writing a notification bit in the GS1 register and then
+ * triggering an interrupt to GSC; from the interrupt it will take up to 200ms
+ * for the FW to prepare for the reset, so we need to wait for that amount
+ * of time.
+ * After the reset is complete we need to then clear the GS1 register.
+ */
+void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep)
+{
+	u32 gs1_set = prep ? HECI_H_GS1_ER_PREP : 0;
+	u32 gs1_clr = prep ? 0 : HECI_H_GS1_ER_PREP;
+
+	/* WA only applies if the GSC is loaded */
+	if (!XE_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt))
+		return;
+
+	xe_mmio_rmw32(gt, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set);
+
+	if (prep) {
+		/* make sure the reset bit is clear when writing the CSR reg */
+		xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE),
+			      HECI_H_CSR_RST, HECI_H_CSR_IG);
+		msleep(200);
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h
new file mode 100644
index 000000000000..bc1ef7f31ea2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gsc.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GSC_H_
+#define _XE_GSC_H_
+
+#include "xe_gsc_types.h"
+
+struct xe_gt;
+
+int xe_gsc_init(struct xe_gsc *gsc);
+int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc);
+void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc);
+void xe_gsc_load_start(struct xe_gsc *gsc);
+
+void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.c b/drivers/gpu/drm/xe/xe_gsc_submit.c
new file mode 100644
index 000000000000..8c5381e5913f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gsc_submit.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_gsc_submit.h"
+
+#include "abi/gsc_command_header_abi.h"
+#include "xe_bb.h"
+#include "xe_exec_queue.h"
+#include "xe_gt_printk.h"
+#include "xe_gt_types.h"
+#include "xe_map.h"
+#include "xe_sched_job.h"
+#include "instructions/xe_gsc_commands.h"
+#include "regs/xe_gsc_regs.h"
+
+#define GSC_HDR_SIZE (sizeof(struct intel_gsc_mtl_header)) /* shorthand define */
+
+#define mtl_gsc_header_wr(xe_, map_, offset_, field_, val_) \
+	xe_map_wr_field(xe_, map_, offset_, struct intel_gsc_mtl_header, field_, val_)
+
+#define mtl_gsc_header_rd(xe_, map_, offset_, field_) \
+	xe_map_rd_field(xe_, map_, offset_, struct intel_gsc_mtl_header, field_)
+
+/*
+ * GSC FW allows us to define the host_session_handle as we see fit, as long
+ * as we use unique identifier for each user, with handle 0 being reserved for
+ * kernel usage.
+ * To be able to differentiate which client subsystem owns the given session, we
+ * include the client id in the top 8 bits of the handle.
+ */
+#define HOST_SESSION_CLIENT_MASK GENMASK_ULL(63, 56)
+
+static struct xe_gt *
+gsc_to_gt(struct xe_gsc *gsc)
+{
+	return container_of(gsc, struct xe_gt, uc.gsc);
+}
+
+/**
+ * xe_gsc_emit_header - write the MTL GSC header in memory
+ * @xe: the Xe device
+ * @map: the iosys map to write to
+ * @offset: offset from the start of the map at which to write the header
+ * @heci_client_id: client id identifying the type of command (see abi for values)
+ * @host_session_id: host session ID of the caller, top 8 bits clear (0 = kernel)
+ * @payload_size: size of the payload that follows the header
+ *
+ * Returns: offset of the memory location following the header
+ */
+u32 xe_gsc_emit_header(struct xe_device *xe, struct iosys_map *map, u32 offset,
+		       u8 heci_client_id, u64 host_session_id, u32 payload_size)
+{
+	xe_assert(xe, !(host_session_id & HOST_SESSION_CLIENT_MASK));
+
+	if (host_session_id)
+		host_session_id |= FIELD_PREP(HOST_SESSION_CLIENT_MASK, heci_client_id);
+
+	xe_map_memset(xe, map, offset, 0, GSC_HDR_SIZE);
+
+	mtl_gsc_header_wr(xe, map, offset, validity_marker, GSC_HECI_VALIDITY_MARKER);
+	mtl_gsc_header_wr(xe, map, offset, heci_client_id, heci_client_id);
+	mtl_gsc_header_wr(xe, map, offset, host_session_handle, host_session_id);
+	mtl_gsc_header_wr(xe, map, offset, header_version, MTL_GSC_HEADER_VERSION);
+	mtl_gsc_header_wr(xe, map, offset, message_size, payload_size + GSC_HDR_SIZE);
+
+	return offset + GSC_HDR_SIZE;
+}
+
+/**
+ * xe_gsc_check_and_update_pending - test the pending flag of a reply and, if
+ * set, copy the reply's message handle into the input header for the retry
+ * @xe: the Xe device
+ * @in: the iosys map containing the input buffer
+ * @offset_in: offset within the iosys at which the input buffer is located
+ * @out: the iosys map containing the output buffer
+ * @offset_out: offset within the iosys at which the output buffer is located
+ *
+ * Returns: true if the pending bit was set, false otherwise
+ */
+bool xe_gsc_check_and_update_pending(struct xe_device *xe,
+				     struct iosys_map *in, u32 offset_in,
+				     struct iosys_map *out, u32 offset_out)
+{
+	u64 retry_handle;
+
+	if (!(mtl_gsc_header_rd(xe, out, offset_out, flags) & GSC_OUTFLAG_MSG_PENDING))
+		return false;
+
+	retry_handle = mtl_gsc_header_rd(xe, out, offset_out, gsc_message_handle);
+	mtl_gsc_header_wr(xe, in, offset_in, gsc_message_handle, retry_handle);
+
+	return true;
+}
+
+/**
+ * xe_gsc_read_out_header - validate the header of a GSC reply and locate the
+ * payload that follows it
+ * @xe: the Xe device
+ * @map: the iosys map containing the output buffer
+ * @offset: offset within the iosys at which the output buffer is located
+ * @min_payload_size: minimum size of the message excluding the gsc header
+ * @payload_offset: optional pointer to be set to the payload offset
+ *
+ * Returns: 0 on success, -EPROTO on a bad marker, -ENODATA on a short message
+ */
+int xe_gsc_read_out_header(struct xe_device *xe,
+			   struct iosys_map *map, u32 offset,
+			   u32 min_payload_size,
+			   u32 *payload_offset)
+{
+	u32 valid = mtl_gsc_header_rd(xe, map, offset, validity_marker);
+	u32 msg_size = mtl_gsc_header_rd(xe, map, offset, message_size);
+
+	if (valid != GSC_HECI_VALIDITY_MARKER)
+		return -EPROTO;
+
+	/* the header-size test short-circuits before the subtraction is used */
+	if (msg_size < GSC_HDR_SIZE || msg_size - GSC_HDR_SIZE < min_payload_size)
+		return -ENODATA;
+
+	if (payload_offset)
+		*payload_offset = offset + GSC_HDR_SIZE;
+
+	return 0;
+}
+
+/**
+ * xe_gsc_pkt_submit_kernel - submit a kernel heci pkt to the GSC
+ * @gsc: the GSC uC
+ * @addr_in: GGTT address of the message to send to the GSC
+ * @size_in: size of the message to send to the GSC
+ * @addr_out: GGTT address for the GSC to write the reply to
+ * @size_out: size of the memory reserved for the reply
+ */
+int xe_gsc_pkt_submit_kernel(struct xe_gsc *gsc, u64 addr_in, u32 size_in,
+			     u64 addr_out, u32 size_out)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_bb *bb;
+	struct xe_sched_job *job;
+	struct dma_fence *fence;
+	long timeout;
+
+	if (size_in < GSC_HDR_SIZE)
+		return -ENODATA;
+
+	if (size_out < GSC_HDR_SIZE)
+		return -ENOMEM;
+
+	bb = xe_bb_new(gt, 8, false);
+	if (IS_ERR(bb))
+		return PTR_ERR(bb);
+
+	bb->cs[bb->len++] = GSC_HECI_CMD_PKT;
+	bb->cs[bb->len++] = lower_32_bits(addr_in);
+	bb->cs[bb->len++] = upper_32_bits(addr_in);
+	bb->cs[bb->len++] = size_in;
+	bb->cs[bb->len++] = lower_32_bits(addr_out);
+	bb->cs[bb->len++] = upper_32_bits(addr_out);
+	bb->cs[bb->len++] = size_out;
+	bb->cs[bb->len++] = 0;
+
+	job = xe_bb_create_job(gsc->q, bb);
+	if (IS_ERR(job)) {
+		xe_bb_free(bb, NULL);
+		return PTR_ERR(job);
+	}
+
+	xe_sched_job_arm(job);
+	fence = dma_fence_get(&job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+
+	timeout = dma_fence_wait_timeout(fence, false, HZ);
+	dma_fence_put(fence);
+	xe_bb_free(bb, NULL);
+	if (timeout < 0)
+		return timeout;
+	else if (!timeout)
+		return -ETIME;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.h b/drivers/gpu/drm/xe/xe_gsc_submit.h
new file mode 100644
index 000000000000..0801da5d446a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gsc_submit.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GSC_SUBMIT_H_
+#define _XE_GSC_SUBMIT_H_
+
+#include <linux/types.h>
+
+struct iosys_map;
+struct xe_device;
+struct xe_gsc;
+
+u32 xe_gsc_emit_header(struct xe_device *xe, struct iosys_map *map, u32 offset,
+		       u8 heci_client_id, u64 host_session_id, u32 payload_size);
+
+bool xe_gsc_check_and_update_pending(struct xe_device *xe,
+				     struct iosys_map *in, u32 offset_in,
+				     struct iosys_map *out, u32 offset_out);
+
+int xe_gsc_read_out_header(struct xe_device *xe,
+			   struct iosys_map *map, u32 offset,
+			   u32 min_payload_size,
+			   u32 *payload_offset);
+
+int xe_gsc_pkt_submit_kernel(struct xe_gsc *gsc, u64 addr_in, u32 size_in,
+			     u64 addr_out, u32 size_out);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gsc_types.h b/drivers/gpu/drm/xe/xe_gsc_types.h
new file mode 100644
index 000000000000..57fefd66a7ea
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gsc_types.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GSC_TYPES_H_
+#define _XE_GSC_TYPES_H_
+
+#include <linux/workqueue.h>
+
+#include "xe_uc_fw_types.h"
+
+struct xe_bo;
+struct xe_exec_queue;
+
+/**
+ * struct xe_gsc - GSC
+ */
+struct xe_gsc {
+	/** @fw: Generic uC firmware management */
+	struct xe_uc_fw fw;
+
+	/** @security_version: SVN found in the fetched blob */
+	u32 security_version;
+
+	/** @private: Private data for use by the GSC FW */
+	struct xe_bo *private;
+
+	/** @q: Default queue used for submissions to GSC FW */
+	struct xe_exec_queue *q;
+
+	/** @wq: workqueue to handle jobs for delayed load and proxy handling */
+	struct workqueue_struct *wq;
+
+	/** @work: delayed load and proxy handling work */
+	struct work_struct work;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
new file mode 100644
index 000000000000..35474ddbaf97
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -0,0 +1,781 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_gt.h"
+
+#include <linux/minmax.h>
+
+#include <drm/drm_managed.h>
+#include <drm/xe_drm.h>
+
+#include "instructions/xe_gfxpipe_commands.h"
+#include "instructions/xe_mi_commands.h"
+#include "regs/xe_gt_regs.h"
+#include "xe_assert.h"
+#include "xe_bb.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_exec_queue.h"
+#include "xe_execlist.h"
+#include "xe_force_wake.h"
+#include "xe_ggtt.h"
+#include "xe_gsc.h"
+#include "xe_gt_ccs_mode.h"
+#include "xe_gt_clock.h"
+#include "xe_gt_freq.h"
+#include "xe_gt_idle.h"
+#include "xe_gt_mcr.h"
+#include "xe_gt_pagefault.h"
+#include "xe_gt_printk.h"
+#include "xe_gt_sysfs.h"
+#include "xe_gt_tlb_invalidation.h"
+#include "xe_gt_topology.h"
+#include "xe_guc_exec_queue_types.h"
+#include "xe_guc_pc.h"
+#include "xe_hw_fence.h"
+#include "xe_hw_engine_class_sysfs.h"
+#include "xe_irq.h"
+#include "xe_lmtt.h"
+#include "xe_lrc.h"
+#include "xe_map.h"
+#include "xe_migrate.h"
+#include "xe_mmio.h"
+#include "xe_pat.h"
+#include "xe_mocs.h"
+#include "xe_reg_sr.h"
+#include "xe_ring_ops.h"
+#include "xe_sa.h"
+#include "xe_sched_job.h"
+#include "xe_sriov.h"
+#include "xe_tuning.h"
+#include "xe_uc.h"
+#include "xe_vm.h"
+#include "xe_wa.h"
+#include "xe_wopcm.h"
+
+struct xe_gt *xe_gt_alloc(struct xe_tile *tile)
+{
+	struct xe_gt *gt;
+
+	gt = drmm_kzalloc(&tile_to_xe(tile)->drm, sizeof(*gt), GFP_KERNEL);
+	if (!gt)
+		return ERR_PTR(-ENOMEM);
+
+	gt->tile = tile;
+	gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0);
+	/* gt_fini() and xe_gt_reset_async() use the wq without a NULL check */
+	return gt->ordered_wq ? gt : ERR_PTR(-ENOMEM);
+}
+
+void xe_gt_sanitize(struct xe_gt *gt)
+{
+	/*
+	 * FIXME: if xe_uc_sanitize is called here, on TGL driver will not
+	 * reload
+	 */
+	gt->uc.guc.submission_state.enabled = false;
+}
+
+static void gt_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_gt *gt = arg;
+	int i;
+
+	destroy_workqueue(gt->ordered_wq);
+
+	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
+		xe_hw_fence_irq_finish(&gt->fence_irq[i]);
+}
+
+static void gt_reset_worker(struct work_struct *w);
+
+static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
+{
+	struct xe_sched_job *job;
+	struct xe_bb *bb;
+	struct dma_fence *fence;
+	long timeout;
+
+	bb = xe_bb_new(gt, 4, false);
+	if (IS_ERR(bb))
+		return PTR_ERR(bb);
+
+	job = xe_bb_create_job(q, bb);
+	if (IS_ERR(job)) {
+		xe_bb_free(bb, NULL);
+		return PTR_ERR(job);
+	}
+
+	xe_sched_job_arm(job);
+	fence = dma_fence_get(&job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+
+	timeout = dma_fence_wait_timeout(fence, false, HZ);
+	dma_fence_put(fence);
+	xe_bb_free(bb, NULL);
+	if (timeout < 0)
+		return timeout;
+	else if (!timeout)
+		return -ETIME;
+
+	return 0;
+}
+
+/*
+ * Convert back from encoded value to type-safe, only to be used when reg.mcr
+ * is true
+ */
+static struct xe_reg_mcr to_xe_reg_mcr(const struct xe_reg reg)
+{
+	return (const struct xe_reg_mcr){.__reg.raw = reg.raw };
+}
+
+static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
+{
+	struct xe_reg_sr *sr = &q->hwe->reg_lrc;
+	struct xe_reg_sr_entry *entry;
+	unsigned long idx;
+	struct xe_sched_job *job;
+	struct xe_bb *bb;
+	struct dma_fence *fence;
+	long timeout;
+	int count = 0;
+
+	if (q->hwe->class == XE_ENGINE_CLASS_RENDER)
+		/* Big enough to emit all of the context's 3DSTATE */
+		bb = xe_bb_new(gt, xe_lrc_size(gt_to_xe(gt), q->hwe->class), false);
+	else
+		/* Just pick a large BB size */
+		bb = xe_bb_new(gt, SZ_4K, false);
+
+	if (IS_ERR(bb))
+		return PTR_ERR(bb);
+
+	xa_for_each(&sr->xa, idx, entry)
+		++count;
+
+	if (count) {
+		xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name);
+
+		bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
+
+		xa_for_each(&sr->xa, idx, entry) {
+			struct xe_reg reg = entry->reg;
+			struct xe_reg_mcr reg_mcr = to_xe_reg_mcr(reg);
+			u32 val;
+
+			/*
+			 * Skip reading the register if it's not really needed
+			 */
+			if (reg.masked)
+				val = entry->clr_bits << 16;
+			else if (entry->clr_bits + 1)
+				val = (reg.mcr ?
+				       xe_gt_mcr_unicast_read_any(gt, reg_mcr) :
+				       xe_mmio_read32(gt, reg)) & (~entry->clr_bits);
+			else
+				val = 0;
+
+			val |= entry->set_bits;
+
+			bb->cs[bb->len++] = reg.addr;
+			bb->cs[bb->len++] = val;
+			xe_gt_dbg(gt, "REG[0x%x] = 0x%08x", reg.addr, val);
+		}
+	}
+
+	xe_lrc_emit_hwe_state_instructions(q, bb);
+
+	job = xe_bb_create_job(q, bb);
+	if (IS_ERR(job)) {
+		xe_bb_free(bb, NULL);
+		return PTR_ERR(job);
+	}
+
+	xe_sched_job_arm(job);
+	fence = dma_fence_get(&job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+
+	timeout = dma_fence_wait_timeout(fence, false, HZ);
+	dma_fence_put(fence);
+	xe_bb_free(bb, NULL);
+	if (timeout < 0)
+		return timeout;
+	else if (!timeout)
+		return -ETIME;
+
+	return 0;
+}
+
+int xe_gt_record_default_lrcs(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	int err = 0;
+
+	for_each_hw_engine(hwe, gt, id) {
+		struct xe_exec_queue *q, *nop_q;
+		void *default_lrc;
+
+		if (gt->default_lrc[hwe->class])
+			continue;
+
+		xe_reg_sr_init(&hwe->reg_lrc, hwe->name, xe);
+		xe_wa_process_lrc(hwe);
+		xe_hw_engine_setup_default_lrc_state(hwe);
+		xe_tuning_process_lrc(hwe);
+
+		default_lrc = drmm_kzalloc(&xe->drm,
+					   xe_lrc_size(xe, hwe->class),
+					   GFP_KERNEL);
+		if (!default_lrc)
+			return -ENOMEM;
+
+		q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), 1,
+					 hwe, EXEC_QUEUE_FLAG_KERNEL);
+		if (IS_ERR(q)) {
+			err = PTR_ERR(q);
+			xe_gt_err(gt, "hwe %s: xe_exec_queue_create failed (%pe)\n",
+				  hwe->name, q);
+			return err;
+		}
+
+		/* Prime golden LRC with known good state */
+		err = emit_wa_job(gt, q);
+		if (err) {
+			xe_gt_err(gt, "hwe %s: emit_wa_job failed (%pe) guc_id=%u\n",
+				  hwe->name, ERR_PTR(err), q->guc->id);
+			goto put_exec_queue;
+		}
+
+		nop_q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance),
+					     1, hwe, EXEC_QUEUE_FLAG_KERNEL);
+		if (IS_ERR(nop_q)) {
+			err = PTR_ERR(nop_q);
+			xe_gt_err(gt, "hwe %s: nop xe_exec_queue_create failed (%pe)\n",
+				  hwe->name, nop_q);
+			goto put_exec_queue;
+		}
+
+		/* Switch to different LRC */
+		err = emit_nop_job(gt, nop_q);
+		if (err) {
+			xe_gt_err(gt, "hwe %s: nop emit_nop_job failed (%pe) guc_id=%u\n",
+				  hwe->name, ERR_PTR(err), nop_q->guc->id);
+			goto put_nop_q;
+		}
+
+		/* Reload golden LRC to record the effect of any indirect W/A */
+		err = emit_nop_job(gt, q);
+		if (err) {
+			xe_gt_err(gt, "hwe %s: emit_nop_job failed (%pe) guc_id=%u\n",
+				  hwe->name, ERR_PTR(err), q->guc->id);
+			goto put_nop_q;
+		}
+
+		xe_map_memcpy_from(xe, default_lrc,
+				   &q->lrc[0].bo->vmap,
+				   xe_lrc_pphwsp_offset(&q->lrc[0]),
+				   xe_lrc_size(xe, hwe->class));
+
+		gt->default_lrc[hwe->class] = default_lrc;
+put_nop_q:
+		xe_exec_queue_put(nop_q);
+put_exec_queue:
+		xe_exec_queue_put(q);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+int xe_gt_init_early(struct xe_gt *gt)
+{
+	int err;
+
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		return err;
+
+	xe_gt_topology_init(gt);
+	xe_gt_mcr_init(gt);
+
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		return err;
+
+	xe_reg_sr_init(&gt->reg_sr, "GT", gt_to_xe(gt));
+
+	err = xe_wa_init(gt);
+	if (err)
+		return err;
+
+	xe_wa_process_gt(gt);
+	xe_wa_process_oob(gt);
+	xe_tuning_process_gt(gt);
+
+	return 0;
+}
+
+static void dump_pat_on_error(struct xe_gt *gt)
+{
+	struct drm_printer p;
+	char prefix[32];
+
+	snprintf(prefix, sizeof(prefix), "[GT%u Error]", gt->info.id);
+	p = drm_debug_printer(prefix);
+
+	xe_pat_dump(gt, &p);
+}
+
+static int gt_fw_domain_init(struct xe_gt *gt)
+{
+	int err, i;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		goto err_hw_fence_irq;
+
+	xe_pat_init(gt);
+
+	if (!xe_gt_is_media_type(gt)) {
+		err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt);
+		if (err)
+			goto err_force_wake;
+		if (IS_SRIOV_PF(gt_to_xe(gt)))
+			xe_lmtt_init(&gt_to_tile(gt)->sriov.pf.lmtt);
+	}
+
+	err = xe_uc_init(&gt->uc);
+	if (err)
+		goto err_force_wake;
+
+	/* Raise GT freq to speed up HuC/GuC load */
+	xe_guc_pc_init_early(&gt->uc.guc.pc);
+
+	err = xe_uc_init_hwconfig(&gt->uc);
+	if (err)
+		goto err_force_wake;
+
+	xe_gt_idle_sysfs_init(&gt->gtidle);
+
+	/* XXX: Fake that we pull the engine mask from hwconfig blob */
+	gt->info.engine_mask = gt->info.__engine_mask;
+
+	/* Enable per hw engine IRQs */
+	xe_irq_enable_hwe(gt);
+
+	/* Rerun MCR init as we now have hw engine list */
+	xe_gt_mcr_init(gt);
+
+	err = xe_hw_engines_init_early(gt);
+	if (err)
+		goto err_force_wake;
+
+	err = xe_hw_engine_class_sysfs_init(gt);
+	if (err)
+		drm_warn(&gt_to_xe(gt)->drm,
+			 "failed to register engines sysfs directory, err: %d\n",
+			 err);
+
+	/* Initialize CCS mode sysfs after early initialization of HW engines */
+	xe_gt_ccs_mode_sysfs_init(gt);
+
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+	XE_WARN_ON(err);
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	return 0;
+
+err_force_wake:
+	dump_pat_on_error(gt);
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+err_hw_fence_irq:
+	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
+		xe_hw_fence_irq_finish(&gt->fence_irq[i]);
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	return err;
+}
+
+static int all_fw_domain_init(struct xe_gt *gt)
+{
+	int err, i;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (err)
+		goto err_hw_fence_irq;
+
+	xe_gt_mcr_set_implicit_defaults(gt);
+	xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
+
+	err = xe_gt_clock_init(gt);
+	if (err)
+		goto err_force_wake;
+
+	xe_mocs_init(gt);
+	err = xe_execlist_init(gt);
+	if (err)
+		goto err_force_wake;
+
+	err = xe_hw_engines_init(gt);
+	if (err)
+		goto err_force_wake;
+
+	err = xe_uc_init_post_hwconfig(&gt->uc);
+	if (err)
+		goto err_force_wake;
+
+	if (!xe_gt_is_media_type(gt)) {
+		/*
+		 * USM has its own SA pool so it does not block behind user operations
+		 */
+		if (gt_to_xe(gt)->info.has_usm) {
+			struct xe_device *xe = gt_to_xe(gt);
+
+			gt->usm.bb_pool = xe_sa_bo_manager_init(gt_to_tile(gt),
+								IS_DGFX(xe) ? SZ_1M : SZ_512K, 16);
+			if (IS_ERR(gt->usm.bb_pool)) {
+				err = PTR_ERR(gt->usm.bb_pool);
+				goto err_force_wake;
+			}
+		}
+	}
+
+	if (!xe_gt_is_media_type(gt)) {
+		struct xe_tile *tile = gt_to_tile(gt);
+
+		tile->migrate = xe_migrate_init(tile);
+		if (IS_ERR(tile->migrate)) {
+			err = PTR_ERR(tile->migrate);
+			goto err_force_wake;
+		}
+	}
+
+	err = xe_uc_init_hw(&gt->uc);
+	if (err)
+		goto err_force_wake;
+
+	/* Configure default CCS mode of 1 engine with all resources */
+	if (xe_gt_ccs_mode_enabled(gt)) {
+		gt->ccs_mode = 1;
+		xe_gt_apply_ccs_mode(gt);
+	}
+
+	if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt))
+		xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt);
+
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	XE_WARN_ON(err);
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	return 0;
+
+err_force_wake:
+	xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+err_hw_fence_irq:
+	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
+		xe_hw_fence_irq_finish(&gt->fence_irq[i]);
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	return err;
+}
+
+int xe_gt_init(struct xe_gt *gt)
+{
+	int err;
+	int i;
+
+	INIT_WORK(&gt->reset.worker, gt_reset_worker);
+
+	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) {
+		gt->ring_ops[i] = xe_ring_ops_get(gt, i);
+		xe_hw_fence_irq_init(&gt->fence_irq[i]);
+	}
+
+	err = xe_gt_tlb_invalidation_init(gt);
+	if (err)
+		return err;
+
+	err = xe_gt_pagefault_init(gt);
+	if (err)
+		return err;
+
+	xe_mocs_init_early(gt);
+
+	xe_gt_sysfs_init(gt);
+
+	err = gt_fw_domain_init(gt);
+	if (err)
+		return err;
+
+	xe_gt_freq_init(gt);
+
+	xe_force_wake_init_engines(gt, gt_to_fw(gt));
+
+	err = all_fw_domain_init(gt);
+	if (err)
+		return err;
+
+	err = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, gt_fini, gt);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int do_gt_reset(struct xe_gt *gt)
+{
+	int err;
+
+	xe_gsc_wa_14015076503(gt, true);
+
+	xe_mmio_write32(gt, GDRST, GRDOM_FULL);
+	err = xe_mmio_wait32(gt, GDRST, GRDOM_FULL, 0, 5000, NULL, false);
+	if (err)
+		xe_gt_err(gt, "failed to clear GRDOM_FULL (%pe)\n",
+			  ERR_PTR(err));
+
+	xe_gsc_wa_14015076503(gt, false);
+
+	return err;
+}
+
+static int do_gt_restart(struct xe_gt *gt)
+{
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	int err;
+
+	xe_pat_init(gt);
+
+	xe_gt_mcr_set_implicit_defaults(gt);
+	xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
+
+	err = xe_wopcm_init(&gt->uc.wopcm);
+	if (err)
+		return err;
+
+	for_each_hw_engine(hwe, gt, id)
+		xe_hw_engine_enable_ring(hwe);
+
+	err = xe_uc_sanitize_reset(&gt->uc);
+	if (err)
+		return err;
+
+	err = xe_uc_init_hw(&gt->uc);
+	if (err)
+		return err;
+
+	if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt))
+		xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt);
+
+	xe_mocs_init(gt);
+	err = xe_uc_start(&gt->uc);
+	if (err)
+		return err;
+
+	for_each_hw_engine(hwe, gt, id) {
+		xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
+		xe_reg_sr_apply_whitelist(hwe);
+	}
+
+	/* Get CCS mode in sync between sw/hw */
+	xe_gt_apply_ccs_mode(gt);
+
+	return 0;
+}
+
+static int gt_reset(struct xe_gt *gt)
+{
+	int err;
+
+	/* We only support GT resets with GuC submission */
+	if (!xe_device_uc_enabled(gt_to_xe(gt)))
+		return -ENODEV;
+
+	xe_gt_info(gt, "reset started\n");
+
+	if (xe_fault_inject_gt_reset()) {
+		err = -ECANCELED;
+		goto err_fail;
+	}
+
+	xe_gt_sanitize(gt);
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (err)
+		goto err_msg;
+
+	xe_uc_gucrc_disable(&gt->uc);
+	xe_uc_stop_prepare(&gt->uc);
+	xe_gt_pagefault_reset(gt);
+
+	err = xe_uc_stop(&gt->uc);
+	if (err)
+		goto err_out;
+
+	err = do_gt_reset(gt);
+	if (err)
+		goto err_out;
+
+	xe_gt_tlb_invalidation_reset(gt);
+
+	err = do_gt_restart(gt);
+	if (err)
+		goto err_out;
+
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	xe_device_mem_access_put(gt_to_xe(gt));
+	XE_WARN_ON(err);
+
+	xe_gt_info(gt, "reset done\n");
+
+	return 0;
+
+err_out:
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+err_msg:
+	XE_WARN_ON(xe_uc_start(&gt->uc));
+	xe_device_mem_access_put(gt_to_xe(gt));
+err_fail:
+	xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err));
+
+	gt_to_xe(gt)->needs_flr_on_fini = true;
+
+	return err;
+}
+
+static void gt_reset_worker(struct work_struct *w)
+{
+	struct xe_gt *gt = container_of(w, typeof(*gt), reset.worker);
+
+	gt_reset(gt);
+}
+
+void xe_gt_reset_async(struct xe_gt *gt)
+{
+	xe_gt_info(gt, "trying reset\n");
+
+	/* Don't do a reset while one is already in flight */
+	if (!xe_fault_inject_gt_reset() && xe_uc_reset_prepare(&gt->uc))
+		return;
+
+	xe_gt_info(gt, "reset queued\n");
+	queue_work(gt->ordered_wq, &gt->reset.worker);
+}
+
+void xe_gt_suspend_prepare(struct xe_gt *gt)
+{
+	xe_device_mem_access_get(gt_to_xe(gt));
+	XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+
+	xe_uc_stop_prepare(&gt->uc);
+
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+	xe_device_mem_access_put(gt_to_xe(gt));
+}
+
+int xe_gt_suspend(struct xe_gt *gt)
+{
+	int err;
+
+	xe_gt_sanitize(gt);
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (err)
+		goto err_msg;
+
+	err = xe_uc_suspend(&gt->uc);
+	if (err)
+		goto err_force_wake;
+
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+	xe_device_mem_access_put(gt_to_xe(gt));
+	xe_gt_info(gt, "suspended\n");
+
+	return 0;
+
+err_force_wake:
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+err_msg:
+	xe_device_mem_access_put(gt_to_xe(gt));
+	xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err));
+
+	return err;
+}
+
+int xe_gt_resume(struct xe_gt *gt)
+{
+	int err;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (err)
+		goto err_msg;
+
+	err = do_gt_restart(gt);
+	if (err)
+		goto err_force_wake;
+
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+	xe_device_mem_access_put(gt_to_xe(gt));
+	xe_gt_info(gt, "resumed\n");
+
+	return 0;
+
+err_force_wake:
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+err_msg:
+	xe_device_mem_access_put(gt_to_xe(gt));
+	xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(err));
+
+	return err;
+}
+
+struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt,
+				     enum xe_engine_class class,
+				     u16 instance, bool logical)
+{
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+
+	for_each_hw_engine(hwe, gt, id)
+		if (hwe->class == class &&
+		    ((!logical && hwe->instance == instance) ||
+		    (logical && hwe->logical_instance == instance)))
+			return hwe;
+
+	return NULL;
+}
+
+struct xe_hw_engine *xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt,
+							 enum xe_engine_class class)
+{
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+
+	for_each_hw_engine(hwe, gt, id) {
+		switch (class) {
+		case XE_ENGINE_CLASS_RENDER:
+		case XE_ENGINE_CLASS_COMPUTE:
+			if (hwe->class == XE_ENGINE_CLASS_RENDER ||
+			    hwe->class == XE_ENGINE_CLASS_COMPUTE)
+				return hwe;
+			break;
+		default:
+			if (hwe->class == class)
+				return hwe;
+		}
+	}
+
+	return NULL;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
new file mode 100644
index 000000000000..4486e083f5ef
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_H_
+#define _XE_GT_H_
+
+#include <drm/drm_util.h>
+
+#include "xe_device_types.h"
+#include "xe_hw_engine.h"
+
+#define for_each_hw_engine(hwe__, gt__, id__) \
+	for ((id__) = 0; (id__) < ARRAY_SIZE((gt__)->hw_engines); (id__)++) \
+		for_each_if(((hwe__) = (gt__)->hw_engines + (id__)) && \
+			  xe_hw_engine_is_valid((hwe__)))
+
+#define CCS_MASK(gt) (((gt)->info.engine_mask & XE_HW_ENGINE_CCS_MASK) >> XE_HW_ENGINE_CCS0)
+
+#ifdef CONFIG_FAULT_INJECTION
+#include <linux/fault-inject.h> /* XXX: fault-inject.h is broken */
+extern struct fault_attr gt_reset_failure;
+static inline bool xe_fault_inject_gt_reset(void)
+{
+	return should_fail(&gt_reset_failure, 1);
+}
+#else
+static inline bool xe_fault_inject_gt_reset(void)
+{
+	return false;
+}
+#endif
+
+struct xe_gt *xe_gt_alloc(struct xe_tile *tile);
+int xe_gt_init_early(struct xe_gt *gt);
+int xe_gt_init(struct xe_gt *gt);
+int xe_gt_record_default_lrcs(struct xe_gt *gt);
+void xe_gt_suspend_prepare(struct xe_gt *gt);
+int xe_gt_suspend(struct xe_gt *gt);
+int xe_gt_resume(struct xe_gt *gt);
+void xe_gt_reset_async(struct xe_gt *gt);
+void xe_gt_sanitize(struct xe_gt *gt);
+
+/**
+ * xe_gt_any_hw_engine_by_reset_domain - scan the list of engines and return the
+ * first that matches the same reset domain as @class
+ * @gt: GT structure
+ * @class: hw engine class to lookup
+ */
+struct xe_hw_engine *
+xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt, enum xe_engine_class class);
+
+struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt,
+				     enum xe_engine_class class,
+				     u16 instance,
+				     bool logical);
+
+static inline bool xe_gt_is_media_type(struct xe_gt *gt)
+{
+	return gt->info.type == XE_GT_TYPE_MEDIA;
+}
+
+static inline bool xe_gt_is_usm_hwe(struct xe_gt *gt, struct xe_hw_engine *hwe)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	return xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
+		hwe->instance == gt->usm.reserved_bcs_instance;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
new file mode 100644
index 000000000000..529fc286cd06
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "regs/xe_gt_regs.h"
+#include "xe_assert.h"
+#include "xe_gt.h"
+#include "xe_gt_ccs_mode.h"
+#include "xe_gt_sysfs.h"
+#include "xe_mmio.h"
+
+static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines)
+{
+	u32 mode = CCS_MODE_CSLICE_0_3_MASK; /* disable all by default */
+	int num_slices = hweight32(CCS_MASK(gt));
+	struct xe_device *xe = gt_to_xe(gt);
+	int width, cslice = 0;
+	u32 config = 0;
+
+	xe_assert(xe, xe_gt_ccs_mode_enabled(gt));
+
+	xe_assert(xe, num_engines && num_engines <= num_slices);
+	xe_assert(xe, !(num_slices % num_engines));
+
+	/*
+	 * Loop over all available slices and assign each a user engine.
+	 * For example, if there are four compute slices available, the
+	 * assignment of compute slices to compute engines would be,
+	 *
+	 * With 1 engine (ccs0):
+	 *   slice 0, 1, 2, 3: ccs0
+	 *
+	 * With 2 engines (ccs0, ccs1):
+	 *   slice 0, 2: ccs0
+	 *   slice 1, 3: ccs1
+	 *
+	 * With 4 engines (ccs0, ccs1, ccs2, ccs3):
+	 *   slice 0: ccs0
+	 *   slice 1: ccs1
+	 *   slice 2: ccs2
+	 *   slice 3: ccs3
+	 */
+	for (width = num_slices / num_engines; width; width--) {
+		struct xe_hw_engine *hwe;
+		enum xe_hw_engine_id id;
+
+		for_each_hw_engine(hwe, gt, id) {
+			if (hwe->class != XE_ENGINE_CLASS_COMPUTE)
+				continue;
+
+			if (hwe->logical_instance >= num_engines)
+				break;
+
+			config |= BIT(hwe->instance) << XE_HW_ENGINE_CCS0;
+
+			/* If a slice is fused off, leave disabled */
+			while ((CCS_MASK(gt) & BIT(cslice)) == 0)
+				cslice++;
+
+			mode &= ~CCS_MODE_CSLICE(cslice, CCS_MODE_CSLICE_MASK);
+			mode |= CCS_MODE_CSLICE(cslice, hwe->instance);
+			cslice++;
+		}
+	}
+
+	xe_mmio_write32(gt, CCS_MODE, mode);
+
+	xe_gt_info(gt, "CCS_MODE=%x config:%08x, num_engines:%d, num_slices:%d\n",
+		   mode, config, num_engines, num_slices);
+}
+
+void xe_gt_apply_ccs_mode(struct xe_gt *gt)
+{
+	if (!gt->ccs_mode)
+		return;
+
+	__xe_gt_apply_ccs_mode(gt, gt->ccs_mode);
+}
+
+static ssize_t
+num_cslices_show(struct device *kdev,
+		 struct device_attribute *attr, char *buf)
+{
+	struct xe_gt *gt = kobj_to_gt(&kdev->kobj);
+
+	return sysfs_emit(buf, "%u\n", hweight32(CCS_MASK(gt)));
+}
+
+static DEVICE_ATTR_RO(num_cslices);
+
+static ssize_t
+ccs_mode_show(struct device *kdev,
+	      struct device_attribute *attr, char *buf)
+{
+	struct xe_gt *gt = kobj_to_gt(&kdev->kobj);
+
+	return sysfs_emit(buf, "%u\n", gt->ccs_mode);
+}
+
+static ssize_t
+ccs_mode_store(struct device *kdev, struct device_attribute *attr,
+	       const char *buff, size_t count)
+{
+	struct xe_gt *gt = kobj_to_gt(&kdev->kobj);
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 num_engines, num_slices;
+	int ret;
+
+	ret = kstrtou32(buff, 0, &num_engines);
+	if (ret)
+		return ret;
+
+	/*
+	 * Ensure the number of engines requested is non-zero, does not exceed
+	 * the number of slices and divides the number of slices exactly.
+	 */
+	num_slices = hweight32(CCS_MASK(gt));
+	if (!num_engines || num_engines > num_slices || num_slices % num_engines) {
+		xe_gt_dbg(gt, "Invalid compute config, %u engines %u slices\n",
+			  num_engines, num_slices);
+		return -EINVAL;
+	}
+
+	/* CCS mode can only be updated when there are no drm clients */
+	spin_lock(&xe->clients.lock);
+	if (xe->clients.count) {
+		spin_unlock(&xe->clients.lock);
+		return -EBUSY;
+	}
+
+	if (gt->ccs_mode != num_engines) {
+		xe_gt_info(gt, "Setting compute mode to %u\n", num_engines);
+		gt->ccs_mode = num_engines;
+		xe_gt_reset_async(gt);
+	}
+
+	spin_unlock(&xe->clients.lock);
+
+	return count;
+}
+
+static DEVICE_ATTR_RW(ccs_mode);
+
+static const struct attribute *gt_ccs_mode_attrs[] = {
+	&dev_attr_ccs_mode.attr,
+	&dev_attr_num_cslices.attr,
+	NULL,
+};
+
+static void xe_gt_ccs_mode_sysfs_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_gt *gt = arg;
+
+	sysfs_remove_files(gt->sysfs, gt_ccs_mode_attrs);
+}
+
+/**
+ * xe_gt_ccs_mode_sysfs_init - Initialize CCS mode sysfs interfaces
+ * @gt: GT structure
+ *
+ * Through a per-gt 'ccs_mode' sysfs interface, the user can enable a fixed
+ * number of compute hardware engines to which the available compute slices
+ * are to be allocated. This user configuration change triggers a gt reset
+ * and it is expected that there are no open drm clients while doing so.
+ * The number of available compute slices is exposed to user through a per-gt
+ * 'num_cslices' sysfs interface.
+ */
+void xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int err;
+
+	if (!xe_gt_ccs_mode_enabled(gt))
+		return;
+
+	err = sysfs_create_files(gt->sysfs, gt_ccs_mode_attrs);
+	if (err) {
+		drm_warn(&xe->drm, "Sysfs creation for ccs_mode failed err: %d\n", err);
+		return;
+	}
+
+	err = drmm_add_action_or_reset(&xe->drm, xe_gt_ccs_mode_sysfs_fini, gt);
+	if (err) {
+		/* on failure the _or_reset variant already ran the fini action */
+		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
+			 __func__, err);
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.h b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h
new file mode 100644
index 000000000000..f39975aaaab0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_CCS_MODE_H_
+#define _XE_GT_CCS_MODE_H_
+
+#include "xe_device_types.h"
+#include "xe_gt.h"
+#include "xe_gt_types.h"
+#include "xe_platform_types.h"
+
+void xe_gt_apply_ccs_mode(struct xe_gt *gt);
+void xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt);
+
+/* CCS mode is only offered when the GT has at least two compute engines. */
+static inline bool xe_gt_ccs_mode_enabled(const struct xe_gt *gt)
+{
+	return hweight32(CCS_MASK(gt)) >= 2;
+}
+
+#endif
+
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
new file mode 100644
index 000000000000..937054e31d72
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_clock.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_gt_clock.h"
+
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_macros.h"
+#include "xe_mmio.h"
+
+/* Compute the timestamp frequency in Hz from the TIMESTAMP_OVERRIDE register. */
+static u32 read_reference_ts_freq(struct xe_gt *gt)
+{
+	u32 reg = xe_mmio_read32(gt, TIMESTAMP_OVERRIDE);
+	u32 divider, denominator;
+
+	divider = REG_FIELD_GET(TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK, reg);
+	denominator = REG_FIELD_GET(TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK,
+				    reg);
+
+	/*
+	 * (divider + 1) MHz whole part, plus 1 MHz / (denominator + 1) on top.
+	 */
+	return (divider + 1) * 1000000 + 1000000 / (denominator + 1);
+}
+
+/*
+ * Map the crystal clock selector found in an RPM_CONFIG0 register value to
+ * the corresponding frequency in Hz; unknown selectors warn and yield 0.
+ */
+static u32 get_crystal_clock_freq(u32 rpm_config_reg)
+{
+	u32 sel = REG_FIELD_GET(RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, rpm_config_reg);
+
+	switch (sel) {
+	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ:
+		return 19200000;
+	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ:
+		return 24000000;
+	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ:
+		return 25000000;
+	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ:
+		return 38400000;
+	default:
+		/* Unrecognised selector: warn and report 0 Hz to the caller. */
+		XE_WARN_ON("NOT_POSSIBLE");
+		return 0;
+	}
+}
+
+/* Work out the GT timestamp reference clock and store it in gt->info.reference_clock. */
+int xe_gt_clock_init(struct xe_gt *gt)
+{
+	u32 ctc_reg = xe_mmio_read32(gt, CTC_MODE);
+	u32 freq;
+
+	/* Assuming gen11+ so assert this assumption is correct */
+	xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11);
+
+	if (!(ctc_reg & CTC_SOURCE_DIVIDE_LOGIC)) {
+		u32 c0 = xe_mmio_read32(gt, RPM_CONFIG0);
+		u32 shift = 3 - REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, c0);
+
+		/*
+		 * The command stream's timestamp register might increment
+		 * only every few crystal clock cycles, so scale the crystal
+		 * frequency down by (3 - CTC shift parameter) bits.
+		 */
+		freq = get_crystal_clock_freq(c0) >> shift;
+	} else {
+		freq = read_reference_ts_freq(gt);
+	}
+
+	gt->info.reference_clock = freq;
+	return 0;
+}
+
+u64 xe_gt_clock_cycles_to_ns(const struct xe_gt *gt, u64 count)
+{
+	return DIV_ROUND_CLOSEST_ULL(count * NSEC_PER_SEC, gt->info.reference_clock);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.h b/drivers/gpu/drm/xe/xe_gt_clock.h
new file mode 100644
index 000000000000..aa162722f859
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_clock.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_CLOCK_H_
+#define _XE_GT_CLOCK_H_
+
+#include <linux/types.h>
+
+struct xe_gt;
+
+int xe_gt_clock_init(struct xe_gt *gt);
+u64 xe_gt_clock_cycles_to_ns(const struct xe_gt *gt, u64 count);
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
new file mode 100644
index 000000000000..c4b67cf09f8f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_gt_debugfs.h"
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_force_wake.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_gt_topology.h"
+#include "xe_hw_engine.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_pat.h"
+#include "xe_reg_sr.h"
+#include "xe_reg_whitelist.h"
+#include "xe_uc_debugfs.h"
+#include "xe_wa.h"
+
+static struct xe_gt *node_to_gt(struct drm_info_node *node)
+{
+	return node->info_ent->data; /* .data is set to the GT by xe_gt_debugfs_register() */
+}
+
+/* debugfs show callback: run xe_hw_engine_print() for every hardware engine of this GT. */
+static int hw_engines(struct seq_file *m, void *data)
+{
+	struct xe_gt *gt = node_to_gt(m->private);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct drm_printer p = drm_seq_file_printer(m);
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	int ret;
+
+	/* Hold a device memory-access reference for the whole dump. */
+	xe_device_mem_access_get(xe);
+
+	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (ret)
+		goto out;
+
+	for_each_hw_engine(hwe, gt, id)
+		xe_hw_engine_print(hwe, &p);
+
+	/* A forcewake release failure becomes the callback's return value. */
+	ret = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+out:
+	xe_device_mem_access_put(xe);
+	return ret;
+}
+
+/* debugfs show callback: calls xe_gt_reset_async() on the GT and prints nothing. */
+static int force_reset(struct seq_file *m, void *data)
+{
+	struct xe_gt *target = node_to_gt(m->private);
+
+	xe_gt_reset_async(target);
+	return 0;
+}
+
+/* debugfs show callback: dump suballocator debug info for the tile's kernel_bb_pool. */
+static int sa_info(struct seq_file *m, void *data)
+{
+	struct xe_tile *tile = gt_to_tile(node_to_gt(m->private));
+	struct drm_printer printer = drm_seq_file_printer(m);
+
+	drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, &printer,
+				     tile->mem.kernel_bb_pool->gpu_addr);
+	return 0;
+}
+
+/* debugfs show callback: print the GT topology through xe_gt_topology_dump(). */
+static int topology(struct seq_file *m, void *data)
+{
+	struct drm_printer printer = drm_seq_file_printer(m);
+
+	xe_gt_topology_dump(node_to_gt(m->private), &printer);
+
+	return 0;
+}
+
+/* debugfs show callback: print the GT's steering state via xe_gt_mcr_steering_dump(). */
+static int steering(struct seq_file *m, void *data)
+{
+	struct drm_printer printer = drm_seq_file_printer(m);
+
+	xe_gt_mcr_steering_dump(node_to_gt(m->private), &printer);
+
+	return 0;
+}
+
+/* debugfs show callback: dump the GGTT of the GT's tile; returns xe_ggtt_dump()'s result. */
+static int ggtt(struct seq_file *m, void *data)
+{
+	struct drm_printer printer = drm_seq_file_printer(m);
+
+	return xe_ggtt_dump(gt_to_tile(node_to_gt(m->private))->mem.ggtt, &printer);
+}
+
+/* debugfs show callback: dump the GT, per-engine and LRC save/restore lists and whitelists. */
+static int register_save_restore(struct seq_file *m, void *data)
+{
+	struct drm_printer printer = drm_seq_file_printer(m);
+	struct xe_gt *gt = node_to_gt(m->private);
+	enum xe_hw_engine_id id;
+	struct xe_hw_engine *hwe;
+
+	xe_reg_sr_dump(&gt->reg_sr, &printer);
+	drm_printf(&printer, "\n");
+
+	drm_printf(&printer, "Engine\n");
+	for_each_hw_engine(hwe, gt, id)
+		xe_reg_sr_dump(&hwe->reg_sr, &printer);
+	drm_printf(&printer, "\n");
+
+	drm_printf(&printer, "LRC\n");
+	for_each_hw_engine(hwe, gt, id)
+		xe_reg_sr_dump(&hwe->reg_lrc, &printer);
+	drm_printf(&printer, "\n");
+
+	drm_printf(&printer, "Whitelist\n");
+	for_each_hw_engine(hwe, gt, id)
+		xe_reg_whitelist_dump(&hwe->reg_whitelist, &printer);
+	return 0;
+}
+
+/* debugfs show callback: print the GT's workaround state through xe_wa_dump(). */
+static int workarounds(struct seq_file *m, void *data)
+{
+	struct drm_printer printer = drm_seq_file_printer(m);
+
+	xe_wa_dump(node_to_gt(m->private), &printer);
+
+	return 0;
+}
+
+/* debugfs show callback: print the GT's PAT state through xe_pat_dump(). */
+static int pat(struct seq_file *m, void *data)
+{
+	struct drm_printer printer = drm_seq_file_printer(m);
+
+	xe_pat_dump(node_to_gt(m->private), &printer);
+
+	return 0;
+}
+
+static int rcs_default_lrc(struct seq_file *m, void *data)
+{
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_RENDER);
+	return 0;
+}
+
+static int ccs_default_lrc(struct seq_file *m, void *data)
+{
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_COMPUTE);
+	return 0;
+}
+
+static int bcs_default_lrc(struct seq_file *m, void *data)
+{
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_COPY);
+	return 0;
+}
+
+static int vcs_default_lrc(struct seq_file *m, void *data)
+{
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_VIDEO_DECODE);
+	return 0;
+}
+
+static int vecs_default_lrc(struct seq_file *m, void *data)
+{
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_VIDEO_ENHANCE);
+	return 0;
+}
+
+static const struct drm_info_list debugfs_list[] = {
+	{"hw_engines", hw_engines, 0},
+	{"force_reset", force_reset, 0},
+	{"sa_info", sa_info, 0},
+	{"topology", topology, 0},
+	{"steering", steering, 0},
+	{"ggtt", ggtt, 0},
+	{"register-save-restore", register_save_restore, 0},
+	{"workarounds", workarounds, 0},
+	{"pat", pat, 0},
+	{"default_lrc_rcs", rcs_default_lrc, 0},
+	{"default_lrc_ccs", ccs_default_lrc, 0},
+	{"default_lrc_bcs", bcs_default_lrc, 0},
+	{"default_lrc_vcs", vcs_default_lrc, 0},
+	{"default_lrc_vecs", vecs_default_lrc, 0},
+};
+
+/*
+ * Create the per-GT "gt<id>" debugfs directory under the primary minor, fill
+ * it from debugfs_list and hand the directory to xe_uc_debugfs_register().
+ */
+void xe_gt_debugfs_register(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct drm_minor *minor = xe->drm.primary;
+	struct dentry *root;
+	struct drm_info_list *local;
+	char name[8];
+	int i;
+
+	xe_gt_assert(gt, minor->debugfs_root);
+
+	snprintf(name, sizeof(name), "gt%d", gt->info.id);
+	root = debugfs_create_dir(name, minor->debugfs_root);
+	if (IS_ERR(root)) {
+		drm_warn(&xe->drm, "Create GT directory failed\n");
+		return;
+	}
+
+	/*
+	 * Allocate local copy as we need to pass in the GT to the debugfs
+	 * entry and drm_debugfs_create_files just references the drm_info_list
+	 * passed in (e.g. can't define this on the stack).
+	 */
+	local = drmm_kmalloc(&xe->drm, sizeof(debugfs_list), GFP_KERNEL);
+	if (!local)
+		return;
+
+	memcpy(local, debugfs_list, sizeof(debugfs_list));
+
+	for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i)
+		local[i].data = gt;
+
+	drm_debugfs_create_files(local, ARRAY_SIZE(debugfs_list), root, minor);
+
+	xe_uc_debugfs_register(&gt->uc, root);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.h b/drivers/gpu/drm/xe/xe_gt_debugfs.h
new file mode 100644
index 000000000000..5a329f118a57
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_DEBUGFS_H_
+#define _XE_GT_DEBUGFS_H_
+
+struct xe_gt;
+
+void xe_gt_debugfs_register(struct xe_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c
new file mode 100644
index 000000000000..e5b0f4ecdbe8
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_freq.c
@@ -0,0 +1,222 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_gt_freq.h"
+
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+
+#include "xe_device_types.h"
+#include "xe_gt_sysfs.h"
+#include "xe_gt_throttle_sysfs.h"
+#include "xe_guc_pc.h"
+
+/**
+ * DOC: Xe GT Frequency Management
+ *
+ * This component is responsible for the raw GT frequency management, including
+ * the sysfs API.
+ *
+ * Underneath, Xe enables GuC SLPC automated frequency management. GuC is then
+ * allowed to request PCODE any frequency between the Minimum and the Maximum
+ * selected by this component. Furthermore, it is important to highlight that
+ * PCODE is the ultimate decision maker of the actual running frequency, based
+ * on thermal and other running conditions.
+ *
+ * Xe's Freq provides a sysfs API for frequency management:
+ *
+ * device/tile#/gt#/freq0/<item>_freq *read-only* files:
+ * - act_freq: The actual resolved frequency decided by PCODE.
+ * - cur_freq: The current one requested by GuC PC to the PCODE.
+ * - rpn_freq: The Render Performance (RP) N level, which is the minimal one.
+ * - rpe_freq: The Render Performance (RP) E level, which is the efficient one.
+ * - rp0_freq: The Render Performance (RP) 0 level, which is the maximum one.
+ *
+ * device/tile#/gt#/freq0/<item>_freq *read-write* files:
+ * - min_freq: Min frequency request.
+ * - max_freq: Max frequency request.
+ *             If max <= min, then freq_min becomes a fixed frequency request.
+ */
+
+/* Resolve the GuC PC of the GT whose kobject is the parent of the attribute's kobject. */
+static struct xe_guc_pc *dev_to_pc(struct device *dev)
+{
+	return &kobj_to_gt(dev->kobj.parent)->uc.guc.pc;
+}
+
+/* sysfs show for act_freq, described in the DOC section as PCODE's resolved frequency. */
+static ssize_t act_freq_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	/* Look up the GuC PC behind this attribute and print what it reports. */
+	return sysfs_emit(buf, "%d\n", xe_guc_pc_get_act_freq(dev_to_pc(dev)));
+}
+static DEVICE_ATTR_RO(act_freq);
+
+/* sysfs show for cur_freq, described in the DOC section as GuC PC's current request. */
+static ssize_t cur_freq_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	u32 freq;
+	ssize_t err = xe_guc_pc_get_cur_freq(dev_to_pc(dev), &freq);
+
+	/* Hand a failed query back to the reader instead of printing freq. */
+	if (err)
+		return err;
+
+	return sysfs_emit(buf, "%d\n", freq);
+}
+static DEVICE_ATTR_RO(cur_freq);
+
+/* sysfs show for rp0_freq, described in the DOC section as the maximum RP level. */
+static ssize_t rp0_freq_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	/* Look up the GuC PC behind this attribute and print what it reports. */
+	return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rp0_freq(dev_to_pc(dev)));
+}
+static DEVICE_ATTR_RO(rp0_freq);
+
+/* sysfs show for rpe_freq, described in the DOC section as the efficient RP level. */
+static ssize_t rpe_freq_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	/* Look up the GuC PC behind this attribute and print what it reports. */
+	return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rpe_freq(dev_to_pc(dev)));
+}
+static DEVICE_ATTR_RO(rpe_freq);
+
+/* sysfs show for rpn_freq, described in the DOC section as the minimal RP level. */
+static ssize_t rpn_freq_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	/* Look up the GuC PC behind this attribute and print what it reports. */
+	return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rpn_freq(dev_to_pc(dev)));
+}
+static DEVICE_ATTR_RO(rpn_freq);
+
+/* sysfs show for min_freq: print the value xe_guc_pc_get_min_freq() hands back. */
+static ssize_t min_freq_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	u32 freq;
+	ssize_t err = xe_guc_pc_get_min_freq(dev_to_pc(dev), &freq);
+
+	/* Hand a failed query back to the reader instead of printing freq. */
+	if (err)
+		return err;
+
+	return sysfs_emit(buf, "%d\n", freq);
+}
+
+/* sysfs store for min_freq: parse a u32 and pass it to xe_guc_pc_set_min_freq(). */
+static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr,
+			      const char *buff, size_t count)
+{
+	u32 freq;
+	ssize_t err;
+
+	/* Only call into GuC PC once the input parsed cleanly. */
+	err = kstrtou32(buff, 0, &freq);
+	if (!err)
+		err = xe_guc_pc_set_min_freq(dev_to_pc(dev), freq);
+	if (err)
+		return err;
+
+	/* Success: report the whole write as consumed. */
+	return count;
+}
+static DEVICE_ATTR_RW(min_freq);
+
+/* sysfs show for max_freq: print the value xe_guc_pc_get_max_freq() hands back. */
+static ssize_t max_freq_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	u32 freq;
+	ssize_t err = xe_guc_pc_get_max_freq(dev_to_pc(dev), &freq);
+
+	/* Hand a failed query back to the reader instead of printing freq. */
+	if (err)
+		return err;
+
+	return sysfs_emit(buf, "%d\n", freq);
+}
+
+/* sysfs store for max_freq: parse a u32 and pass it to xe_guc_pc_set_max_freq(). */
+static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr,
+			      const char *buff, size_t count)
+{
+	u32 freq;
+	ssize_t err;
+
+	/* Only call into GuC PC once the input parsed cleanly. */
+	err = kstrtou32(buff, 0, &freq);
+	if (!err)
+		err = xe_guc_pc_set_max_freq(dev_to_pc(dev), freq);
+	if (err)
+		return err;
+
+	/* Success: report the whole write as consumed. */
+	return count;
+}
+static DEVICE_ATTR_RW(max_freq);
+
+static const struct attribute *freq_attrs[] = {
+	&dev_attr_act_freq.attr,
+	&dev_attr_cur_freq.attr,
+	&dev_attr_rp0_freq.attr,
+	&dev_attr_rpe_freq.attr,
+	&dev_attr_rpn_freq.attr,
+	&dev_attr_min_freq.attr,
+	&dev_attr_max_freq.attr,
+	NULL
+};
+
+static void freq_fini(struct drm_device *drm, void *arg)
+{
+	struct kobject *kobj = arg; /* the freq0 kobject registered by xe_gt_freq_init() */
+
+	sysfs_remove_files(kobj, freq_attrs);
+	kobject_put(kobj); /* drop the reference from kobject_create_and_add() */
+}
+
+/**
+ * xe_gt_freq_init - Initialize Xe Freq component
+ * @gt: Xe GT object
+ *
+ * It needs to be initialized after GT Sysfs and GuC PC components are ready.
+ */
+void xe_gt_freq_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int err;
+
+	if (xe->info.skip_guc_pc)
+		return; /* no freq0 directory when GuC PC is skipped */
+
+	gt->freq = kobject_create_and_add("freq0", gt->sysfs);
+	if (!gt->freq) {
+		drm_warn(&xe->drm, "failed to add freq0 directory to %s\n",
+			 kobject_name(gt->sysfs));
+		return;
+	}
+
+	err = drmm_add_action_or_reset(&xe->drm, freq_fini, gt->freq);
+	if (err) {
+		gt->freq = NULL; /* freq_fini() already ran and dropped the kobject */
+		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", __func__, err);
+		return;
+	}
+
+	err = sysfs_create_files(gt->freq, freq_attrs);
+	if (err)
+		drm_warn(&xe->drm,  "failed to add freq attrs to %s, err: %d\n",
+			 kobject_name(gt->freq), err);
+
+	xe_gt_throttle_sysfs_init(gt);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_freq.h b/drivers/gpu/drm/xe/xe_gt_freq.h
new file mode 100644
index 000000000000..f3fe3c90491a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_freq.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_FREQ_H_
+#define _XE_GT_FREQ_H_
+
+struct xe_gt;
+
+void xe_gt_freq_init(struct xe_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c
new file mode 100644
index 000000000000..9fcae65b6469
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_idle.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_idle.h"
+#include "xe_gt_sysfs.h"
+#include "xe_guc_pc.h"
+#include "regs/xe_gt_regs.h"
+#include "xe_mmio.h"
+
+/**
+ * DOC: Xe GT Idle
+ *
+ * Contains functions that init GT idle features like C6
+ *
+ * device/tile#/gt#/gtidle/name - name of the state
+ * device/tile#/gt#/gtidle/idle_residency_ms - Provides residency of the idle state in ms
+ * device/tile#/gt#/gtidle/idle_status - Provides current idle state
+ */
+
+/* Resolve the xe_gt_idle behind the device pointer a gtidle attribute callback receives. */
+static struct xe_gt_idle *dev_to_gtidle(struct device *dev)
+{
+	/* kobj_to_gt() is applied to the parent of the attribute's own kobject. */
+	return &kobj_to_gt(dev->kobj.parent)->gtidle;
+}
+
+static struct xe_gt *gtidle_to_gt(struct xe_gt_idle *gtidle)
+{
+	return container_of(gtidle, struct xe_gt, gtidle); /* embedded in struct xe_gt */
+}
+
+static struct xe_guc_pc *gtidle_to_pc(struct xe_gt_idle *gtidle)
+{
+	return &gtidle_to_gt(gtidle)->uc.guc.pc; /* GuC PC of the GT that embeds gtidle */
+}
+
+/* Printable name of a GT idle state; anything other than C0/C6 maps to "unknown". */
+static const char *gt_idle_state_to_string(enum xe_gt_idle_state state)
+{
+	if (state == GT_IDLE_C0)
+		return "gt-c0";
+
+	if (state == GT_IDLE_C6)
+		return "gt-c6";
+
+	return "unknown";
+}
+
+/*
+ * Fold a new raw residency counter sample into the driver's extended copy and
+ * return the accumulated residency converted to milliseconds.
+ */
+static u64 get_residency_ms(struct xe_gt_idle *gtidle, u64 cur_residency)
+{
+	const u64 wrap = BIT_ULL(32);	/* the raw counter is assumed to wrap at 2^32 */
+	u64 prev = gtidle->prev_residency;
+	u64 delta;
+
+	/*
+	 * Store this sample so the next call can detect a wrap-around. This
+	 * relies on sufficiently frequent queries, otherwise the counter can
+	 * wrap more than once between two samples and ticks are lost.
+	 */
+	gtidle->prev_residency = cur_residency;
+
+	if (cur_residency >= prev)
+		delta = cur_residency - prev;
+	else
+		delta = cur_residency + (wrap - prev);
+
+	/* Add delta to extended raw driver copy of idle residency */
+	gtidle->cur_residency += delta;
+
+	/* residency_multiplier is in ns per tick; 1000000 ns make one ms */
+	return mul_u64_u32_div(gtidle->cur_residency,
+			       gtidle->residency_multiplier, 1000000);
+}
+
+/* sysfs show for name: print the label stored in gtidle->name. */
+static ssize_t name_show(struct device *dev,
+			 struct device_attribute *attr, char *buff)
+{
+	/* xe_gt_idle_sysfs_init() fills the label in as "gt<id>-mc" or "gt<id>-rc". */
+	return sysfs_emit(buff, "%s\n", dev_to_gtidle(dev)->name);
+}
+static DEVICE_ATTR_RO(name);
+
+/* sysfs show for idle_status: call the idle_status hook and print the state's name. */
+static ssize_t idle_status_show(struct device *dev,
+				struct device_attribute *attr, char *buff)
+{
+	struct xe_gt_idle *gtidle = dev_to_gtidle(dev);
+	enum xe_gt_idle_state state;
+
+	state = gtidle->idle_status(gtidle_to_pc(gtidle));
+
+	return sysfs_emit(buff, "%s\n", gt_idle_state_to_string(state));
+}
+static DEVICE_ATTR_RO(idle_status);
+
+/* sysfs show for idle_residency_ms: sample the raw counter and print it in ms. */
+static ssize_t idle_residency_ms_show(struct device *dev,
+				      struct device_attribute *attr, char *buff)
+{
+	struct xe_gt_idle *gtidle = dev_to_gtidle(dev);
+	u64 raw = gtidle->idle_residency(gtidle_to_pc(gtidle));
+
+	/* get_residency_ms() also updates the counters kept in gtidle. */
+	return sysfs_emit(buff, "%llu\n", get_residency_ms(gtidle, raw));
+}
+static DEVICE_ATTR_RO(idle_residency_ms);
+
+static const struct attribute *gt_idle_attrs[] = {
+	&dev_attr_name.attr,
+	&dev_attr_idle_status.attr,
+	&dev_attr_idle_residency_ms.attr,
+	NULL,
+};
+
+static void gt_idle_sysfs_fini(struct drm_device *drm, void *arg)
+{
+	struct kobject *kobj = arg; /* the "gtidle" kobject from xe_gt_idle_sysfs_init() */
+
+	sysfs_remove_files(kobj, gt_idle_attrs);
+	kobject_put(kobj); /* drop the reference from kobject_create_and_add() */
+}
+
+/* Create the per-GT "gtidle" sysfs directory and set up the data behind its files. */
+void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle)
+{
+	struct xe_gt *gt = gtidle_to_gt(gtidle);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct kobject *kobj;
+	int err;
+
+	kobj = kobject_create_and_add("gtidle", gt->sysfs);
+	if (!kobj) {
+		drm_warn(&xe->drm, "%s failed, err: %d\n", __func__, -ENOMEM);
+		return;
+	}
+
+	if (xe_gt_is_media_type(gt)) {
+		snprintf(gtidle->name, sizeof(gtidle->name), "gt%d-mc", gt->info.id);
+		gtidle->idle_residency = xe_guc_pc_mc6_residency;
+	} else {
+		snprintf(gtidle->name, sizeof(gtidle->name), "gt%d-rc", gt->info.id);
+		gtidle->idle_residency = xe_guc_pc_rc6_residency;
+	}
+
+	/* Multiplier for Residency counter in units of 1.28us */
+	gtidle->residency_multiplier = 1280;
+	gtidle->idle_status = xe_guc_pc_c_status;
+
+	err = sysfs_create_files(kobj, gt_idle_attrs);
+	if (err) {
+		kobject_put(kobj);
+		drm_warn(&xe->drm, "failed to register gtidle sysfs, err: %d\n", err);
+		return;
+	}
+
+	err = drmm_add_action_or_reset(&xe->drm, gt_idle_sysfs_fini, kobj);
+	if (err)
+		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", __func__, err);
+}
+
+/* Program the RC6 idle hysteresis, then enable RC6 through RC_CONTROL. */
+void xe_gt_idle_enable_c6(struct xe_gt *gt)
+{
+	const u32 rc6_on = RC_CTL_HW_ENABLE | RC_CTL_TO_MODE | RC_CTL_RC6_ENABLE;
+
+	xe_device_assert_mem_access(gt_to_xe(gt));
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+	/* Hysteresis is in units of 1280 ns: 0x3B9ACA of them add up to 5s */
+	xe_mmio_write32(gt, RC_IDLE_HYSTERSIS, 0x3B9ACA);
+	xe_mmio_write32(gt, RC_CONTROL, rc6_on);
+}
+
+void xe_gt_idle_disable_c6(struct xe_gt *gt)
+{
+	xe_device_assert_mem_access(gt_to_xe(gt));
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+
+	xe_mmio_write32(gt, PG_ENABLE, 0);
+	xe_mmio_write32(gt, RC_CONTROL, 0); /* clears the bits xe_gt_idle_enable_c6() sets */
+	xe_mmio_write32(gt, RC_STATE, 0);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.h b/drivers/gpu/drm/xe/xe_gt_idle.h
new file mode 100644
index 000000000000..69280fd16b03
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_idle.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_IDLE_H_
+#define _XE_GT_IDLE_H_
+
+#include "xe_gt_idle_types.h"
+
+struct xe_gt;
+
+void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle);
+void xe_gt_idle_enable_c6(struct xe_gt *gt);
+void xe_gt_idle_disable_c6(struct xe_gt *gt);
+
+#endif /* _XE_GT_IDLE_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_idle_types.h b/drivers/gpu/drm/xe/xe_gt_idle_types.h
new file mode 100644
index 000000000000..f99b447534f3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_idle_types.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_IDLE_SYSFS_TYPES_H_
+#define _XE_GT_IDLE_SYSFS_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_guc_pc;
+
+/* States of GT Idle */
+enum xe_gt_idle_state {
+	GT_IDLE_C0,
+	GT_IDLE_C6,
+	GT_IDLE_UNKNOWN,
+};
+
+/**
+ * struct xe_gt_idle - A struct that contains idle properties based of gt
+ */
+struct xe_gt_idle {
+	/** @name: name */
+	char name[16];
+	/** @residency_multiplier: residency multiplier in ns */
+	u32 residency_multiplier;
+	/** @cur_residency: raw driver copy of idle residency */
+	u64 cur_residency;
+	/** @prev_residency: previous residency counter */
+	u64 prev_residency;
+	/** @idle_status: get the current idle state */
+	enum xe_gt_idle_state (*idle_status)(struct xe_guc_pc *pc);
+	/** @idle_residency: get idle residency counter */
+	u64 (*idle_residency)(struct xe_guc_pc *pc);
+};
+
+#endif /* _XE_GT_IDLE_SYSFS_TYPES_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
new file mode 100644
index 000000000000..8546cd3cc50d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -0,0 +1,685 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_gt_mcr.h"
+
+#include "regs/xe_gt_regs.h"
+#include "xe_gt.h"
+#include "xe_gt_topology.h"
+#include "xe_gt_types.h"
+#include "xe_mmio.h"
+
+/**
+ * DOC: GT Multicast/Replicated (MCR) Register Support
+ *
+ * Some GT registers are designed as "multicast" or "replicated" registers:
+ * multiple instances of the same register share a single MMIO offset.  MCR
+ * registers are generally used when the hardware needs to potentially track
+ * independent values of a register per hardware unit (e.g., per-subslice,
+ * per-L3bank, etc.).  The specific types of replication that exist vary
+ * per-platform.
+ *
+ * MMIO accesses to MCR registers are controlled according to the settings
+ * programmed in the platform's MCR_SELECTOR register(s).  MMIO writes to MCR
+ * registers can be done in either multicast (a single write updates all
+ * instances of the register to the same value) or unicast (a write updates only
+ * one specific instance) form.  Reads of MCR registers always operate in a
+ * unicast manner regardless of how the multicast/unicast bit is set in
+ * MCR_SELECTOR.  Selection of a specific MCR instance for unicast operations is
+ * referred to as "steering."
+ *
+ * If MCR register operations are steered toward a hardware unit that is
+ * fused off or currently powered down due to power gating, the MMIO operation
+ * is "terminated" by the hardware.  Terminated read operations will return a
+ * value of zero and terminated unicast write operations will be silently
+ * ignored. During device initialization, the goal of the various
+ * ``init_steering_*()`` functions is to apply the platform-specific rules for
+ * each MCR register type to identify a steering target that will select a
+ * non-terminated instance.
+ */
+
+#define STEER_SEMAPHORE		XE_REG(0xFD0)
+
+static inline struct xe_reg to_xe_reg(struct xe_reg_mcr reg_mcr)
+{
+	return reg_mcr.__reg; /* hand back the struct xe_reg held inside the MCR wrapper */
+}
+
+enum {
+	MCR_OP_READ,
+	MCR_OP_WRITE
+};
+
+static const struct xe_mmio_range xelp_l3bank_steering_table[] = {
+	{ 0x00B100, 0x00B3FF },
+	{},
+};
+
+static const struct xe_mmio_range xehp_l3bank_steering_table[] = {
+	{ 0x008C80, 0x008CFF },
+	{ 0x00B100, 0x00B3FF },
+	{},
+};
+
+/*
+ * Although the bspec lists more "MSLICE" ranges than shown here, some of those
+ * are of a "GAM" subclass that has special rules and doesn't need to be
+ * included here.
+ */
+static const struct xe_mmio_range xehp_mslice_steering_table[] = {
+	{ 0x00DD00, 0x00DDFF },
+	{ 0x00E900, 0x00FFFF }, /* 0xEA00 - 0xEFFF is unused */
+	{},
+};
+
+static const struct xe_mmio_range xehp_lncf_steering_table[] = {
+	{ 0x00B000, 0x00B0FF },
+	{ 0x00D880, 0x00D8FF },
+	{},
+};
+
+/*
+ * We have several types of MCR registers where steering to (0,0) will always
+ * provide us with a non-terminated value.  We'll stick them all in the same
+ * table for simplicity.
+ */
+static const struct xe_mmio_range xehpc_instance0_steering_table[] = {
+	{ 0x004000, 0x004AFF },		/* HALF-BSLICE */
+	{ 0x008800, 0x00887F },		/* CC */
+	{ 0x008A80, 0x008AFF },		/* TILEPSMI */
+	{ 0x00B000, 0x00B0FF },		/* HALF-BSLICE */
+	{ 0x00B100, 0x00B3FF },		/* L3BANK */
+	{ 0x00C800, 0x00CFFF },		/* HALF-BSLICE */
+	{ 0x00D800, 0x00D8FF },		/* HALF-BSLICE */
+	{ 0x00DD00, 0x00DDFF },		/* BSLICE */
+	{ 0x00E900, 0x00E9FF },		/* HALF-BSLICE */
+	{ 0x00EC00, 0x00EEFF },		/* HALF-BSLICE */
+	{ 0x00F000, 0x00FFFF },		/* HALF-BSLICE */
+	{ 0x024180, 0x0241FF },		/* HALF-BSLICE */
+	{},
+};
+
+static const struct xe_mmio_range xelpg_instance0_steering_table[] = {
+	{ 0x000B00, 0x000BFF },         /* SQIDI */
+	{ 0x001000, 0x001FFF },         /* SQIDI */
+	{ 0x004000, 0x0048FF },         /* GAM */
+	{ 0x008700, 0x0087FF },         /* SQIDI */
+	{ 0x00B000, 0x00B0FF },         /* NODE */
+	{ 0x00C800, 0x00CFFF },         /* GAM */
+	{ 0x00D880, 0x00D8FF },         /* NODE */
+	{ 0x00DD00, 0x00DDFF },         /* OAAL2 */
+	{},
+};
+
+static const struct xe_mmio_range xelpg_l3bank_steering_table[] = {
+	{ 0x00B100, 0x00B3FF },
+	{},
+};
+
+static const struct xe_mmio_range xelp_dss_steering_table[] = {
+	{ 0x008150, 0x00815F },
+	{ 0x009520, 0x00955F },
+	{ 0x00DE80, 0x00E8FF },
+	{ 0x024A00, 0x024A7F },
+	{},
+};
+
+/* DSS steering is used for GSLICE ranges as well */
+static const struct xe_mmio_range xehp_dss_steering_table[] = {
+	{ 0x005200, 0x0052FF },		/* GSLICE */
+	{ 0x005400, 0x007FFF },		/* GSLICE */
+	{ 0x008140, 0x00815F },		/* GSLICE (0x8140-0x814F), DSS (0x8150-0x815F) */
+	{ 0x008D00, 0x008DFF },		/* DSS */
+	{ 0x0094D0, 0x00955F },		/* GSLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */
+	{ 0x009680, 0x0096FF },		/* DSS */
+	{ 0x00D800, 0x00D87F },		/* GSLICE */
+	{ 0x00DC00, 0x00DCFF },		/* GSLICE */
+	{ 0x00DE80, 0x00E8FF },		/* DSS (0xE000-0xE0FF reserved ) */
+	{ 0x017000, 0x017FFF },		/* GSLICE */
+	{ 0x024A00, 0x024A7F },		/* DSS */
+	{},
+};
+
+/* DSS steering is used for COMPUTE ranges as well */
+static const struct xe_mmio_range xehpc_dss_steering_table[] = {
+	{ 0x008140, 0x00817F },		/* COMPUTE (0x8140-0x814F & 0x8160-0x817F), DSS (0x8150-0x815F) */
+	{ 0x0094D0, 0x00955F },		/* COMPUTE (0x94D0-0x951F), DSS (0x9520-0x955F) */
+	{ 0x009680, 0x0096FF },		/* DSS */
+	{ 0x00DC00, 0x00DCFF },		/* COMPUTE */
+	{ 0x00DE80, 0x00E7FF },		/* DSS (0xDF00-0xE1FF reserved ) */
+	{},
+};
+
+/* DSS steering is used for SLICE ranges as well */
+static const struct xe_mmio_range xelpg_dss_steering_table[] = {
+	{ 0x005200, 0x0052FF },		/* SLICE */
+	{ 0x005500, 0x007FFF },		/* SLICE */
+	{ 0x008140, 0x00815F },		/* SLICE (0x8140-0x814F), DSS (0x8150-0x815F) */
+	{ 0x0094D0, 0x00955F },		/* SLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */
+	{ 0x009680, 0x0096FF },		/* DSS */
+	{ 0x00D800, 0x00D87F },		/* SLICE */
+	{ 0x00DC00, 0x00DCFF },		/* SLICE */
+	{ 0x00DE80, 0x00E8FF },		/* DSS (0xE000-0xE0FF reserved) */
+	{},
+};
+
+static const struct xe_mmio_range xelpmp_oaddrm_steering_table[] = {
+	{ 0x393200, 0x39323F },
+	{ 0x393400, 0x3934FF },
+	{},
+};
+
+static const struct xe_mmio_range dg2_implicit_steering_table[] = {
+	{ 0x000B00, 0x000BFF },		/* SF (SQIDI replication) */
+	{ 0x001000, 0x001FFF },		/* SF (SQIDI replication) */
+	{ 0x004000, 0x004AFF },		/* GAM (MSLICE replication) */
+	{ 0x008700, 0x0087FF },		/* MCFG (SQIDI replication) */
+	{ 0x00C800, 0x00CFFF },		/* GAM (MSLICE replication) */
+	{ 0x00F000, 0x00FFFF },		/* GAM (MSLICE replication) */
+	{},
+};
+
+static const struct xe_mmio_range xe2lpg_dss_steering_table[] = {
+	{ 0x005200, 0x0052FF },         /* SLICE */
+	{ 0x005500, 0x007FFF },         /* SLICE */
+	{ 0x008140, 0x00815F },         /* SLICE (0x8140-0x814F), DSS (0x8150-0x815F) */
+	{ 0x0094D0, 0x00955F },         /* SLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */
+	{ 0x009680, 0x0096FF },         /* DSS */
+	{ 0x00D800, 0x00D87F },         /* SLICE */
+	{ 0x00DC00, 0x00DCFF },         /* SLICE */
+	{ 0x00DE80, 0x00E8FF },         /* DSS (0xE000-0xE0FF reserved) */
+	{ 0x00E980, 0x00E9FF },         /* SLICE */
+	{ 0x013000, 0x0133FF },         /* DSS (0x13000-0x131FF), SLICE (0x13200-0x133FF) */
+	{},
+};
+
+static const struct xe_mmio_range xe2lpg_sqidi_psmi_steering_table[] = {
+	{ 0x000B00, 0x000BFF },
+	{ 0x001000, 0x001FFF },
+	{},
+};
+
+static const struct xe_mmio_range xe2lpg_instance0_steering_table[] = {
+	{ 0x004000, 0x004AFF },         /* GAM, rsvd, GAMWKR */
+	{ 0x008700, 0x00887F },         /* SQIDI, MEMPIPE */
+	{ 0x00B000, 0x00B3FF },         /* NODE, L3BANK */
+	{ 0x00C800, 0x00CFFF },         /* GAM */
+	{ 0x00D880, 0x00D8FF },         /* NODE */
+	{ 0x00DD00, 0x00DDFF },         /* MEMPIPE */
+	{ 0x00E900, 0x00E97F },         /* MEMPIPE */
+	{ 0x00F000, 0x00FFFF },         /* GAM, GAMWKR */
+	{ 0x013400, 0x0135FF },         /* MEMPIPE */
+	{},
+};
+
+static const struct xe_mmio_range xe2lpm_gpmxmt_steering_table[] = {
+	{ 0x388160, 0x38817F },
+	{ 0x389480, 0x3894CF },
+	{},
+};
+
+/*
+ * Register ranges steered with the INSTANCE0 target on media GTs with
+ * media version >= 20 (installed by xe_gt_mcr_init()).  Terminated by an
+ * empty entry.
+ */
+static const struct xe_mmio_range xe2lpm_instance0_steering_table[] = {
+	{ 0x384000, 0x3847DF },         /* GAM, rsvd, GAM */
+	{ 0x384900, 0x384AFF },         /* GAM */
+	{ 0x389560, 0x3895FF },         /* MEDIAINF */
+	{ 0x38B600, 0x38B8FF },         /* L3BANK */
+	{ 0x38C800, 0x38D07F },         /* GAM, MEDIAINF */
+	{ 0x38F000, 0x38F0FF },         /* GAM */
+	{ 0x393C00, 0x393C7F },         /* MEDIAINF */
+	{},
+};
+
+/*
+ * Choose the L3BANK steering target from the fuse registers.  How the
+ * group/instance pair is derived depends on the platform generation.
+ */
+static void init_steering_l3bank(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 fuse3;
+
+	if (GRAPHICS_VERx100(xe) >= 1270) {
+		u32 fuse4;
+		u32 mslices, banks;
+
+		fuse3 = xe_mmio_read32(gt, MIRROR_FUSE3);
+		fuse4 = xe_mmio_read32(gt, XEHP_FUSE4);
+		mslices = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
+		banks = REG_FIELD_GET(GT_L3_EXC_MASK, fuse4);
+
+		/*
+		 * The group picks the mslice and the instance picks the bank
+		 * inside it.  Bank 0 is usable unless the bank mask is 010b,
+		 * in which case bank 2 is used instead.
+		 */
+		gt->steering[L3BANK].group_target = __ffs(mslices);
+		if (banks & BIT(0))
+			gt->steering[L3BANK].instance_target = 0;
+		else
+			gt->steering[L3BANK].instance_target = 2;
+		return;
+	}
+
+	if (xe->info.platform == XE_DG2) {
+		u32 mslices, first_bank;
+
+		fuse3 = xe_mmio_read32(gt, MIRROR_FUSE3);
+		mslices = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
+		first_bank = __ffs(mslices) * 8;
+
+		/*
+		 * As for mslice registers: find a valid mslice and steer to
+		 * the first L3BANK of that quad.  The bank number is split
+		 * between the low bits of group and instance.
+		 */
+		gt->steering[L3BANK].group_target = (first_bank >> 2) & 0x7;
+		gt->steering[L3BANK].instance_target = first_bank & 0x3;
+		return;
+	}
+
+	/* Remaining platforms: use the inverted fuse value to pick a bank */
+	fuse3 = xe_mmio_read32(gt, MIRROR_FUSE3);
+	gt->steering[L3BANK].group_target = 0;	/* unused */
+	gt->steering[L3BANK].instance_target =
+		__ffs(REG_FIELD_GET(L3BANK_MASK, ~fuse3));
+}
+
+/*
+ * Choose the MSLICE steering target, and the LNCF target that is derived
+ * from it (LNCF has no init hook of its own in xe_steering_types).
+ */
+static void init_steering_mslice(struct xe_gt *gt)
+{
+	u32 fuse3 = xe_mmio_read32(gt, MIRROR_FUSE3);
+	u32 meml3_mask = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
+	unsigned long first_mslice = __ffs(meml3_mask);
+
+	/*
+	 * An mslice register is non-terminated when either its meml3 or at
+	 * least one of its DSS is present.  At least one meml3 always exists,
+	 * so the meml3 mask alone is enough to find a usable mslice and the
+	 * DSS fusing can be ignored.
+	 */
+	gt->steering[MSLICE].instance_target = 0;	/* unused */
+	gt->steering[MSLICE].group_target = first_mslice;
+
+	/*
+	 * LNCF termination follows mslice presence too.  Either LNCF of a
+	 * non-terminated mslice works, so LNCF 0 of that mslice is used.
+	 */
+	gt->steering[LNCF].instance_target = 0;		/* unused */
+	gt->steering[LNCF].group_target = first_mslice << 1;
+}
+
+/*
+ * Choose the DSS steering target: take the lower of the first-set indices
+ * of the geometry and compute DSS masks and split it into group/instance
+ * using the per-group DSS count (8 on PVC, 4 elsewhere).
+ */
+static void init_steering_dss(struct xe_gt *gt)
+{
+	unsigned int first_dss = min(xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0),
+				     xe_dss_mask_group_ffs(gt->fuse_topo.c_dss_mask, 0, 0));
+	unsigned int group_size = 4;
+
+	if (gt_to_xe(gt)->info.platform == XE_PVC)
+		group_size = 8;
+
+	gt->steering[DSS].instance_target = first_dss % group_size;
+	gt->steering[DSS].group_target = first_dss / group_size;
+}
+
+/*
+ * Choose the OADDRM steering target.  Only the group matters; the instance
+ * is unused and set to 0.
+ */
+static void init_steering_oaddrm(struct xe_gt *gt)
+{
+	/*
+	 * First instance is only terminated if the entire first media slice
+	 * is absent (i.e., no VCS0 or VECS0).
+	 */
+	if (gt->info.engine_mask & (XE_HW_ENGINE_VCS0 | XE_HW_ENGINE_VECS0))
+		gt->steering[OADDRM].group_target = 0;
+	else
+		gt->steering[OADDRM].group_target = 1;
+
+	/*
+	 * This must write the OADDRM slot: writing the DSS slot here would
+	 * clobber the target selected by init_steering_dss().
+	 */
+	gt->steering[OADDRM].instance_target = 0;	/* unused */
+}
+
+/*
+ * Choose the SQIDI_PSMI steering target from the node-enable fuse field:
+ * the index of the first enabled node is split into group (index / 2) and
+ * instance (index % 2).
+ */
+static void init_steering_sqidi_psmi(struct xe_gt *gt)
+{
+	u32 fuse3 = xe_mmio_read32(gt, MIRROR_FUSE3);
+	u32 node_mask = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3);
+	u32 first_node = __ffs(node_mask);
+
+	gt->steering[SQIDI_PSMI].instance_target = first_node % 2;
+	gt->steering[SQIDI_PSMI].group_target = first_node / 2;
+}
+
+/*
+ * INSTANCE0 ranges are always steered to group 0 / instance 0.
+ *
+ * This must write the INSTANCE0 slot.  The init hooks run in steering-type
+ * order, so writing the DSS slot here would reset the target chosen earlier
+ * by init_steering_dss() on platforms that have both kinds of range.
+ */
+static void init_steering_inst0(struct xe_gt *gt)
+{
+	gt->steering[INSTANCE0].group_target = 0;	/* unused */
+	gt->steering[INSTANCE0].instance_target = 0;	/* unused */
+}
+
+/*
+ * Per-steering-type metadata, indexed by steering type.
+ *
+ * @name is the label printed by xe_gt_mcr_steering_dump().  @init, when not
+ * NULL, is called by xe_gt_mcr_init() for every type that has a range table
+ * installed, and fills in that type's group/instance targets.
+ */
+static const struct {
+	const char *name;
+	void (*init)(struct xe_gt *gt);
+} xe_steering_types[] = {
+	[L3BANK] =	{ "L3BANK",	init_steering_l3bank },
+	[MSLICE] =	{ "MSLICE",	init_steering_mslice },
+	[LNCF] =	{ "LNCF",	NULL }, /* initialized by mslice init */
+	[DSS] =		{ "DSS",	init_steering_dss },
+	[OADDRM] =	{ "OADDRM / GPMXMT", init_steering_oaddrm },
+	[SQIDI_PSMI] =  { "SQIDI_PSMI", init_steering_sqidi_psmi },
+	[INSTANCE0] =	{ "INSTANCE 0",	init_steering_inst0 },
+	[IMPLICIT_STEERING] = { "IMPLICIT", NULL },
+};
+
+/*
+ * Set up MCR steering for a GT: initialise the steering lock, install the
+ * register range tables that apply to this GT type and IP version, then let
+ * each steering type with a table pick its group/instance target.
+ */
+void xe_gt_mcr_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int type;
+
+	BUILD_BUG_ON(IMPLICIT_STEERING + 1 != NUM_STEERING_TYPES);
+	BUILD_BUG_ON(ARRAY_SIZE(xe_steering_types) != NUM_STEERING_TYPES);
+
+	spin_lock_init(&gt->mcr_lock);
+
+	if (gt->info.type == XE_GT_TYPE_MEDIA) {
+		drm_WARN_ON(&xe->drm, MEDIA_VER(xe) < 13);
+
+		if (MEDIA_VER(xe) < 20) {
+			gt->steering[OADDRM].ranges = xelpmp_oaddrm_steering_table;
+		} else {
+			gt->steering[OADDRM].ranges = xe2lpm_gpmxmt_steering_table;
+			gt->steering[INSTANCE0].ranges = xe2lpm_instance0_steering_table;
+		}
+	} else if (GRAPHICS_VER(xe) >= 20) {
+		gt->steering[DSS].ranges = xe2lpg_dss_steering_table;
+		gt->steering[SQIDI_PSMI].ranges = xe2lpg_sqidi_psmi_steering_table;
+		gt->steering[INSTANCE0].ranges = xe2lpg_instance0_steering_table;
+	} else if (GRAPHICS_VERx100(xe) >= 1270) {
+		gt->steering[INSTANCE0].ranges = xelpg_instance0_steering_table;
+		gt->steering[L3BANK].ranges = xelpg_l3bank_steering_table;
+		gt->steering[DSS].ranges = xelpg_dss_steering_table;
+	} else if (xe->info.platform == XE_PVC) {
+		gt->steering[INSTANCE0].ranges = xehpc_instance0_steering_table;
+		gt->steering[DSS].ranges = xehpc_dss_steering_table;
+	} else if (xe->info.platform == XE_DG2) {
+		gt->steering[L3BANK].ranges = xehp_l3bank_steering_table;
+		gt->steering[MSLICE].ranges = xehp_mslice_steering_table;
+		gt->steering[LNCF].ranges = xehp_lncf_steering_table;
+		gt->steering[DSS].ranges = xehp_dss_steering_table;
+		gt->steering[IMPLICIT_STEERING].ranges = dg2_implicit_steering_table;
+	} else {
+		gt->steering[L3BANK].ranges = xelp_l3bank_steering_table;
+		gt->steering[DSS].ranges = xelp_dss_steering_table;
+	}
+
+	/* Pick a non-terminated target for every type that is in use */
+	for (type = 0; type < NUM_STEERING_TYPES; type++) {
+		if (!gt->steering[type].ranges)
+			continue;
+		if (!xe_steering_types[type].init)
+			continue;
+
+		xe_steering_types[type].init(gt);
+	}
+}
+
+/**
+ * xe_gt_mcr_set_implicit_defaults - Initialize steer control registers
+ * @gt: GT structure
+ *
+ * Some register ranges do not need their steering control registers
+ * reprogrammed on every access; programming them once at initialization is
+ * enough.  This function programs those registers on the platforms that
+ * need it (currently only DG2).
+ */
+void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 steer_val;
+
+	if (xe->info.platform != XE_DG2)
+		return;
+
+	steer_val = REG_FIELD_PREP(MCR_SLICE_MASK, 0) |
+		    REG_FIELD_PREP(MCR_SUBSLICE_MASK, 2);
+
+	xe_mmio_write32(gt, MCFG_MCR_SELECTOR, steer_val);
+	xe_mmio_write32(gt, SF_MCR_SELECTOR, steer_val);
+
+	/*
+	 * GAM registers need no programming here: all reads must go to
+	 * instance 1 (unicast reads of other instances are not allowed), and
+	 * instance 1 is already the hardware's default steering target,
+	 * which is never changed.
+	 */
+}
+
+/*
+ * xe_gt_mcr_get_nonterminated_steering - find group/instance values that
+ *    will steer a register to a non-terminated instance
+ * @gt: GT structure
+ * @reg_mcr: register for which the steering is required
+ * @group: return variable for group steering
+ * @instance: return variable for instance steering
+ *
+ * Looks the register up in every explicit steering table of the GT and
+ * returns the matching type's group/instance target.  A register found only
+ * in the implicit steering table needs no steering.  A register found in no
+ * table triggers a warning and is steered to 0/0 as a guess.
+ *
+ * Returns true if the caller should steer to the @group/@instance values
+ * returned.  Returns false if the caller need not perform any steering, in
+ * which case @group and @instance are left untouched.
+ */
+static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
+						 struct xe_reg_mcr reg_mcr,
+						 u8 *group, u8 *instance)
+{
+	const struct xe_reg reg = to_xe_reg(reg_mcr);
+	const struct xe_mmio_range *range;
+	int type;
+
+	/* Explicitly steered types come first */
+	for (type = 0; type < IMPLICIT_STEERING; type++) {
+		range = gt->steering[type].ranges;
+		if (!range)
+			continue;
+
+		for (; range->end > 0; range++) {
+			if (!xe_mmio_in_range(gt, range, reg))
+				continue;
+
+			*group = gt->steering[type].group_target;
+			*instance = gt->steering[type].instance_target;
+			return true;
+		}
+	}
+
+	/* Implicitly steered registers need nothing from the caller */
+	range = gt->steering[IMPLICIT_STEERING].ranges;
+	if (range) {
+		for (; range->end > 0; range++) {
+			if (xe_mmio_in_range(gt, range, reg))
+				return false;
+		}
+	}
+
+	/*
+	 * The register is in no steering table and is not implicitly
+	 * steered either.  Warn and fall back to 0/0 as a guess.
+	 */
+	drm_WARN(&gt_to_xe(gt)->drm, true,
+		 "Did not find MCR register %#x in any MCR steering table\n",
+		 reg.addr);
+	*group = 0;
+	*instance = 0;
+
+	return true;
+}
+
+/*
+ * Take exclusive ownership of MCR steering.  The spinlock serialises
+ * accesses within the driver; from MTL onwards a hardware semaphore is
+ * taken as well, because external clients (e.g., firmware) share the
+ * steering control register.
+ */
+static void mcr_lock(struct xe_gt *gt) __acquires(&gt->mcr_lock)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int ret;
+
+	spin_lock(&gt->mcr_lock);
+
+	/* Older platforms have no semaphore to take */
+	if (GRAPHICS_VERx100(xe) < 1270)
+		return;
+
+	/*
+	 * The semaphore is owned once a read of the semaphore register
+	 * returns 1, so poll until that bit is seen.
+	 */
+	ret = xe_mmio_wait32(gt, STEER_SEMAPHORE, 0x1, 0x1, 10, NULL, true);
+
+	drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT);
+}
+
+/*
+ * Counterpart of mcr_lock(): give the hardware semaphore back (where one is
+ * used) and then drop the spinlock.
+ */
+static void mcr_unlock(struct xe_gt *gt) __releases(&gt->mcr_lock)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	/* Writing 1 to the semaphore register releases it */
+	if (GRAPHICS_VERx100(xe) >= 1270)
+		xe_mmio_write32(gt, STEER_SEMAPHORE, 0x1);
+
+	spin_unlock(&gt->mcr_lock);
+}
+
+/*
+ * Read or write a register through explicit MCR steering.
+ *
+ * Programs the steering selector for @group/@instance, performs the access
+ * selected by @rw_flag, and returns the value read (0 for a write).  Must be
+ * called with the MCR lock held; the caller is also responsible for having
+ * the relevant forcewake wells up.
+ */
+static u32 rw_with_mcr_steering(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
+				u8 rw_flag, int group, int instance, u32 value)
+{
+	const struct xe_reg reg = to_xe_reg(reg_mcr);
+	const bool is_read = rw_flag == MCR_OP_READ;
+	struct xe_reg selector;
+	u32 selector_val;
+	u32 result = 0;
+
+	lockdep_assert_held(&gt->mcr_lock);
+
+	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
+		selector = MTL_MCR_SELECTOR;
+		selector_val = REG_FIELD_PREP(MTL_MCR_GROUPID, group) |
+			       REG_FIELD_PREP(MTL_MCR_INSTANCEID, instance);
+	} else {
+		selector = MCR_SELECTOR;
+		selector_val = REG_FIELD_PREP(MCR_SLICE_MASK, group) |
+			       REG_FIELD_PREP(MCR_SUBSLICE_MASK, instance);
+	}
+
+	/*
+	 * Reads keep the hardware in multicast mode; only a write to a
+	 * specific instance switches it to unicast.
+	 *
+	 * The multicast/unicast bit normally does not affect reads, which
+	 * always return the value of a single instance.  Some platforms,
+	 * however, have workarounds that require staying in multicast mode
+	 * for reads (e.g. Wa_22013088509 on PVC).  Doing so everywhere has no
+	 * real downside, so the bit is only left clear for writes.
+	 *
+	 * The previous selector value does not need to be saved.
+	 */
+	if (is_read)
+		selector_val |= MCR_MULTICAST;
+
+	xe_mmio_write32(gt, selector, selector_val);
+
+	if (is_read)
+		result = xe_mmio_read32(gt, reg);
+	else
+		xe_mmio_write32(gt, reg, value);
+
+	/*
+	 * A write cleared the multicast bit, and it has to be set again
+	 * before returning.  Group and instance are irrelevant here because
+	 * the next MCR operation reprograms them.
+	 */
+	if (rw_flag == MCR_OP_WRITE)
+		xe_mmio_write32(gt, selector, MCR_MULTICAST);
+
+	return result;
+}
+
+/**
+ * xe_gt_mcr_unicast_read_any - reads a non-terminated instance of an MCR register
+ * @gt: GT structure
+ * @reg_mcr: register to read
+ *
+ * Reads a GT MCR register, steering the read to a non-terminated instance
+ * (i.e., one that isn't fused off or powered down by power gating).  If the
+ * register needs no steering it is read directly.  The caller must already
+ * hold any necessary forcewake domains.
+ *
+ * Returns the value from a non-terminated instance of @reg_mcr.
+ */
+u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, struct xe_reg_mcr reg_mcr)
+{
+	u8 group, instance;
+	u32 val;
+
+	if (!xe_gt_mcr_get_nonterminated_steering(gt, reg_mcr,
+						  &group, &instance))
+		return xe_mmio_read32(gt, to_xe_reg(reg_mcr));
+
+	mcr_lock(gt);
+	val = rw_with_mcr_steering(gt, reg_mcr, MCR_OP_READ,
+				   group, instance, 0);
+	mcr_unlock(gt);
+
+	return val;
+}
+
+/**
+ * xe_gt_mcr_unicast_read - read a specific instance of an MCR register
+ * @gt: GT structure
+ * @reg_mcr: the MCR register to read
+ * @group: the MCR group
+ * @instance: the MCR instance
+ *
+ * Takes the MCR lock, steers toward the requested group/instance and reads
+ * the register.
+ *
+ * Returns the value read from that instance.
+ */
+u32 xe_gt_mcr_unicast_read(struct xe_gt *gt,
+			   struct xe_reg_mcr reg_mcr,
+			   int group, int instance)
+{
+	u32 result;
+
+	mcr_lock(gt);
+	result = rw_with_mcr_steering(gt, reg_mcr, MCR_OP_READ,
+				      group, instance, 0);
+	mcr_unlock(gt);
+
+	return result;
+}
+
+/**
+ * xe_gt_mcr_unicast_write - write a specific instance of an MCR register
+ * @gt: GT structure
+ * @reg_mcr: the MCR register to write
+ * @value: value to write
+ * @group: the MCR group
+ * @instance: the MCR instance
+ *
+ * Takes the MCR lock and writes @value to a single instance of the
+ * register, selected by steering toward @group/@instance in unicast mode.
+ */
+void xe_gt_mcr_unicast_write(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
+			     u32 value, int group, int instance)
+{
+	mcr_lock(gt);
+	/* The return value only carries data for reads */
+	rw_with_mcr_steering(gt, reg_mcr, MCR_OP_WRITE,
+			     group, instance, value);
+	mcr_unlock(gt);
+}
+
+/**
+ * xe_gt_mcr_multicast_write - write a value to all instances of an MCR register
+ * @gt: GT structure
+ * @reg_mcr: the MCR register to write
+ * @value: value to write
+ *
+ * Writes @value to the register in multicast mode so that every instance
+ * is updated.
+ */
+void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
+			       u32 value)
+{
+	/*
+	 * The lock only serialises against unicast operations.  Those leave
+	 * the MULTICAST bit set when they finish, so once the lock is held
+	 * the steering register needs no programming.
+	 */
+	mcr_lock(gt);
+	xe_mmio_write32(gt, to_xe_reg(reg_mcr), value);
+	mcr_unlock(gt);
+}
+
+/*
+ * Print, for every steering type with a range table installed, its name,
+ * its group/instance target and the register ranges it covers.
+ */
+void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	int type;
+
+	for (type = 0; type < NUM_STEERING_TYPES; type++) {
+		const struct xe_mmio_range *range = gt->steering[type].ranges;
+
+		if (!range)
+			continue;
+
+		drm_printf(p, "%s steering: group=%#x, instance=%#x\n",
+			   xe_steering_types[type].name,
+			   gt->steering[type].group_target,
+			   gt->steering[type].instance_target);
+
+		/* Tables end at the first entry whose end is 0 */
+		for (; range->end; range++)
+			drm_printf(p, "\t0x%06x - 0x%06x\n",
+				   range->start, range->end);
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h
new file mode 100644
index 000000000000..27ca1bc880a0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_MCR_H_
+#define _XE_GT_MCR_H_
+
+#include "regs/xe_reg_defs.h"
+
+struct drm_printer;
+struct xe_gt;
+
+/* One-time steering setup for a GT */
+void xe_gt_mcr_init(struct xe_gt *gt);
+
+/* Program the steering registers that are only set once */
+void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt);
+
+/* Steered reads: a specific group/instance, or any non-terminated instance */
+u32 xe_gt_mcr_unicast_read(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
+			   int group, int instance);
+u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, struct xe_reg_mcr mcr_reg);
+
+/* Writes: a specific group/instance, or all instances at once */
+void xe_gt_mcr_unicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
+			     u32 value, int group, int instance);
+void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
+			       u32 value);
+
+/* Print the steering targets and ranges of a GT */
+void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p);
+
+#endif /* _XE_GT_MCR_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
new file mode 100644
index 000000000000..73f08f1924df
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -0,0 +1,649 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_gt_pagefault.h"
+
+#include <linux/bitfield.h>
+#include <linux/circ_buf.h>
+
+#include <drm/drm_exec.h>
+#include <drm/drm_managed.h>
+#include <drm/ttm/ttm_execbuf_util.h>
+
+#include "abi/guc_actions_abi.h"
+#include "xe_bo.h"
+#include "xe_gt.h"
+#include "xe_gt_tlb_invalidation.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_migrate.h"
+#include "xe_pt.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+
+/*
+ * Decoded page fault, filled in by get_pagefault() from the four-dword
+ * descriptor stored in a pf_queue.  All fields except @fault_unsuccessful
+ * come from the descriptor.
+ */
+struct pagefault {
+	u64 page_addr;
+	u32 asid;
+	u16 pdata;
+	u8 vfid;
+	u8 access_type;
+	u8 fault_type;
+	u8 fault_level;
+	u8 engine_class;
+	u8 engine_instance;
+	/* set by the worker when handling failed; sent back in the reply */
+	u8 fault_unsuccessful;
+	/* when set, handle_pagefault() rejects the fault with -EFAULT */
+	bool trva_fault;
+};
+
+/*
+ * Values compared against pagefault::access_type.  Only
+ * ACCESS_TYPE_ATOMIC is tested in this file (see access_is_atomic()).
+ */
+enum access_type {
+	ACCESS_TYPE_READ = 0,
+	ACCESS_TYPE_WRITE = 1,
+	ACCESS_TYPE_ATOMIC = 2,
+	ACCESS_TYPE_RESERVED = 3,
+};
+
+/*
+ * Fault kinds; presumably the encoding of pagefault::fault_type.  Not
+ * referenced by the code in this file.
+ */
+enum fault_type {
+	NOT_PRESENT = 0,
+	WRITE_ACCESS_VIOLATION = 1,
+	ATOMIC_ACCESS_VIOLATION = 2,
+};
+
+/*
+ * Decoded access counter notification, filled in by get_acc() from the
+ * four-dword descriptor stored in an acc_queue.
+ */
+struct acc {
+	u64 va_range_base;
+	u32 asid;
+	/* bit vector; get_acc_vma() uses its lowest set bit */
+	u32 sub_granularity;
+	/* region size selector, see granularity_in_byte() */
+	u8 granularity;
+	u8 vfid;
+	u8 access_type;
+	u8 engine_class;
+	u8 engine_instance;
+};
+
+/* True when the faulting access was an atomic operation. */
+static bool access_is_atomic(enum access_type access_type)
+{
+	if (access_type != ACCESS_TYPE_ATOMIC)
+		return false;
+
+	return true;
+}
+
+/*
+ * A VMA is valid on a tile when the tile's bit is set in tile_present and
+ * not set in usm.tile_invalidated.
+ */
+static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma)
+{
+	const unsigned long tile_bit = BIT(tile->id);
+
+	if (!(tile_bit & vma->tile_present))
+		return false;
+
+	return !(tile_bit & vma->usm.tile_invalidated);
+}
+
+/* True when the 4K page starting at @page_addr overlaps the VMA's range. */
+static bool vma_matches(struct xe_vma *vma, u64 page_addr)
+{
+	u64 page_last = page_addr + SZ_4K - 1;
+
+	return page_addr <= xe_vma_end(vma) - 1 &&
+	       page_last >= xe_vma_start(vma);
+}
+
+/*
+ * Find the VMA covering the faulting page.  The VMA of the previous
+ * successful fault is tried first; otherwise the VM is searched.
+ */
+static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr)
+{
+	struct xe_vma *cached = vm->usm.last_fault_vma;
+
+	/* Fast lookup */
+	if (cached && vma_matches(cached, page_addr))
+		return cached;
+
+	return xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);
+}
+
+/*
+ * Prepare a VMA for (re)binding inside a drm_exec transaction.
+ *
+ * Prepares the VMA with room for two fence slots.  For an atomic access on
+ * a discrete device the BO is migrated to the VRAM placement selected by
+ * @id (userptr VMAs are rejected with -EACCES); otherwise a BO, if there is
+ * one, is validated.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma,
+		       bool atomic, unsigned int id)
+{
+	struct xe_vm *vm = xe_vma_vm(vma);
+	struct xe_bo *bo = xe_vma_bo(vma);
+	int err;
+
+	/* Two shared slots: one for the bind, one for the move */
+	err = xe_vm_prepare_vma(exec, vma, 2);
+	if (err)
+		return err;
+
+	if (atomic && IS_DGFX(vm->xe)) {
+		if (xe_vma_is_userptr(vma))
+			return -EACCES;
+
+		/* Migrate to VRAM, move should invalidate the VMA first */
+		return xe_bo_migrate(bo, XE_PL_VRAM0 + id);
+	}
+
+	/* Create backing store if needed */
+	if (bo)
+		return xe_bo_validate(bo, vm, true);
+
+	return 0;
+}
+
+/*
+ * Service one decoded page fault.
+ *
+ * Resolves the fault's ASID to a VM, looks up the VMA for the faulting
+ * page, (re)pins userptr pages when required, binds the VMA on the
+ * faulting tile and finally issues a TLB invalidation for the VMA.
+ *
+ * A VM reference is only taken for a VM that is in fault mode, so every
+ * early return leaves the reference count balanced.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct drm_exec exec;
+	struct xe_vm *vm;
+	struct xe_vma *vma = NULL;
+	struct dma_fence *fence;
+	bool write_locked;
+	int ret = 0;
+	bool atomic;
+
+	/* SW isn't expected to handle TRTT faults */
+	if (pf->trva_fault)
+		return -EFAULT;
+
+	/*
+	 * ASID to VM.  Only take a reference on a VM we are going to use;
+	 * taking it unconditionally and then bailing out on a VM that is not
+	 * in fault mode would leak the reference.
+	 */
+	mutex_lock(&xe->usm.lock);
+	vm = xa_load(&xe->usm.asid_to_vm, pf->asid);
+	if (vm && xe_vm_in_fault_mode(vm))
+		xe_vm_get(vm);
+	else
+		vm = NULL;
+	mutex_unlock(&xe->usm.lock);
+	if (!vm)
+		return -EINVAL;
+
+retry_userptr:
+	/*
+	 * TODO: Avoid exclusive lock if VM doesn't have userptrs, or
+	 * start out read-locked?
+	 */
+	down_write(&vm->lock);
+	write_locked = true;
+	vma = lookup_vma(vm, pf->page_addr);
+	if (!vma) {
+		ret = -EINVAL;
+		goto unlock_vm;
+	}
+
+	/* The write lock is only kept for a userptr VMA that needs a repin */
+	if (!xe_vma_is_userptr(vma) ||
+	    !xe_vma_userptr_check_repin(to_userptr_vma(vma))) {
+		downgrade_write(&vm->lock);
+		write_locked = false;
+	}
+
+	trace_xe_vma_pagefault(vma);
+
+	atomic = access_is_atomic(pf->access_type);
+
+	/* Check if VMA is valid */
+	if (vma_is_valid(tile, vma) && !atomic)
+		goto unlock_vm;
+
+	/* TODO: Validate fault */
+
+	if (xe_vma_is_userptr(vma) && write_locked) {
+		struct xe_userptr_vma *uvma = to_userptr_vma(vma);
+
+		spin_lock(&vm->userptr.invalidated_lock);
+		list_del_init(&uvma->userptr.invalidate_link);
+		spin_unlock(&vm->userptr.invalidated_lock);
+
+		ret = xe_vma_userptr_pin_pages(uvma);
+		if (ret)
+			goto unlock_vm;
+
+		downgrade_write(&vm->lock);
+		write_locked = false;
+	}
+
+	/* Lock VM and BOs dma-resv */
+	drm_exec_init(&exec, 0, 0);
+	drm_exec_until_all_locked(&exec) {
+		ret = xe_pf_begin(&exec, vma, atomic, tile->id);
+		drm_exec_retry_on_contention(&exec);
+		if (ret)
+			goto unlock_dma_resv;
+	}
+
+	/* Bind VMA only to the GT that has faulted */
+	trace_xe_vma_pf_bind(vma);
+	fence = __xe_pt_bind_vma(tile, vma, xe_tile_migrate_engine(tile), NULL, 0,
+				 vma->tile_present & BIT(tile->id));
+	if (IS_ERR(fence)) {
+		ret = PTR_ERR(fence);
+		goto unlock_dma_resv;
+	}
+
+	/*
+	 * XXX: Should we drop the lock before waiting? This only helps if doing
+	 * GPU binds which is currently only done if we have to wait for more
+	 * than 10ms on a move.
+	 */
+	dma_fence_wait(fence, false);
+	dma_fence_put(fence);
+
+	if (xe_vma_is_userptr(vma))
+		ret = xe_vma_userptr_check_repin(to_userptr_vma(vma));
+	vma->usm.tile_invalidated &= ~BIT(tile->id);
+
+unlock_dma_resv:
+	drm_exec_fini(&exec);
+unlock_vm:
+	/* Remember the VMA so the next fault can try it first */
+	if (!ret)
+		vm->usm.last_fault_vma = vma;
+	if (write_locked)
+		up_write(&vm->lock);
+	else
+		up_read(&vm->lock);
+	/* -EAGAIN: start over from the VMA lookup under the write lock */
+	if (ret == -EAGAIN)
+		goto retry_userptr;
+
+	if (!ret) {
+		ret = xe_gt_tlb_invalidation_vma(gt, NULL, vma);
+		if (ret >= 0)
+			ret = 0;
+	}
+	xe_vm_put(vm);
+
+	return ret;
+}
+
+/*
+ * Send a page fault response to the GuC: the action code followed by the
+ * two reply dwords.  Returns the result of xe_guc_ct_send().
+ */
+static int send_pagefault_reply(struct xe_guc *guc,
+				struct xe_guc_pagefault_reply *reply)
+{
+	u32 msg[] = {
+		XE_GUC_ACTION_PAGE_FAULT_RES_DESC,
+		reply->dw0,
+		reply->dw1,
+	};
+	struct xe_guc_ct *ct = &guc->ct;
+
+	return xe_guc_ct_send(ct, msg, ARRAY_SIZE(msg), 0, 0);
+}
+
+/* Dump the decoded fields of a page fault at debug level. */
+static void print_pagefault(struct xe_device *xe, struct pagefault *pf)
+{
+	u32 addr_hi = upper_32_bits(pf->page_addr);
+	u32 addr_lo = lower_32_bits(pf->page_addr);
+
+	drm_dbg(&xe->drm,
+		"\n\tASID: %d\n"
+		"\tVFID: %d\n"
+		"\tPDATA: 0x%04x\n"
+		"\tFaulted Address: 0x%08x%08x\n"
+		"\tFaultType: %d\n"
+		"\tAccessType: %d\n"
+		"\tFaultLevel: %d\n"
+		"\tEngineClass: %d\n"
+		"\tEngineInstance: %d\n",
+		pf->asid, pf->vfid, pf->pdata,
+		addr_hi, addr_lo,
+		pf->fault_type, pf->access_type, pf->fault_level,
+		pf->engine_class, pf->engine_instance);
+}
+
+#define PF_MSG_LEN_DW	4
+
+/*
+ * Pop the oldest descriptor from a page fault queue and decode it into @pf.
+ *
+ * Returns true when a fault was dequeued, false when the queue was empty
+ * (in which case @pf is left untouched).  pf->fault_unsuccessful is not
+ * written here.
+ */
+static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
+{
+	const struct xe_guc_pagefault_desc *desc;
+
+	spin_lock_irq(&pf_queue->lock);
+
+	if (pf_queue->head == pf_queue->tail) {
+		spin_unlock_irq(&pf_queue->lock);
+		return false;
+	}
+
+	desc = (const struct xe_guc_pagefault_desc *)
+		(pf_queue->data + pf_queue->head);
+
+	/* dw0 */
+	pf->fault_level = FIELD_GET(PFD_FAULT_LEVEL, desc->dw0);
+	pf->trva_fault = FIELD_GET(XE2_PFD_TRVA_FAULT, desc->dw0);
+	pf->engine_class = FIELD_GET(PFD_ENG_CLASS, desc->dw0);
+	pf->engine_instance = FIELD_GET(PFD_ENG_INSTANCE, desc->dw0);
+
+	/* pdata is split across dw1 (high part) and dw0 (low part) */
+	pf->pdata = FIELD_GET(PFD_PDATA_HI, desc->dw1) <<
+		PFD_PDATA_HI_SHIFT;
+	pf->pdata |= FIELD_GET(PFD_PDATA_LO, desc->dw0);
+
+	/* dw1 / dw2 */
+	pf->asid = FIELD_GET(PFD_ASID, desc->dw1);
+	pf->vfid = FIELD_GET(PFD_VFID, desc->dw2);
+	pf->access_type = FIELD_GET(PFD_ACCESS_TYPE, desc->dw2);
+	pf->fault_type = FIELD_GET(PFD_FAULT_TYPE, desc->dw2);
+
+	/* The address is split across dw3 (high part) and dw2 (low part) */
+	pf->page_addr = (u64)(FIELD_GET(PFD_VIRTUAL_ADDR_HI, desc->dw3)) <<
+		PFD_VIRTUAL_ADDR_HI_SHIFT;
+	pf->page_addr |= FIELD_GET(PFD_VIRTUAL_ADDR_LO, desc->dw2) <<
+		PFD_VIRTUAL_ADDR_LO_SHIFT;
+
+	/* Consume the entry */
+	pf_queue->head = (pf_queue->head + PF_MSG_LEN_DW) %
+		PF_QUEUE_NUM_DW;
+
+	spin_unlock_irq(&pf_queue->lock);
+
+	return true;
+}
+
+/*
+ * True when the queue cannot take another message: the free space is not
+ * larger than one message.  The queue lock must be held.
+ */
+static bool pf_queue_full(struct pf_queue *pf_queue)
+{
+	lockdep_assert_held(&pf_queue->lock);
+
+	if (CIRC_SPACE(pf_queue->tail, pf_queue->head, PF_QUEUE_NUM_DW) <=
+	    PF_MSG_LEN_DW)
+		return true;
+
+	return false;
+}
+
+/*
+ * Queue a page fault message for deferred handling.
+ *
+ * The message is copied into the queue selected by its ASID and the
+ * queue's worker is scheduled.
+ *
+ * Returns 0 on success, -EPROTO when @len is not PF_MSG_LEN_DW, or -ENOSPC
+ * when the queue is full (the message is then dropped).
+ */
+int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct pf_queue *pf_queue;
+	unsigned long flags;
+	bool full;
+	u32 asid;
+
+	if (unlikely(len != PF_MSG_LEN_DW))
+		return -EPROTO;
+
+	/* Faults are spread over the queues by ASID */
+	asid = FIELD_GET(PFD_ASID, msg[1]);
+	pf_queue = &gt->usm.pf_queue[asid % NUM_PF_QUEUE];
+
+	spin_lock_irqsave(&pf_queue->lock, flags);
+	full = pf_queue_full(pf_queue);
+	if (full) {
+		drm_warn(&xe->drm, "PF Queue full, shouldn't be possible");
+	} else {
+		memcpy(pf_queue->data + pf_queue->tail, msg, len * sizeof(u32));
+		pf_queue->tail = (pf_queue->tail + len) % PF_QUEUE_NUM_DW;
+		queue_work(gt->usm.pf_wq, &pf_queue->worker);
+	}
+	spin_unlock_irqrestore(&pf_queue->lock, flags);
+
+	if (full)
+		return -ENOSPC;
+
+	return 0;
+}
+
+#define USM_QUEUE_MAX_RUNTIME_MS	20
+
+/*
+ * Worker for one page fault queue.
+ *
+ * Drains the queue: each fault is handled and answered with a reply to the
+ * GuC.  After USM_QUEUE_MAX_RUNTIME_MS the worker requeues itself if
+ * entries remain, instead of continuing to loop.
+ */
+static void pf_queue_work_func(struct work_struct *w)
+{
+	struct pf_queue *pf_queue = container_of(w, struct pf_queue, worker);
+	struct xe_gt *gt = pf_queue->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_guc_pagefault_reply reply = {};
+	struct pagefault pf = {};
+	unsigned long threshold;
+	int ret;
+
+	threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS);
+
+	while (get_pagefault(pf_queue, &pf)) {
+		ret = handle_pagefault(gt, &pf);
+
+		/*
+		 * @pf is reused for every entry and get_pagefault() does not
+		 * touch this field, so it has to be rewritten for each fault.
+		 * Otherwise one failure would mark all later replies of this
+		 * run as unsuccessful.
+		 */
+		pf.fault_unsuccessful = ret ? 1 : 0;
+		if (unlikely(ret)) {
+			print_pagefault(xe, &pf);
+			drm_dbg(&xe->drm, "Fault response: Unsuccessful %d\n", ret);
+		}
+
+		reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
+			FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) |
+			FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
+			FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) |
+			FIELD_PREP(PFR_ASID, pf.asid);
+
+		reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) |
+			FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) |
+			FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) |
+			FIELD_PREP(PFR_PDATA, pf.pdata);
+
+		send_pagefault_reply(&gt->uc.guc, &reply);
+
+		/* Out of time budget with work left: requeue and stop */
+		if (time_after(jiffies, threshold) &&
+		    pf_queue->head != pf_queue->tail) {
+			queue_work(gt->usm.pf_wq, w);
+			break;
+		}
+	}
+}
+
+static void acc_queue_work_func(struct work_struct *w);
+
+/*
+ * Set up page fault and access counter handling for a GT.
+ *
+ * Initialises every queue's lock and worker and allocates the two
+ * workqueues.  Does nothing on devices without USM.
+ *
+ * Returns 0 on success or -ENOMEM if a workqueue cannot be allocated; in
+ * that case no workqueue is left allocated.
+ */
+int xe_gt_pagefault_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int i;
+
+	if (!xe->info.has_usm)
+		return 0;
+
+	for (i = 0; i < NUM_PF_QUEUE; ++i) {
+		gt->usm.pf_queue[i].gt = gt;
+		spin_lock_init(&gt->usm.pf_queue[i].lock);
+		INIT_WORK(&gt->usm.pf_queue[i].worker, pf_queue_work_func);
+	}
+	for (i = 0; i < NUM_ACC_QUEUE; ++i) {
+		gt->usm.acc_queue[i].gt = gt;
+		spin_lock_init(&gt->usm.acc_queue[i].lock);
+		INIT_WORK(&gt->usm.acc_queue[i].worker, acc_queue_work_func);
+	}
+
+	gt->usm.pf_wq = alloc_workqueue("xe_gt_page_fault_work_queue",
+					WQ_UNBOUND | WQ_HIGHPRI, NUM_PF_QUEUE);
+	if (!gt->usm.pf_wq)
+		return -ENOMEM;
+
+	gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue",
+					 WQ_UNBOUND | WQ_HIGHPRI,
+					 NUM_ACC_QUEUE);
+	if (!gt->usm.acc_wq) {
+		/* Don't leak the page fault workqueue allocated above */
+		destroy_workqueue(gt->usm.pf_wq);
+		gt->usm.pf_wq = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Empty every page fault and access counter queue of a GT by resetting its
+ * head and tail under the queue lock.  Does nothing on devices without
+ * USM.
+ */
+void xe_gt_pagefault_reset(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int i;
+
+	if (!xe->info.has_usm)
+		return;
+
+	for (i = 0; i < NUM_PF_QUEUE; ++i) {
+		struct pf_queue *pf_queue = &gt->usm.pf_queue[i];
+
+		spin_lock_irq(&pf_queue->lock);
+		pf_queue->head = 0;
+		pf_queue->tail = 0;
+		spin_unlock_irq(&pf_queue->lock);
+	}
+
+	for (i = 0; i < NUM_ACC_QUEUE; ++i) {
+		struct acc_queue *acc_queue = &gt->usm.acc_queue[i];
+
+		spin_lock(&acc_queue->lock);
+		acc_queue->head = 0;
+		acc_queue->tail = 0;
+		spin_unlock(&acc_queue->lock);
+	}
+}
+
+/*
+ * Translate a granularity selector (0..3) into a region size in bytes.
+ * Any other selector yields 0.
+ */
+static int granularity_in_byte(int val)
+{
+	static const int region_size[] = {
+		SZ_128K,
+		SZ_2M,
+		SZ_16M,
+		SZ_64M,
+	};
+
+	if (val < 0 || val >= (int)ARRAY_SIZE(region_size))
+		return 0;
+
+	return region_size[val];
+}
+
+/* Size in bytes of one sub-region: a region is divided into 32 of them. */
+static int sub_granularity_in_byte(int val)
+{
+	int region_bytes = granularity_in_byte(val);
+
+	return region_bytes / 32;
+}
+
+/* Dump the decoded fields of an access counter notification as a warning. */
+static void print_acc(struct xe_device *xe, struct acc *acc)
+{
+	const char *type = acc->access_type ? "AC_NTFY_VAL" : "AC_TRIG_VAL";
+	int region_kb = granularity_in_byte(acc->granularity) / SZ_1K;
+	int sub_region_kb = sub_granularity_in_byte(acc->granularity) / SZ_1K;
+
+	drm_warn(&xe->drm,
+		 "Access counter request:\n"
+		 "\tType: %s\n"
+		 "\tASID: %d\n"
+		 "\tVFID: %d\n"
+		 "\tEngine: %d:%d\n"
+		 "\tGranularity: 0x%x KB Region/ %d KB sub-granularity\n"
+		 "\tSub_Granularity Vector: 0x%08x\n"
+		 "\tVA Range base: 0x%016llx\n",
+		 type,
+		 acc->asid, acc->vfid, acc->engine_class, acc->engine_instance,
+		 region_kb, sub_region_kb,
+		 acc->sub_granularity, acc->va_range_base);
+}
+
+/*
+ * Find the VMA for an access counter notification.  The address looked up
+ * is the start of the sub-region selected by the lowest set bit of the
+ * sub-granularity vector.
+ */
+static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc)
+{
+	int sub_region = ffs(acc->sub_granularity) - 1;
+	int offset = sub_region * sub_granularity_in_byte(acc->granularity);
+	u64 page_va = acc->va_range_base + offset;
+
+	return xe_vm_find_overlapping_vma(vm, page_va, SZ_4K);
+}
+
+/*
+ * Service one decoded access counter notification.
+ *
+ * Only ACC_TRIGGER notifications are handled.  The notification's ASID is
+ * resolved to a VM, the VMA is looked up and, when it is backed by a BO,
+ * xe_pf_begin() is run on it with the atomic flag set.
+ *
+ * A VM reference is only taken for a VM that is in fault mode, so every
+ * early return leaves the reference count balanced.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int handle_acc(struct xe_gt *gt, struct acc *acc)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct drm_exec exec;
+	struct xe_vm *vm;
+	struct xe_vma *vma;
+	int ret = 0;
+
+	/* We only support ACC_TRIGGER at the moment */
+	if (acc->access_type != ACC_TRIGGER)
+		return -EINVAL;
+
+	/*
+	 * ASID to VM.  Only take a reference on a VM we are going to use;
+	 * taking it unconditionally and then bailing out on a VM that is not
+	 * in fault mode would leak the reference.
+	 */
+	mutex_lock(&xe->usm.lock);
+	vm = xa_load(&xe->usm.asid_to_vm, acc->asid);
+	if (vm && xe_vm_in_fault_mode(vm))
+		xe_vm_get(vm);
+	else
+		vm = NULL;
+	mutex_unlock(&xe->usm.lock);
+	if (!vm)
+		return -EINVAL;
+
+	down_read(&vm->lock);
+
+	/* Lookup VMA */
+	vma = get_acc_vma(vm, acc);
+	if (!vma) {
+		ret = -EINVAL;
+		goto unlock_vm;
+	}
+
+	trace_xe_vma_acc(vma);
+
+	/* Userptr or null can't be migrated, nothing to do */
+	if (xe_vma_has_no_bo(vma))
+		goto unlock_vm;
+
+	/* Lock VM and BOs dma-resv */
+	drm_exec_init(&exec, 0, 0);
+	drm_exec_until_all_locked(&exec) {
+		ret = xe_pf_begin(&exec, vma, true, tile->id);
+		drm_exec_retry_on_contention(&exec);
+		if (ret)
+			break;
+	}
+
+	drm_exec_fini(&exec);
+unlock_vm:
+	up_read(&vm->lock);
+	xe_vm_put(vm);
+
+	return ret;
+}
+
+/* Combine two values into a u64: @hi__ in bits 63:32, OR-ed with @low__. */
+#define make_u64(hi__, low__)  ((u64)(hi__) << 32 | (u64)(low__))
+
+#define ACC_MSG_LEN_DW        4
+
+/*
+ * Pop the oldest descriptor from an access counter queue and decode it
+ * into @acc.
+ *
+ * Returns true when an entry was dequeued, false when the queue was empty
+ * (in which case @acc is left untouched).
+ */
+static bool get_acc(struct acc_queue *acc_queue, struct acc *acc)
+{
+	const struct xe_guc_acc_desc *desc;
+
+	spin_lock(&acc_queue->lock);
+
+	if (acc_queue->head == acc_queue->tail) {
+		spin_unlock(&acc_queue->lock);
+		return false;
+	}
+
+	desc = (const struct xe_guc_acc_desc *)
+		(acc_queue->data + acc_queue->head);
+
+	acc->access_type = FIELD_GET(ACC_TYPE, desc->dw0);
+	acc->asid =  FIELD_GET(ACC_ASID, desc->dw1);
+	acc->vfid =  FIELD_GET(ACC_VFID, desc->dw2);
+	acc->engine_class = FIELD_GET(ACC_ENG_CLASS, desc->dw1);
+	acc->engine_instance = FIELD_GET(ACC_ENG_INSTANCE, desc->dw1);
+	acc->granularity = FIELD_GET(ACC_GRANULARITY, desc->dw2);
+
+	/* The sub-granularity vector is split across dw1 and dw0 */
+	acc->sub_granularity = (FIELD_GET(ACC_SUBG_HI, desc->dw1) << 31) |
+		FIELD_GET(ACC_SUBG_LO, desc->dw0);
+
+	/* The range base is split across dw3 (high) and dw2 (low) */
+	acc->va_range_base = make_u64(desc->dw3 & ACC_VIRTUAL_ADDR_RANGE_HI,
+				      desc->dw2 & ACC_VIRTUAL_ADDR_RANGE_LO);
+
+	/* Consume the entry */
+	acc_queue->head = (acc_queue->head + ACC_MSG_LEN_DW) %
+			  ACC_QUEUE_NUM_DW;
+
+	spin_unlock(&acc_queue->lock);
+
+	return true;
+}
+
+/*
+ * Worker for one access counter queue.
+ *
+ * Drains the queue, warning about every entry that could not be handled.
+ * After USM_QUEUE_MAX_RUNTIME_MS the worker requeues itself if entries
+ * remain, instead of continuing to loop.
+ */
+static void acc_queue_work_func(struct work_struct *w)
+{
+	struct acc_queue *acc_queue = container_of(w, struct acc_queue, worker);
+	struct xe_gt *gt = acc_queue->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	unsigned long deadline =
+		jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS);
+	struct acc acc = {};
+
+	while (get_acc(acc_queue, &acc)) {
+		int err = handle_acc(gt, &acc);
+
+		if (unlikely(err)) {
+			print_acc(xe, &acc);
+			drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", err);
+		}
+
+		/* Keep going while inside the time budget */
+		if (!time_after(jiffies, deadline))
+			continue;
+
+		/* Out of budget with work left: requeue and stop */
+		if (acc_queue->head != acc_queue->tail) {
+			queue_work(gt->usm.acc_wq, w);
+			break;
+		}
+	}
+}
+
+/*
+ * True when the queue cannot take another message: the free space is not
+ * larger than one message.  The queue lock must be held.
+ */
+static bool acc_queue_full(struct acc_queue *acc_queue)
+{
+	lockdep_assert_held(&acc_queue->lock);
+
+	if (CIRC_SPACE(acc_queue->tail, acc_queue->head, ACC_QUEUE_NUM_DW) <=
+	    ACC_MSG_LEN_DW)
+		return true;
+
+	return false;
+}
+
+/*
+ * Queue an access counter notification for deferred handling.
+ *
+ * The message is copied into the queue selected by its ASID and the
+ * queue's worker is scheduled.
+ *
+ * Returns 0 on success, -EPROTO when @len is not ACC_MSG_LEN_DW, or
+ * -ENOSPC when the queue is full (the message is then dropped).
+ */
+int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	struct acc_queue *acc_queue;
+	bool full;
+	u32 asid;
+
+	if (unlikely(len != ACC_MSG_LEN_DW))
+		return -EPROTO;
+
+	/* Notifications are spread over the queues by ASID */
+	asid = FIELD_GET(ACC_ASID, msg[1]);
+	acc_queue = gt->usm.acc_queue + (asid % NUM_ACC_QUEUE);
+
+	spin_lock(&acc_queue->lock);
+	full = acc_queue_full(acc_queue);
+	if (full) {
+		drm_warn(&gt_to_xe(gt)->drm, "ACC Queue full, dropping ACC");
+	} else {
+		memcpy(acc_queue->data + acc_queue->tail, msg,
+		       len * sizeof(u32));
+		acc_queue->tail = (acc_queue->tail + len) % ACC_QUEUE_NUM_DW;
+		queue_work(gt->usm.acc_wq, &acc_queue->worker);
+	}
+	spin_unlock(&acc_queue->lock);
+
+	if (full)
+		return -ENOSPC;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.h b/drivers/gpu/drm/xe/xe_gt_pagefault.h
new file mode 100644
index 000000000000..839c065a5e4c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_PAGEFAULT_H_
+#define _XE_GT_PAGEFAULT_H_
+
+#include <linux/types.h>
+
+struct xe_gt;
+struct xe_guc;
+
+int xe_gt_pagefault_init(struct xe_gt *gt);
+void xe_gt_pagefault_reset(struct xe_gt *gt);
+int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len);
+
+#endif	/* _XE_GT_PAGEFAULT_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_printk.h b/drivers/gpu/drm/xe/xe_gt_printk.h
new file mode 100644
index 000000000000..5991bcadd47e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_printk.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_PRINTK_H_
+#define _XE_GT_PRINTK_H_
+
+#include <drm/drm_print.h>
+
+#include "xe_device_types.h"
+
+#define xe_gt_printk(_gt, _level, _fmt, ...) \
+	drm_##_level(&gt_to_xe(_gt)->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+
+#define xe_gt_err(_gt, _fmt, ...) \
+	xe_gt_printk((_gt), err, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_warn(_gt, _fmt, ...) \
+	xe_gt_printk((_gt), warn, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_notice(_gt, _fmt, ...) \
+	xe_gt_printk((_gt), notice, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_info(_gt, _fmt, ...) \
+	xe_gt_printk((_gt), info, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_dbg(_gt, _fmt, ...) \
+	xe_gt_printk((_gt), dbg, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_err_ratelimited(_gt, _fmt, ...) \
+	xe_gt_printk((_gt), err_ratelimited, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_WARN(_gt, _condition, _fmt, ...) \
+	drm_WARN(&gt_to_xe(_gt)->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+
+#define xe_gt_WARN_ONCE(_gt, _condition, _fmt, ...) \
+	drm_WARN_ONCE(&gt_to_xe(_gt)->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+
+#define xe_gt_WARN_ON(_gt, _condition) \
+	xe_gt_WARN((_gt), _condition, "%s(%s)", "gt_WARN_ON", __stringify(_condition))
+
+#define xe_gt_WARN_ON_ONCE(_gt, _condition) \
+	xe_gt_WARN_ONCE((_gt), _condition, "%s(%s)", "gt_WARN_ON_ONCE", __stringify(_condition))
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c
new file mode 100644
index 000000000000..c69d2e8a0fe1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_gt_sysfs.h"
+
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_gt.h"
+
+static void xe_gt_sysfs_kobj_release(struct kobject *kobj)
+{
+	kfree(kobj);
+}
+
+static const struct kobj_type xe_gt_sysfs_kobj_type = {
+	.release = xe_gt_sysfs_kobj_release,
+	.sysfs_ops = &kobj_sysfs_ops,
+};
+
+static void gt_sysfs_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_gt *gt = arg;
+
+	kobject_put(gt->sysfs);
+}
+
+/* Add the "gt<id>" sysfs directory below the tile's and drop it again via a drmm action */
+void xe_gt_sysfs_init(struct xe_gt *gt)
+{
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct kobj_gt *kg;
+	int err;
+
+	kg = kzalloc(sizeof(*kg), GFP_KERNEL);
+	if (!kg)
+		return;
+
+	kobject_init(&kg->base, &xe_gt_sysfs_kobj_type);
+	kg->gt = gt;
+
+	err = kobject_add(&kg->base, tile->sysfs, "gt%d", gt->info.id);
+	if (err) {
+		drm_warn(&xe->drm, "failed to add GT sysfs directory, err: %d\n", err);
+		kobject_put(&kg->base);
+		return;
+	}
+
+	gt->sysfs = &kg->base;
+
+	/* Registration failure is only reported; there is nothing else to unwind here */
+	err = drmm_add_action_or_reset(&xe->drm, gt_sysfs_fini, gt);
+	if (err)
+		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
+			 __func__, err);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.h b/drivers/gpu/drm/xe/xe_gt_sysfs.h
new file mode 100644
index 000000000000..e3ec278ca0be
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_SYSFS_H_
+#define _XE_GT_SYSFS_H_
+
+#include "xe_gt_sysfs_types.h"
+
+void xe_gt_sysfs_init(struct xe_gt *gt);
+
+static inline struct xe_gt *
+kobj_to_gt(struct kobject *kobj)
+{
+	return container_of(kobj, struct kobj_gt, base)->gt;
+}
+
+#endif /* _XE_GT_SYSFS_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs_types.h b/drivers/gpu/drm/xe/xe_gt_sysfs_types.h
new file mode 100644
index 000000000000..d3bc6b83360f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs_types.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_SYSFS_TYPES_H_
+#define _XE_GT_SYSFS_TYPES_H_
+
+#include <linux/kobject.h>
+
+struct xe_gt;
+
+/**
+ * struct kobj_gt - A GT's kobject struct that connects the kobject and the GT
+ *
+ * When dealing with multiple GTs, this struct helps to understand which GT
+ * needs to be addressed on a given sysfs call.
+ */
+struct kobj_gt {
+	/** @base: The actual kobject */
+	struct kobject base;
+	/** @gt: A pointer to the GT itself */
+	struct xe_gt *gt;
+};
+
+#endif	/* _XE_GT_SYSFS_TYPES_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c
new file mode 100644
index 000000000000..63d640591a52
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include <regs/xe_gt_regs.h>
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_sysfs.h"
+#include "xe_gt_throttle_sysfs.h"
+#include "xe_mmio.h"
+
+/**
+ * DOC: Xe GT Throttle
+ *
+ * Provides sysfs entries for frequency throttle reasons in GT
+ *
+ * device/gt#/freq0/throttle/status - Overall status
+ * device/gt#/freq0/throttle/reason_pl1 - Frequency throttle due to PL1
+ * device/gt#/freq0/throttle/reason_pl2 - Frequency throttle due to PL2
+ * device/gt#/freq0/throttle/reason_pl4 - Frequency throttle due to PL4, Iccmax etc.
+ * device/gt#/freq0/throttle/reason_thermal - Frequency throttle due to thermal
+ * device/gt#/freq0/throttle/reason_prochot - Frequency throttle due to prochot
+ * device/gt#/freq0/throttle/reason_ratl - Frequency throttle due to RATL
+ * device/gt#/freq0/throttle/reason_vr_thermalert - Frequency throttle due to VR THERMALERT
+ * device/gt#/freq0/throttle/reason_vr_tdc - Frequency throttle due to VR TDC
+ */
+
+/* Resolve the GT for a throttle attribute: kobj_to_gt() of the parent kobject */
+static struct xe_gt *dev_to_gt(struct device *dev)
+{
+	return kobj_to_gt(dev->kobj.parent);
+}
+
+/*
+ * Read the perf-limit-reasons register of @gt: media GTs are read through
+ * MTL_MEDIA_PERF_LIMIT_REASONS, every other GT through GT0_PERF_LIMIT_REASONS.
+ */
+static u32 read_perf_limit_reasons(struct xe_gt *gt)
+{
+	if (xe_gt_is_media_type(gt))
+		return xe_mmio_read32(gt, MTL_MEDIA_PERF_LIMIT_REASONS);
+
+	return xe_mmio_read32(gt, GT0_PERF_LIMIT_REASONS);
+}
+
+static u32 read_status(struct xe_gt *gt)
+{
+	u32 status = read_perf_limit_reasons(gt) & GT0_PERF_LIMIT_REASONS_MASK;
+
+	return status;
+}
+
+static u32 read_reason_pl1(struct xe_gt *gt)
+{
+	u32 pl1 = read_perf_limit_reasons(gt) & POWER_LIMIT_1_MASK;
+
+	return pl1;
+}
+
+static u32 read_reason_pl2(struct xe_gt *gt)
+{
+	u32 pl2 = read_perf_limit_reasons(gt) & POWER_LIMIT_2_MASK;
+
+	return pl2;
+}
+
+static u32 read_reason_pl4(struct xe_gt *gt)
+{
+	u32 pl4 = read_perf_limit_reasons(gt) & POWER_LIMIT_4_MASK;
+
+	return pl4;
+}
+
+static u32 read_reason_thermal(struct xe_gt *gt)
+{
+	u32 thermal = read_perf_limit_reasons(gt) & THERMAL_LIMIT_MASK;
+
+	return thermal;
+}
+
+static u32 read_reason_prochot(struct xe_gt *gt)
+{
+	u32 prochot = read_perf_limit_reasons(gt) & PROCHOT_MASK;
+
+	return prochot;
+}
+
+static u32 read_reason_ratl(struct xe_gt *gt)
+{
+	u32 ratl = read_perf_limit_reasons(gt) & RATL_MASK;
+
+	return ratl;
+}
+
+static u32 read_reason_vr_thermalert(struct xe_gt *gt)
+{
+	u32 thermalert = read_perf_limit_reasons(gt) & VR_THERMALERT_MASK;
+
+	return thermalert;
+}
+
+static u32 read_reason_vr_tdc(struct xe_gt *gt)
+{
+	u32 tdc = read_perf_limit_reasons(gt) & VR_TDC_MASK;
+
+	return tdc;
+}
+
+static ssize_t status_show(struct device *dev,
+			   struct device_attribute *attr,
+			   char *buff)
+{
+	struct xe_gt *gt = dev_to_gt(dev);
+	bool status = !!read_status(gt);
+
+	return sysfs_emit(buff, "%u\n", status);
+}
+static DEVICE_ATTR_RO(status);
+
+static ssize_t reason_pl1_show(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buff)
+{
+	struct xe_gt *gt = dev_to_gt(dev);
+	bool pl1 = !!read_reason_pl1(gt);
+
+	return sysfs_emit(buff, "%u\n", pl1);
+}
+static DEVICE_ATTR_RO(reason_pl1);
+
+static ssize_t reason_pl2_show(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buff)
+{
+	struct xe_gt *gt = dev_to_gt(dev);
+	bool pl2 = !!read_reason_pl2(gt);
+
+	return sysfs_emit(buff, "%u\n", pl2);
+}
+static DEVICE_ATTR_RO(reason_pl2);
+
+static ssize_t reason_pl4_show(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buff)
+{
+	struct xe_gt *gt = dev_to_gt(dev);
+	bool pl4 = !!read_reason_pl4(gt);
+
+	return sysfs_emit(buff, "%u\n", pl4);
+}
+static DEVICE_ATTR_RO(reason_pl4);
+
+static ssize_t reason_thermal_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buff)
+{
+	struct xe_gt *gt = dev_to_gt(dev);
+	bool thermal = !!read_reason_thermal(gt);
+
+	return sysfs_emit(buff, "%u\n", thermal);
+}
+static DEVICE_ATTR_RO(reason_thermal);
+
+static ssize_t reason_prochot_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buff)
+{
+	struct xe_gt *gt = dev_to_gt(dev);
+	bool prochot = !!read_reason_prochot(gt);
+
+	return sysfs_emit(buff, "%u\n", prochot);
+}
+static DEVICE_ATTR_RO(reason_prochot);
+
+static ssize_t reason_ratl_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buff)
+{
+	struct xe_gt *gt = dev_to_gt(dev);
+	bool ratl = !!read_reason_ratl(gt);
+
+	return sysfs_emit(buff, "%u\n", ratl);
+}
+static DEVICE_ATTR_RO(reason_ratl);
+
+static ssize_t reason_vr_thermalert_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buff)
+{
+	struct xe_gt *gt = dev_to_gt(dev);
+	bool thermalert = !!read_reason_vr_thermalert(gt);
+
+	return sysfs_emit(buff, "%u\n", thermalert);
+}
+static DEVICE_ATTR_RO(reason_vr_thermalert);
+
+static ssize_t reason_vr_tdc_show(struct device *dev,
+				  struct device_attribute *attr,
+				  char *buff)
+{
+	struct xe_gt *gt = dev_to_gt(dev);
+	bool tdc = !!read_reason_vr_tdc(gt);
+
+	return sysfs_emit(buff, "%u\n", tdc);
+}
+static DEVICE_ATTR_RO(reason_vr_tdc);
+
+static struct attribute *throttle_attrs[] = {
+	&dev_attr_status.attr,
+	&dev_attr_reason_pl1.attr,
+	&dev_attr_reason_pl2.attr,
+	&dev_attr_reason_pl4.attr,
+	&dev_attr_reason_thermal.attr,
+	&dev_attr_reason_prochot.attr,
+	&dev_attr_reason_ratl.attr,
+	&dev_attr_reason_vr_thermalert.attr,
+	&dev_attr_reason_vr_tdc.attr,
+	NULL
+};
+
+static const struct attribute_group throttle_group_attrs = {
+	.name = "throttle",
+	.attrs = throttle_attrs,
+};
+
+static void gt_throttle_sysfs_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_gt *gt = arg;
+
+	sysfs_remove_group(gt->freq, &throttle_group_attrs);
+}
+
+void xe_gt_throttle_sysfs_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int err;
+
+	err = sysfs_create_group(gt->freq, &throttle_group_attrs);
+	if (err) {
+		drm_warn(&xe->drm, "failed to register throttle sysfs, err: %d\n", err);
+		return;
+	}
+
+	err = drmm_add_action_or_reset(&xe->drm, gt_throttle_sysfs_fini, gt);
+	if (err)
+		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
+			 __func__, err);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h
new file mode 100644
index 000000000000..3ecfd4beffe1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_THROTTLE_SYSFS_H_
+#define _XE_GT_THROTTLE_SYSFS_H_
+
+#include <drm/drm_managed.h>
+
+struct xe_gt;
+
+void xe_gt_throttle_sysfs_init(struct xe_gt *gt);
+
+#endif /* _XE_GT_THROTTLE_SYSFS_H_ */
+
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
new file mode 100644
index 000000000000..f4c485289dbe
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -0,0 +1,418 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_gt_tlb_invalidation.h"
+
+#include "abi/guc_actions_abi.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_trace.h"
+
+#define TLB_TIMEOUT	(HZ / 4)
+
+/* Delayed work: fail pending TLB invalidation fences that are older than TLB_TIMEOUT */
+static void xe_gt_tlb_fence_timeout(struct work_struct *work)
+{
+	struct xe_gt *gt = container_of(work, struct xe_gt, tlb_invalidation.fence_tdr.work);
+	struct xe_gt_tlb_invalidation_fence *fence, *next;
+
+	spin_lock_irq(&gt->tlb_invalidation.pending_lock);
+	list_for_each_entry_safe(fence, next,
+				 &gt->tlb_invalidation.pending_fences, link) {
+		s64 since_inval_ms = ktime_ms_delta(ktime_get(),
+						    fence->invalidation_time);
+
+		/* Fences are appended in send order, so every later entry is younger */
+		if (msecs_to_jiffies(since_inval_ms) < TLB_TIMEOUT)
+			break;
+
+		trace_xe_gt_tlb_invalidation_fence_timeout(fence);
+		drm_err(&gt_to_xe(gt)->drm, "gt%d: TLB invalidation fence timeout, seqno=%d recv=%d\n",
+			gt->info.id, fence->seqno, gt->tlb_invalidation.seqno_recv);
+
+		list_del(&fence->link);
+		fence->base.error = -ETIME;
+		dma_fence_signal(&fence->base);
+		dma_fence_put(&fence->base);
+	}
+	if (!list_empty(&gt->tlb_invalidation.pending_fences))
+		queue_delayed_work(system_wq, &gt->tlb_invalidation.fence_tdr,
+				   TLB_TIMEOUT);
+	spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
+}
+
+/**
+ * xe_gt_tlb_invalidation_init - Initialize GT TLB invalidation state
+ * @gt: graphics tile
+ *
+ * Initialize GT TLB invalidation state, purely software initialization, should
+ * be called once during driver load.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
+{
+	gt->tlb_invalidation.seqno = 1;
+	INIT_LIST_HEAD(&gt->tlb_invalidation.pending_fences);
+	spin_lock_init(&gt->tlb_invalidation.pending_lock);
+	spin_lock_init(&gt->tlb_invalidation.lock);
+	gt->tlb_invalidation.fence_context = dma_fence_context_alloc(1);
+	INIT_DELAYED_WORK(&gt->tlb_invalidation.fence_tdr,
+			  xe_gt_tlb_fence_timeout);
+
+	return 0;
+}
+
+static void
+__invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence)
+{
+	trace_xe_gt_tlb_invalidation_fence_signal(fence);
+	dma_fence_signal(&fence->base);
+	dma_fence_put(&fence->base);
+}
+
+static void
+invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence)
+{
+	list_del(&fence->link);
+	__invalidation_fence_signal(fence);
+}
+
+/**
+ * xe_gt_tlb_invalidation_reset - Reset GT TLB invalidation state
+ * @gt: graphics tile
+ *
+ * Signal any pending invalidation fences, should be called during a GT reset
+ */
+void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
+{
+	struct xe_gt_tlb_invalidation_fence *fence, *next;
+	struct xe_guc *guc = &gt->uc.guc;
+	int pending_seqno;
+
+	/*
+	 * CT channel is already disabled at this point. No new TLB requests can
+	 * appear.
+	 */
+
+	mutex_lock(&gt->uc.guc.ct.lock);
+	spin_lock_irq(&gt->tlb_invalidation.pending_lock);
+	cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);
+	/*
+	 * We might have various kworkers waiting for TLB flushes to complete
+	 * which are not tracked with an explicit TLB fence, however at this
+	 * stage that will never happen since the CT is already disabled, so
+	 * make sure we signal them here under the assumption that we have
+	 * completed a full GT reset.
+	 */
+	if (gt->tlb_invalidation.seqno == 1)
+		pending_seqno = TLB_INVALIDATION_SEQNO_MAX - 1;
+	else
+		pending_seqno = gt->tlb_invalidation.seqno - 1;
+	WRITE_ONCE(gt->tlb_invalidation.seqno_recv, pending_seqno);
+	wake_up_all(&guc->ct.wq);
+
+	list_for_each_entry_safe(fence, next,
+				 &gt->tlb_invalidation.pending_fences, link)
+		invalidation_fence_signal(fence);
+	spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
+	mutex_unlock(&gt->uc.guc.ct.lock);
+}
+
+static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno)
+{
+	int seqno_recv = READ_ONCE(gt->tlb_invalidation.seqno_recv);
+
+	if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2))
+		return false;
+
+	if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2))
+		return true;
+
+	return seqno_recv >= seqno;
+}
+
+static int send_tlb_invalidation(struct xe_guc *guc,
+				 struct xe_gt_tlb_invalidation_fence *fence,
+				 u32 *action, int len)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	int seqno;
+	int ret;
+
+	/*
+	 * XXX: The seqno algorithm relies on TLB invalidation being processed
+	 * in order which they currently are, if that changes the algorithm will
+	 * need to be updated.
+	 */
+
+	mutex_lock(&guc->ct.lock);
+	seqno = gt->tlb_invalidation.seqno;
+	if (fence) {
+		fence->seqno = seqno;
+		trace_xe_gt_tlb_invalidation_fence_send(fence);
+	}
+	action[1] = seqno;
+	ret = xe_guc_ct_send_locked(&guc->ct, action, len,
+				    G2H_LEN_DW_TLB_INVALIDATE, 1);
+	if (!ret && fence) {
+		spin_lock_irq(&gt->tlb_invalidation.pending_lock);
+		/*
+		 * We haven't actually published the TLB fence as per
+		 * pending_fences, but in theory our seqno could have already
+		 * been written as we acquired the pending_lock. In such a case
+		 * we can just go ahead and signal the fence here.
+		 */
+		if (tlb_invalidation_seqno_past(gt, seqno)) {
+			__invalidation_fence_signal(fence);
+		} else {
+			fence->invalidation_time = ktime_get();
+			list_add_tail(&fence->link,
+				      &gt->tlb_invalidation.pending_fences);
+
+			if (list_is_singular(&gt->tlb_invalidation.pending_fences))
+				queue_delayed_work(system_wq,
+						   &gt->tlb_invalidation.fence_tdr,
+						   TLB_TIMEOUT);
+		}
+		spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
+	} else if (ret < 0 && fence) {
+		__invalidation_fence_signal(fence);
+	}
+	if (!ret) {
+		gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) %
+			TLB_INVALIDATION_SEQNO_MAX;
+		if (!gt->tlb_invalidation.seqno)
+			gt->tlb_invalidation.seqno = 1;
+		ret = seqno;
+	}
+	mutex_unlock(&guc->ct.lock);
+
+	return ret;
+}
+
+#define MAKE_INVAL_OP(type)	((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
+		XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \
+		XE_GUC_TLB_INVAL_FLUSH_CACHE)
+
+/**
+ * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC
+ * @gt: graphics tile
+ *
+ * Issue a TLB invalidation for the GuC. Completion of TLB is asynchronous and
+ * caller can use seqno + xe_gt_tlb_invalidation_wait to wait for completion.
+ *
+ * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
+ * negative error code on error.
+ */
+int xe_gt_tlb_invalidation_guc(struct xe_gt *gt)
+{
+	u32 action[] = {
+		XE_GUC_ACTION_TLB_INVALIDATION,
+		0,  /* seqno, replaced in send_tlb_invalidation */
+		MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC),
+	};
+
+	return send_tlb_invalidation(&gt->uc.guc, NULL, action,
+				     ARRAY_SIZE(action));
+}
+
+/**
+ * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
+ * @gt: graphics tile
+ * @fence: invalidation fence which will be signaled on TLB invalidation
+ * completion, can be NULL
+ * @vma: VMA to invalidate
+ *
+ * Issue a range based TLB invalidation if supported, if not fallback to a full
+ * TLB invalidation. Completion of TLB is asynchronous and caller can either use
+ * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for
+ * completion.
+ *
+ * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
+ * negative error code on error.
+ */
+int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
+			       struct xe_gt_tlb_invalidation_fence *fence,
+			       struct xe_vma *vma)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+#define MAX_TLB_INVALIDATION_LEN	7
+	u32 action[MAX_TLB_INVALIDATION_LEN];
+	int len = 0;
+
+	xe_gt_assert(gt, vma);
+
+	/* Execlists not supported */
+	if (gt_to_xe(gt)->info.force_execlist) {
+		if (fence)
+			__invalidation_fence_signal(fence);
+
+		return 0;
+	}
+
+	action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
+	action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
+	if (!xe->info.has_range_tlb_invalidation) {
+		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
+	} else {
+		u64 start = xe_vma_start(vma);
+		u64 length = xe_vma_size(vma);
+		u64 align, end;
+
+		if (length < SZ_4K)
+			length = SZ_4K;
+
+		/*
+		 * We need to invalidate a higher granularity if start address
+		 * is not aligned to length. When start is not aligned with
+		 * length we need to find the length large enough to create an
+		 * address mask covering the required range.
+		 */
+		align = roundup_pow_of_two(length);
+		start = ALIGN_DOWN(xe_vma_start(vma), align);
+		end = ALIGN(xe_vma_end(vma), align);
+		length = align;
+		while (start + length < end) {
+			length <<= 1;
+			start = ALIGN_DOWN(xe_vma_start(vma), length);
+		}
+
+		/*
+		 * Minimum invalidation size for a 2MB page that the hardware
+		 * expects is 16MB
+		 */
+		if (length >= SZ_2M) {
+			length = max_t(u64, SZ_16M, length);
+			start = ALIGN_DOWN(xe_vma_start(vma), length);
+		}
+
+		xe_gt_assert(gt, length >= SZ_4K);
+		xe_gt_assert(gt, is_power_of_2(length));
+		xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1)));
+		xe_gt_assert(gt, IS_ALIGNED(start, length));
+
+		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
+		action[len++] = xe_vma_vm(vma)->usm.asid;
+		action[len++] = lower_32_bits(start);
+		action[len++] = upper_32_bits(start);
+		action[len++] = ilog2(length) - ilog2(SZ_4K);
+	}
+
+	xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN);
+
+	return send_tlb_invalidation(&gt->uc.guc, fence, action, len);
+}
+
+/**
+ * xe_gt_tlb_invalidation_wait - Wait for TLB to complete
+ * @gt: graphics tile
+ * @seqno: seqno to wait on, as returned by xe_gt_tlb_invalidation_{guc,vma}()
+ *
+ * Wait up to TLB_TIMEOUT (HZ / 4, i.e. 250ms) for a TLB invalidation to complete,
+ * in practice we should always receive the TLB invalidation within that time.
+ *
+ * Return: 0 on success, -ETIME on TLB invalidation timeout
+ */
+int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_guc *guc = &gt->uc.guc;
+	struct drm_printer p = drm_err_printer(__func__);
+	int ret;
+
+	/* Execlists not supported */
+	if (xe->info.force_execlist)
+		return 0;
+
+	/*
+	 * XXX: See above, this algorithm only works if seqno are always in
+	 * order
+	 */
+	ret = wait_event_timeout(guc->ct.wq,
+				 tlb_invalidation_seqno_past(gt, seqno),
+				 TLB_TIMEOUT);
+	if (!ret) {
+		drm_err(&xe->drm, "gt%d: TLB invalidation time'd out, seqno=%d, recv=%d\n",
+			gt->info.id, seqno, gt->tlb_invalidation.seqno_recv);
+		xe_guc_ct_print(&guc->ct, &p, true);
+		return -ETIME;
+	}
+
+	return 0;
+}
+
+/**
+ * xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler
+ * @guc: guc
+ * @msg: message indicating TLB invalidation done
+ * @len: length of message
+ *
+ * Parse seqno of TLB invalidation, wake any waiters for seqno, and signal any
+ * invalidation fences for seqno. Algorithm for this depends on seqno being
+ * received in-order and asserts this assumption.
+ *
+ * Return: 0 on success, -EPROTO for malformed messages.
+ */
+int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	struct xe_gt_tlb_invalidation_fence *fence, *next;
+	unsigned long flags;
+
+	if (unlikely(len != 1))
+		return -EPROTO;
+
+	/*
+	 * This can also be run both directly from the IRQ handler and also in
+	 * process_g2h_msg(). Only one may process any individual CT message,
+	 * however the order they are processed here could result in skipping a
+	 * seqno. To handle that we just process all the seqnos from the last
+	 * seqno_recv up to and including the one in msg[0]. The delta should be
+	 * very small so there shouldn't be much of pending_fences we actually
+	 * need to iterate over here.
+	 *
+	 * From GuC POV we expect the seqnos to always appear in-order, so if we
+	 * see something later in the timeline we can be sure that anything
+	 * appearing earlier has already signalled, just that we have yet to
+	 * officially process the CT message like if racing against
+	 * process_g2h_msg().
+	 */
+	spin_lock_irqsave(&gt->tlb_invalidation.pending_lock, flags);
+	if (tlb_invalidation_seqno_past(gt, msg[0])) {
+		spin_unlock_irqrestore(&gt->tlb_invalidation.pending_lock, flags);
+		return 0;
+	}
+
+	/*
+	 * wake_up_all() and wait_event_timeout() already have the correct
+	 * barriers.
+	 */
+	WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]);
+	wake_up_all(&guc->ct.wq);
+
+	list_for_each_entry_safe(fence, next,
+				 &gt->tlb_invalidation.pending_fences, link) {
+		trace_xe_gt_tlb_invalidation_fence_recv(fence);
+
+		if (!tlb_invalidation_seqno_past(gt, fence->seqno))
+			break;
+
+		invalidation_fence_signal(fence);
+	}
+
+	if (!list_empty(&gt->tlb_invalidation.pending_fences))
+		mod_delayed_work(system_wq,
+				 &gt->tlb_invalidation.fence_tdr,
+				 TLB_TIMEOUT);
+	else
+		cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);
+
+	spin_unlock_irqrestore(&gt->tlb_invalidation.pending_lock, flags);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
new file mode 100644
index 000000000000..b333c1709397
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_TLB_INVALIDATION_H_
+#define _XE_GT_TLB_INVALIDATION_H_
+
+#include <linux/types.h>
+
+#include "xe_gt_tlb_invalidation_types.h"
+
+struct xe_gt;
+struct xe_guc;
+struct xe_vma;
+
+int xe_gt_tlb_invalidation_init(struct xe_gt *gt);
+void xe_gt_tlb_invalidation_reset(struct xe_gt *gt);
+int xe_gt_tlb_invalidation_guc(struct xe_gt *gt);
+int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
+			       struct xe_gt_tlb_invalidation_fence *fence,
+			       struct xe_vma *vma);
+int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
+int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
+
+#endif	/* _XE_GT_TLB_INVALIDATION_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h
new file mode 100644
index 000000000000..934c828efe31
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_TLB_INVALIDATION_TYPES_H_
+#define _XE_GT_TLB_INVALIDATION_TYPES_H_
+
+#include <linux/dma-fence.h>
+
+/**
+ * struct xe_gt_tlb_invalidation_fence - XE GT TLB invalidation fence
+ *
+ * Optionally passed to xe_gt_tlb_invalidation and will be signaled upon TLB
+ * invalidation completion.
+ */
+struct xe_gt_tlb_invalidation_fence {
+	/** @base: dma fence base */
+	struct dma_fence base;
+	/** @link: link into list of pending tlb fences */
+	struct list_head link;
+	/** @seqno: seqno of TLB invalidation to signal fence on */
+	int seqno;
+	/** @invalidation_time: time of TLB invalidation */
+	ktime_t invalidation_time;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
new file mode 100644
index 000000000000..a8d7f272c30a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_gt_topology.h"
+
+#include <linux/bitmap.h>
+
+#include "regs/xe_gt_regs.h"
+#include "xe_gt.h"
+#include "xe_mmio.h"
+
+#define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS)
+#define XE_MAX_EU_FUSE_BITS (32 * XE_MAX_EU_FUSE_REGS)
+
+static void
+load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
+{
+	va_list argp;
+	u32 fuse_val[XE_MAX_DSS_FUSE_REGS] = {};
+	int i;
+
+	if (drm_WARN_ON(&gt_to_xe(gt)->drm, numregs > XE_MAX_DSS_FUSE_REGS))
+		numregs = XE_MAX_DSS_FUSE_REGS;
+
+	va_start(argp, numregs);
+	for (i = 0; i < numregs; i++)
+		fuse_val[i] = xe_mmio_read32(gt, va_arg(argp, struct xe_reg));
+	va_end(argp);
+
+	bitmap_from_arr32(mask, fuse_val, numregs * 32);
+}
+
+/* Build the per-DSS EU mask from the XELP_EU_ENABLE fuse register */
+static void load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 reg_val = xe_mmio_read32(gt, XELP_EU_ENABLE);
+	u32 val = 0;
+	int i;
+
+	BUILD_BUG_ON(XE_MAX_EU_FUSE_REGS > 1);
+
+	/* Pre-Xe_HP platforms inverted the bit meaning (disable instead of enable) */
+	if (GRAPHICS_VERx100(xe) < 1250)
+		reg_val = ~reg_val & XELP_EU_MASK;
+
+	/* On PVC, one bit = one EU */
+	if (GRAPHICS_VERx100(xe) == 1260) {
+		val = reg_val;
+	} else {
+		/*
+		 * All other platforms, one bit = 2 EU. The 32-bit val holds at
+		 * most 16 pairs, so stop there instead of shifting past its width.
+		 */
+		for (i = 0; i < min(fls(reg_val), 16); i++)
+			if (reg_val & BIT(i))
+				val |= 0x3u << (2 * i);
+	}
+
+	bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS);
+}
+
+static void
+get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs)
+{
+	if (GRAPHICS_VER(xe) > 20) {
+		*geometry_regs = 3;
+		*compute_regs = 3;
+	} else if (GRAPHICS_VERx100(xe) == 1260) {
+		*geometry_regs = 0;
+		*compute_regs = 2;
+	} else if (GRAPHICS_VERx100(xe) >= 1250) {
+		*geometry_regs = 1;
+		*compute_regs = 1;
+	} else {
+		*geometry_regs = 1;
+		*compute_regs = 0;
+	}
+}
+
+void
+xe_gt_topology_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct drm_printer p = drm_debug_printer("GT topology");
+	int num_geometry_regs, num_compute_regs;
+
+	get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs);
+
+	/*
+	 * Register counts returned shouldn't exceed the number of registers
+	 * passed as parameters below.
+	 */
+	drm_WARN_ON(&xe->drm, num_geometry_regs > 3);
+	drm_WARN_ON(&xe->drm, num_compute_regs > 3);
+
+	load_dss_mask(gt, gt->fuse_topo.g_dss_mask,
+		      num_geometry_regs,
+		      XELP_GT_GEOMETRY_DSS_ENABLE,
+		      XE2_GT_GEOMETRY_DSS_1,
+		      XE2_GT_GEOMETRY_DSS_2);
+	load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs,
+		      XEHP_GT_COMPUTE_DSS_ENABLE,
+		      XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
+		      XE2_GT_COMPUTE_DSS_2);
+	load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss);
+
+	xe_gt_topology_dump(gt, &p);
+}
+
+/*
+ * Print the geometry DSS, compute DSS and per-DSS EU fuse masks of @gt to @p.
+ */
+void xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	drm_printf(p, "dss mask (geometry): %*pb\n", XE_MAX_DSS_FUSE_BITS,
+		   gt->fuse_topo.g_dss_mask);
+	drm_printf(p, "dss mask (compute):  %*pb\n", XE_MAX_DSS_FUSE_BITS,
+		   gt->fuse_topo.c_dss_mask);
+	drm_printf(p, "EU mask per DSS:     %*pb\n", XE_MAX_EU_FUSE_BITS,
+		   gt->fuse_topo.eu_mask_per_dss);
+}
+
+/*
+ * Used to obtain the index of the first DSS.  Can start searching from the
+ * beginning of a specific dss group (e.g., gslice, cslice, etc.) if
+ * groupsize and groupnum are non-zero.
+ */
+unsigned int
+xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum)
+{
+	return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize);
+}
+
+bool xe_dss_mask_empty(const xe_dss_mask_t mask)
+{
+	return bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS);
+}
+
+/**
+ * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant
+ * @gt: GT to check
+ * @quad: Which quadrant of the DSS space to check
+ *
+ * Since Xe_HP platforms can have up to four CCS engines, those engines
+ * are each logically associated with a quarter of the possible DSS.  If there
+ * are no DSS present in one of the four quadrants of the DSS space, the
+ * corresponding CCS engine is also not available for use.
+ *
+ * Returns false if all DSS in a quadrant of the GT are fused off, else true.
+ */
+bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	xe_dss_mask_t all_dss;
+	int g_dss_regs, c_dss_regs, dss_per_quad, quad_first;
+
+	bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
+		  XE_MAX_DSS_FUSE_BITS);
+
+	get_num_dss_regs(xe, &g_dss_regs, &c_dss_regs);
+	dss_per_quad = 32 * max(g_dss_regs, c_dss_regs) / 4;
+
+	quad_first = xe_dss_mask_group_ffs(all_dss, dss_per_quad, quad);
+
+	return quad_first < (quad + 1) * dss_per_quad;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h
new file mode 100644
index 000000000000..d1b54fb52ea6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_topology.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_TOPOLOGY_H_
+#define _XE_GT_TOPOLOGY_H_
+
+#include "xe_gt_types.h"
+
+struct drm_printer;
+
+void xe_gt_topology_init(struct xe_gt *gt);
+
+void xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p);
+
+unsigned int
+xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum);
+
+bool xe_dss_mask_empty(const xe_dss_mask_t mask);
+
+bool
+xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad);
+
+#endif /* _XE_GT_TOPOLOGY_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
new file mode 100644
index 000000000000..f74684660475
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -0,0 +1,363 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022-2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_TYPES_H_
+#define _XE_GT_TYPES_H_
+
+#include "xe_force_wake_types.h"
+#include "xe_gt_idle_types.h"
+#include "xe_hw_engine_types.h"
+#include "xe_hw_fence_types.h"
+#include "xe_reg_sr_types.h"
+#include "xe_sa_types.h"
+#include "xe_uc_types.h"
+
+struct xe_exec_queue_ops;
+struct xe_migrate;
+struct xe_ring_ops;
+
+enum xe_gt_type {
+	XE_GT_TYPE_UNINITIALIZED,
+	XE_GT_TYPE_MAIN,
+	XE_GT_TYPE_MEDIA,
+};
+
+#define XE_MAX_DSS_FUSE_REGS	3
+#define XE_MAX_EU_FUSE_REGS	1
+
+typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(32 * XE_MAX_DSS_FUSE_REGS)];
+typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(32 * XE_MAX_EU_FUSE_REGS)];
+
+struct xe_mmio_range {
+	u32 start;
+	u32 end;
+};
+
+/*
+ * The hardware has multiple kinds of multicast register ranges that need
+ * special register steering (and future platforms are expected to add
+ * additional types).
+ *
+ * During driver startup, we initialize the steering control register to
+ * direct reads to a slice/subslice that are valid for the 'subslice' class
+ * of multicast registers.  If another type of steering does not have any
+ * overlap in valid steering targets with 'subslice' style registers, we will
+ * need to explicitly re-steer reads of registers of the other type.
+ *
+ * Only the replication types that may need additional non-default steering
+ * are listed here.
+ */
+enum xe_steering_type {
+	L3BANK,
+	MSLICE,
+	LNCF,
+	DSS,
+	OADDRM,
+	SQIDI_PSMI,
+
+	/*
+	 * On some platforms there are multiple types of MCR registers that
+	 * will always return a non-terminated value at instance (0, 0).  We'll
+	 * lump those all into a single category to keep things simple.
+	 */
+	INSTANCE0,
+
+	/*
+	 * Register ranges that don't need special steering for each register:
+	 * it's sufficient to keep the HW-default for the selector, or only
+	 * change it once, on GT initialization. This needs to be the last
+	 * steering type.
+	 */
+	IMPLICIT_STEERING,
+	NUM_STEERING_TYPES
+};
+
+#define gt_to_tile(gt__)							\
+	_Generic(gt__,								\
+		 const struct xe_gt * : (const struct xe_tile *)((gt__)->tile),	\
+		 struct xe_gt * : (gt__)->tile)
+
+#define gt_to_xe(gt__)										\
+	_Generic(gt__,										\
+		 const struct xe_gt * : (const struct xe_device *)(gt_to_tile(gt__)->xe),	\
+		 struct xe_gt * : gt_to_tile(gt__)->xe)
+
+/**
+ * struct xe_gt - A "Graphics Technology" unit of the GPU
+ *
+ * A GT ("Graphics Technology") is the subset of a GPU primarily responsible
+ * for implementing the graphics, compute, and/or media IP.  It encapsulates
+ * the hardware engines, programmable execution units, and GuC.  Each GT has
+ * its own handling of power management (RC6+forcewake) and multicast register
+ * steering.
+ *
+ * A GPU/tile may have a single GT that supplies all graphics, compute, and
+ * media functionality, or the graphics/compute and media may be split into
+ * separate GTs within a tile.
+ */
+struct xe_gt {
+	/** @tile: Backpointer to GT's tile */
+	struct xe_tile *tile;
+
+	/** @info: GT info */
+	struct {
+		/** @type: type of GT */
+		enum xe_gt_type type;
+		/** @id: Unique ID of this GT within the PCI Device */
+		u8 id;
+		/** @reference_clock: clock frequency */
+		u32 reference_clock;
+		/** @engine_mask: mask of engines present on GT */
+		u64 engine_mask;
+		/**
+		 * @__engine_mask: mask of engines present on GT read from
+		 * xe_pci.c, used to fake reading the engine_mask from the
+		 * hwconfig blob.
+		 */
+		u64 __engine_mask;
+	} info;
+
+	/**
+	 * @mmio: mmio info for GT.  All GTs within a tile share the same
+	 * register space, but have their own copy of GSI registers at a
+	 * specific offset, as well as their own forcewake handling.
+	 */
+	struct {
+		/** @fw: force wake for GT */
+		struct xe_force_wake fw;
+		/**
+		 * @adj_limit: adjust MMIO address if address is below this
+		 * value
+		 */
+		u32 adj_limit;
+		/** @adj_offset: offset to add to MMIO address when adjusting */
+		u32 adj_offset;
+	} mmio;
+
+	/**
+	 * @reg_sr: table with registers to be restored on GT init/resume/reset
+	 */
+	struct xe_reg_sr reg_sr;
+
+	/** @reset: state for GT resets */
+	struct {
+		/**
+		 * @worker: work so GT resets can be done async, allowing the
+		 * reset code to safely flush all code paths
+		 */
+		struct work_struct worker;
+	} reset;
+
+	/** @tlb_invalidation: TLB invalidation state */
+	struct {
+		/** @seqno: TLB invalidation seqno, protected by CT lock */
+#define TLB_INVALIDATION_SEQNO_MAX	0x100000
+		int seqno;
+		/**
+		 * @seqno_recv: last received TLB invalidation seqno, protected by CT lock
+		 */
+		int seqno_recv;
+		/**
+		 * @pending_fences: list of pending fences waiting for TLB
+		 * invalidations, protected by CT lock
+		 */
+		struct list_head pending_fences;
+		/**
+		 * @pending_lock: protects @pending_fences and updating
+		 * @seqno_recv.
+		 */
+		spinlock_t pending_lock;
+		/**
+		 * @fence_tdr: schedules a delayed call to
+		 * xe_gt_tlb_fence_timeout after the timeout interval is over.
+		 */
+		struct delayed_work fence_tdr;
+		/** @fence_context: context for TLB invalidation fences */
+		u64 fence_context;
+		/**
+		 * @fence_seqno: seqno for TLB invalidation fences, protected by
+		 * tlb_invalidation.lock
+		 */
+		u32 fence_seqno;
+		/** @lock: protects TLB invalidation fences */
+		spinlock_t lock;
+	} tlb_invalidation;
+
+	/**
+	 * @ccs_mode: Number of compute engines enabled.
+	 * Allows fixed mapping of available compute slices to compute engines.
+	 * By default only the first available compute engine is enabled and all
+	 * available compute slices are allocated to it.
+	 */
+	u32 ccs_mode;
+
+	/** @usm: unified shared memory state */
+	struct {
+		/**
+		 * @bb_pool: Pool from which batchbuffers, for USM operations
+		 * (e.g. migrations, fixing page tables), are allocated.
+		 * Dedicated pool needed so USM operations do not get blocked
+		 * behind any user operations which may have resulted in a
+		 * fault.
+		 */
+		struct xe_sa_manager *bb_pool;
+		/**
+		 * @reserved_bcs_instance: reserved BCS instance used for USM
+		 * operations (e.g. migrations, fixing page tables)
+		 */
+		u16 reserved_bcs_instance;
+		/** @pf_wq: page fault work queue, unbound, high priority */
+		struct workqueue_struct *pf_wq;
+		/** @acc_wq: access counter work queue, unbound, high priority */
+		struct workqueue_struct *acc_wq;
+		/**
+		 * @pf_queue: Page fault queue used to sync faults so faults can
+		 * be processed not under the GuC CT lock. The queue is sized so
+		 * it can sync all possible faults (1 per physical engine).
+		 * Multiple queues exist so page faults from different VMs can
+		 * be processed in parallel.
+		 */
+		struct pf_queue {
+			/** @gt: back pointer to GT */
+			struct xe_gt *gt;
+#define PF_QUEUE_NUM_DW	128
+			/** @data: data in the page fault queue */
+			u32 data[PF_QUEUE_NUM_DW];
+			/**
+			 * @head: head pointer in DWs for page fault queue,
+			 * moved by worker which processes faults.
+			 */
+			u16 head;
+			/**
+			 * @tail: tail pointer in DWs for page fault queue,
+			 * moved by G2H handler.
+			 */
+			u16 tail;
+			/** @lock: protects page fault queue */
+			spinlock_t lock;
+			/** @worker: to process page faults */
+			struct work_struct worker;
+#define NUM_PF_QUEUE	4
+		} pf_queue[NUM_PF_QUEUE];
+		/**
+		 * @acc_queue: Same as page fault queue, cannot process access
+		 * counters under CT lock.
+		 */
+		struct acc_queue {
+			/** @gt: back pointer to GT */
+			struct xe_gt *gt;
+#define ACC_QUEUE_NUM_DW	128
+			/** @data: data in the access counter queue */
+			u32 data[ACC_QUEUE_NUM_DW];
+			/**
+			 * @head: head pointer in DWs for access counter queue,
+			 * moved by worker which processes access counters.
+			 */
+			u16 head;
+			/**
+			 * @tail: tail pointer in DWs for access counter queue,
+			 * moved by G2H handler.
+			 */
+			u16 tail;
+			/** @lock: protects access counter queue */
+			spinlock_t lock;
+			/** @worker: to process access counters */
+			struct work_struct worker;
+#define NUM_ACC_QUEUE	4
+		} acc_queue[NUM_ACC_QUEUE];
+	} usm;
+
+	/** @ordered_wq: used to serialize GT resets and TDRs */
+	struct workqueue_struct *ordered_wq;
+
+	/** @uc: micro controllers on the GT */
+	struct xe_uc uc;
+
+	/** @gtidle: idle properties of GT */
+	struct xe_gt_idle gtidle;
+
+	/** @exec_queue_ops: submission backend exec queue operations */
+	const struct xe_exec_queue_ops *exec_queue_ops;
+
+	/**
+	 * @ring_ops: ring operations for this hw engine (1 per engine class)
+	 */
+	const struct xe_ring_ops *ring_ops[XE_ENGINE_CLASS_MAX];
+
+	/** @fence_irq: fence IRQs (1 per engine class) */
+	struct xe_hw_fence_irq fence_irq[XE_ENGINE_CLASS_MAX];
+
+	/** @default_lrc: default LRC state */
+	void *default_lrc[XE_ENGINE_CLASS_MAX];
+
+	/** @hw_engines: hardware engines on the GT */
+	struct xe_hw_engine hw_engines[XE_NUM_HW_ENGINES];
+
+	/** @eclass: per hardware engine class interface on the GT */
+	struct xe_hw_engine_class_intf  eclass[XE_ENGINE_CLASS_MAX];
+
+	/** @pcode: GT's PCODE */
+	struct {
+		/** @lock: protecting GT's PCODE mailbox data */
+		struct mutex lock;
+	} pcode;
+
+	/** @sysfs: sysfs' kobj used by xe_gt_sysfs */
+	struct kobject *sysfs;
+
+	/** @freq: Main GT freq sysfs control */
+	struct kobject *freq;
+
+	/** @mocs: MOCS index info */
+	struct {
+		/** @uc_index: UC index */
+		u8 uc_index;
+		/** @wb_index: WB index, only used on L3_CCS platforms */
+		u8 wb_index;
+	} mocs;
+
+	/** @fuse_topo: GT topology reported by fuse registers */
+	struct {
+		/** @g_dss_mask: dual-subslices usable by geometry */
+		xe_dss_mask_t g_dss_mask;
+
+		/** @c_dss_mask: dual-subslices usable by compute */
+		xe_dss_mask_t c_dss_mask;
+
+		/** @eu_mask_per_dss: EU mask per DSS */
+		xe_eu_mask_t eu_mask_per_dss;
+	} fuse_topo;
+
+	/** @steering: register steering for individual HW units */
+	struct {
+		/** @ranges: register ranges used for this steering type */
+		const struct xe_mmio_range *ranges;
+
+		/** @group_target: target to steer accesses to */
+		u16 group_target;
+		/** @instance_target: instance to steer accesses to */
+		u16 instance_target;
+	} steering[NUM_STEERING_TYPES];
+
+	/**
+	 * @mcr_lock: protects the MCR_SELECTOR register for the duration
+	 *    of a steered operation
+	 */
+	spinlock_t mcr_lock;
+
+	/** @wa_active: keep track of active workarounds */
+	struct {
+		/** @gt: bitmap with active GT workarounds */
+		unsigned long *gt;
+		/** @engine: bitmap with active engine workarounds */
+		unsigned long *engine;
+		/** @lrc: bitmap with active LRC workarounds */
+		unsigned long *lrc;
+		/** @oob: bitmap with active OOB workarounds */
+		unsigned long *oob;
+	} wa_active;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
new file mode 100644
index 000000000000..0a61390c64a7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -0,0 +1,916 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_guc.h"
+
+#include <drm/drm_managed.h>
+
+#include "abi/guc_actions_abi.h"
+#include "abi/guc_errors_abi.h"
+#include "generated/xe_wa_oob.h"
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_guc_regs.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_guc_ads.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_hwconfig.h"
+#include "xe_guc_log.h"
+#include "xe_guc_pc.h"
+#include "xe_guc_submit.h"
+#include "xe_mmio.h"
+#include "xe_platform_types.h"
+#include "xe_uc.h"
+#include "xe_uc_fw.h"
+#include "xe_wa.h"
+#include "xe_wopcm.h"
+
+/* GGTT address of @bo; asserts it lies in [WOPCM size, GUC_GGTT_TOP) */
+#define GUC_GGTT_TOP    0xFEE00000
+static u32 guc_bo_ggtt_addr(struct xe_guc *guc,
+			    struct xe_bo *bo)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	u32 addr = xe_bo_ggtt_addr(bo);
+
+	xe_assert(xe, addr >= xe_wopcm_size(xe));
+	xe_assert(xe, addr < GUC_GGTT_TOP);
+	xe_assert(xe, bo->size <= GUC_GGTT_TOP - addr);
+
+	return addr;
+}
+
+static u32 guc_ctl_debug_flags(struct xe_guc *guc)
+{
+	u32 level = xe_guc_log_get_level(&guc->log);
+	u32 flags = 0;
+
+	if (!GUC_LOG_LEVEL_IS_VERBOSE(level))
+		flags |= GUC_LOG_DISABLED;
+	else
+		flags |= GUC_LOG_LEVEL_TO_VERBOSITY(level) <<
+			 GUC_LOG_VERBOSITY_SHIFT;
+
+	return flags;
+}
+
+static u32 guc_ctl_feature_flags(struct xe_guc *guc)
+{
+	u32 flags = 0;
+
+	if (!guc_to_xe(guc)->info.skip_guc_pc)
+		flags |= GUC_CTL_ENABLE_SLPC;
+
+	return flags;
+}
+
+static u32 guc_ctl_log_params_flags(struct xe_guc *guc)
+{
+	u32 offset = guc_bo_ggtt_addr(guc, guc->log.bo) >> PAGE_SHIFT;
+	u32 flags;
+
+	#if (((CRASH_BUFFER_SIZE) % SZ_1M) == 0)
+	#define LOG_UNIT SZ_1M
+	#define LOG_FLAG GUC_LOG_LOG_ALLOC_UNITS
+	#else
+	#define LOG_UNIT SZ_4K
+	#define LOG_FLAG 0
+	#endif
+
+	#if (((CAPTURE_BUFFER_SIZE) % SZ_1M) == 0)
+	#define CAPTURE_UNIT SZ_1M
+	#define CAPTURE_FLAG GUC_LOG_CAPTURE_ALLOC_UNITS
+	#else
+	#define CAPTURE_UNIT SZ_4K
+	#define CAPTURE_FLAG 0
+	#endif
+
+	BUILD_BUG_ON(!CRASH_BUFFER_SIZE);
+	BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, LOG_UNIT));
+	BUILD_BUG_ON(!DEBUG_BUFFER_SIZE);
+	BUILD_BUG_ON(!IS_ALIGNED(DEBUG_BUFFER_SIZE, LOG_UNIT));
+	BUILD_BUG_ON(!CAPTURE_BUFFER_SIZE);
+	BUILD_BUG_ON(!IS_ALIGNED(CAPTURE_BUFFER_SIZE, CAPTURE_UNIT));
+
+	BUILD_BUG_ON((CRASH_BUFFER_SIZE / LOG_UNIT - 1) >
+			(GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT));
+	BUILD_BUG_ON((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) >
+			(GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT));
+	BUILD_BUG_ON((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) >
+			(GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT));
+
+	flags = GUC_LOG_VALID |
+		GUC_LOG_NOTIFY_ON_HALF_FULL |
+		CAPTURE_FLAG |
+		LOG_FLAG |
+		((CRASH_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_CRASH_SHIFT) |
+		((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_DEBUG_SHIFT) |
+		((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) <<
+		 GUC_LOG_CAPTURE_SHIFT) |
+		(offset << GUC_LOG_BUF_ADDR_SHIFT);
+
+	#undef LOG_UNIT
+	#undef LOG_FLAG
+	#undef CAPTURE_UNIT
+	#undef CAPTURE_FLAG
+
+	return flags;
+}
+
+static u32 guc_ctl_ads_flags(struct xe_guc *guc)
+{
+	u32 ads = guc_bo_ggtt_addr(guc, guc->ads.bo) >> PAGE_SHIFT;
+	u32 flags = ads << GUC_ADS_ADDR_SHIFT;
+
+	return flags;
+}
+
+static u32 guc_ctl_wa_flags(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 flags = 0;
+
+	if (XE_WA(gt, 22012773006))
+		flags |= GUC_WA_POLLCS;
+
+	if (XE_WA(gt, 16011759253))
+		flags |= GUC_WA_GAM_CREDITS;
+
+	if (XE_WA(gt, 14014475959))
+		flags |= GUC_WA_HOLD_CCS_SWITCHOUT;
+
+	if (XE_WA(gt, 22011391025) || XE_WA(gt, 14012197797))
+		flags |= GUC_WA_DUAL_QUEUE;
+
+	/*
+	 * Wa_22011802037: FIXME - there's more to be done than simply setting
+	 * this flag: make sure each CS is stopped when preparing for GT reset
+	 * and wait for pending MI_FW.
+	 */
+	if (GRAPHICS_VERx100(xe) < 1270)
+		flags |= GUC_WA_PRE_PARSER;
+
+	if (XE_WA(gt, 16011777198))
+		flags |= GUC_WA_RCS_RESET_BEFORE_RC6;
+
+	if (XE_WA(gt, 22012727170) || XE_WA(gt, 22012727685))
+		flags |= GUC_WA_CONTEXT_ISOLATION;
+
+	if ((XE_WA(gt, 16015675438) || XE_WA(gt, 18020744125)) &&
+	    !xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_RENDER))
+		flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST;
+
+	if (XE_WA(gt, 1509372804))
+		flags |= GUC_WA_RENDER_RST_RC6_EXIT;
+
+	return flags;
+}
+
+static u32 guc_ctl_devid(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+
+	return (((u32)xe->info.devid) << 16) | xe->info.revid;
+}
+
+static void guc_init_params(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	u32 *params = guc->params;
+	int i;
+
+	BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32));
+	BUILD_BUG_ON(GUC_CTL_MAX_DWORDS + 2 != SOFT_SCRATCH_COUNT);
+
+	params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc);
+	params[GUC_CTL_FEATURE] = 0;
+	params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc);
+	params[GUC_CTL_ADS] = guc_ctl_ads_flags(guc);
+	params[GUC_CTL_WA] = 0;
+	params[GUC_CTL_DEVID] = guc_ctl_devid(guc);
+
+	for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
+		drm_dbg(&xe->drm, "GuC param[%2d] = 0x%08x\n", i, params[i]);
+}
+
+static void guc_init_params_post_hwconfig(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	u32 *params = guc->params;
+	int i;
+
+	BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32));
+	BUILD_BUG_ON(GUC_CTL_MAX_DWORDS + 2 != SOFT_SCRATCH_COUNT);
+
+	params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc);
+	params[GUC_CTL_FEATURE] = guc_ctl_feature_flags(guc);
+	params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc);
+	params[GUC_CTL_ADS] = guc_ctl_ads_flags(guc);
+	params[GUC_CTL_WA] = guc_ctl_wa_flags(guc);
+	params[GUC_CTL_DEVID] = guc_ctl_devid(guc);
+
+	for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
+		drm_dbg(&xe->drm, "GuC param[%2d] = 0x%08x\n", i, params[i]);
+}
+
+/*
+ * Initialize the GuC parameter block before starting the firmware
+ * transfer. These parameters are read by the firmware on startup
+ * and cannot be changed thereafter.
+ */
+static void guc_write_params(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	int i;
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+	xe_mmio_write32(gt, SOFT_SCRATCH(0), 0);
+
+	for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
+		xe_mmio_write32(gt, SOFT_SCRATCH(1 + i), guc->params[i]);
+}
+
+static void guc_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_guc *guc = arg;
+
+	xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+	xe_guc_pc_fini(&guc->pc);
+	xe_uc_fini_hw(&guc_to_gt(guc)->uc);
+	xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+}
+
+int xe_guc_init(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
+	int ret;
+
+	guc->fw.type = XE_UC_FW_TYPE_GUC;
+	ret = xe_uc_fw_init(&guc->fw);
+	if (ret)
+		goto out;
+
+	if (!xe_uc_fw_is_enabled(&guc->fw))
+		return 0;	/* GuC firmware not enabled: skip remaining setup */
+
+	ret = xe_guc_log_init(&guc->log);
+	if (ret)
+		goto out;
+
+	ret = xe_guc_ads_init(&guc->ads);
+	if (ret)
+		goto out;
+
+	ret = xe_guc_ct_init(&guc->ct);
+	if (ret)
+		goto out;
+
+	ret = xe_guc_pc_init(&guc->pc);
+	if (ret)
+		goto out;
+
+	ret = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, guc_fini, guc);
+	if (ret)
+		goto out;
+
+	guc_init_params(guc);
+
+	if (xe_gt_is_media_type(gt))
+		guc->notify_reg = MED_GUC_HOST_INTERRUPT;
+	else
+		guc->notify_reg = GUC_HOST_INTERRUPT;
+
+	xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE);
+
+	return 0;
+
+out:
+	drm_err(&xe->drm, "GuC init failed with %d\n", ret);
+	return ret;
+}
+
+/**
+ * xe_guc_init_post_hwconfig - initialize GuC post hwconfig load
+ * @guc: The GuC object
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_guc_init_post_hwconfig(struct xe_guc *guc)
+{
+	guc_init_params_post_hwconfig(guc);
+
+	return xe_guc_ads_init_post_hwconfig(&guc->ads);
+}
+
+int xe_guc_post_load_init(struct xe_guc *guc)
+{
+	xe_guc_ads_populate_post_load(&guc->ads);
+	guc->submission_state.enabled = true;
+
+	return 0;
+}
+
+int xe_guc_reset(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 guc_status, gdrst;
+	int ret;
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+	xe_mmio_write32(gt, GDRST, GRDOM_GUC);
+
+	ret = xe_mmio_wait32(gt, GDRST, GRDOM_GUC, 0, 5000, &gdrst, false);
+	if (ret) {
+		drm_err(&xe->drm, "GuC reset timed out, GDRST=0x%08x\n",
+			gdrst);
+		goto err_out;
+	}
+
+	guc_status = xe_mmio_read32(gt, GUC_STATUS);
+	if (!(guc_status & GS_MIA_IN_RESET)) {
+		drm_err(&xe->drm,
+			"GuC status: 0x%x, MIA core expected to be in reset\n",
+			guc_status);
+		ret = -EIO;
+		goto err_out;
+	}
+
+	return 0;
+
+err_out:
+
+	return ret;
+}
+
+static void guc_prepare_xfer(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	struct xe_device *xe =  guc_to_xe(guc);
+	u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC |
+		GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA |
+		GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA |
+		GUC_ENABLE_MIA_CLOCK_GATING;
+
+	if (GRAPHICS_VERx100(xe) < 1250)
+		shim_flags |= GUC_DISABLE_SRAM_INIT_TO_ZEROES |
+				GUC_ENABLE_MIA_CACHING;
+
+	if (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC)
+		shim_flags |= REG_FIELD_PREP(GUC_MOCS_INDEX_MASK, gt->mocs.uc_index);
+
+	/* Must program this register before loading the ucode with DMA */
+	xe_mmio_write32(gt, GUC_SHIM_CONTROL, shim_flags);
+
+	xe_mmio_write32(gt, GT_PM_CONFIG, GT_DOORBELL_ENABLE);
+}
+
+/*
+ * Give HW the RSA: GGTT address if > 256 bytes, else copied to scratch regs
+ */
+static int guc_xfer_rsa(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 rsa[UOS_RSA_SCRATCH_COUNT];
+	size_t copied;
+	int i;
+
+	if (guc->fw.rsa_size > 256) {
+		u32 rsa_ggtt_addr = xe_bo_ggtt_addr(guc->fw.bo) +
+				    xe_uc_fw_rsa_offset(&guc->fw);
+		xe_mmio_write32(gt, UOS_RSA_SCRATCH(0), rsa_ggtt_addr);
+		return 0;
+	}
+
+	copied = xe_uc_fw_copy_rsa(&guc->fw, rsa, sizeof(rsa));
+	if (copied < sizeof(rsa))
+		return -ENOMEM;
+
+	for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++)
+		xe_mmio_write32(gt, UOS_RSA_SCRATCH(i), rsa[i]);
+
+	return 0;
+}
+
+static int guc_wait_ucode(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	u32 status;
+	int ret;
+
+	/*
+	 * Wait for the GuC to start up.
+	 * NB: Docs recommend not using the interrupt for completion.
+	 * Measurements indicate this should take no more than 20ms
+	 * (assuming the GT clock is at maximum frequency). So, a
+	 * timeout here indicates that the GuC has failed and is unusable.
+	 * (Higher levels of the driver may decide to reset the GuC and
+	 * attempt the ucode load again if this happens.)
+	 *
+	 * FIXME: There is a known (but exceedingly unlikely) race condition
+	 * where the asynchronous frequency management code could reduce
+	 * the GT clock while a GuC reload is in progress (during a full
+	 * GT reset). A fix is in progress but there are complex locking
+	 * issues to be resolved. In the meantime bump the timeout to
+	 * 200ms. Even at slowest clock, this should be sufficient. And
+	 * in the working case, a larger timeout makes no difference.
+	 */
+	ret = xe_mmio_wait32(guc_to_gt(guc), GUC_STATUS, GS_UKERNEL_MASK,
+			     FIELD_PREP(GS_UKERNEL_MASK, XE_GUC_LOAD_STATUS_READY),
+			     200000, &status, false);
+
+	if (ret) {
+		struct drm_device *drm = &xe->drm;
+		struct drm_printer p = drm_info_printer(drm->dev);
+
+		drm_info(drm, "GuC load failed: status = 0x%08X\n", status);
+		drm_info(drm, "GuC load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n",
+			 REG_FIELD_GET(GS_MIA_IN_RESET, status),
+			 REG_FIELD_GET(GS_BOOTROM_MASK, status),
+			 REG_FIELD_GET(GS_UKERNEL_MASK, status),
+			 REG_FIELD_GET(GS_MIA_MASK, status),
+			 REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
+
+		if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) {
+			drm_info(drm, "GuC firmware signature verification failed\n");
+			ret = -ENOEXEC;
+		}
+
+		if (REG_FIELD_GET(GS_UKERNEL_MASK, status) ==
+		    XE_GUC_LOAD_STATUS_EXCEPTION) {
+			drm_info(drm, "GuC firmware exception. EIP: %#x\n",
+				 xe_mmio_read32(guc_to_gt(guc),
+						SOFT_SCRATCH(13)));
+			ret = -ENXIO;
+		}
+
+		xe_guc_log_print(&guc->log, &p);
+	} else {
+		drm_dbg(&xe->drm, "GuC successfully loaded\n");
+	}
+
+	return ret;
+}
+
+static int __xe_guc_upload(struct xe_guc *guc)
+{
+	int ret;
+
+	guc_write_params(guc);
+	guc_prepare_xfer(guc);
+
+	/*
+	 * Note that GuC needs the CSS header plus uKernel code to be copied
+	 * by the DMA engine in one operation, whereas the RSA signature is
+	 * loaded separately, either by copying it to the UOS_RSA_SCRATCH
+	 * register (if key size <= 256) or through a ggtt-pinned vma (if key
+	 * size > 256). The RSA size and therefore the way we provide it to the
+	 * HW is fixed for each platform and hard-coded in the bootrom.
+	 */
+	ret = guc_xfer_rsa(guc);
+	if (ret)
+		goto out;
+	/*
+	 * Current uCode expects the code to be loaded at 8k; locations below
+	 * this are used for the stack.
+	 */
+	ret = xe_uc_fw_upload(&guc->fw, 0x2000, UOS_MOVE);
+	if (ret)
+		goto out;
+
+	/* Wait for authentication */
+	ret = guc_wait_ucode(guc);
+	if (ret)
+		goto out;
+
+	xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_RUNNING);
+	return 0;
+
+out:
+	xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
+	return 0	/* FIXME: ret, don't want to stop load currently */;
+}
+
+/**
+ * xe_guc_min_load_for_hwconfig - load minimal GuC and read hwconfig table
+ * @guc: The GuC object
+ *
+ * This function uploads a minimal GuC that does not support submissions but
+ * in a state where the hwconfig table can be read. Next, it reads and parses
+ * the hwconfig table so it can be used for subsequent steps in the driver load.
+ * Lastly, it enables CT communication (XXX: this is needed for PFs/VFs only).
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_guc_min_load_for_hwconfig(struct xe_guc *guc)
+{
+	int ret;
+
+	xe_guc_ads_populate_minimal(&guc->ads);
+
+	ret = __xe_guc_upload(guc);
+	if (ret)
+		return ret;
+
+	ret = xe_guc_hwconfig_init(guc);
+	if (ret)
+		return ret;
+
+	ret = xe_guc_enable_communication(guc);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+int xe_guc_upload(struct xe_guc *guc)
+{
+	xe_guc_ads_populate(&guc->ads);
+
+	return __xe_guc_upload(guc);
+}
+
+static void guc_handle_mmio_msg(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 msg;
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+	msg = xe_mmio_read32(gt, SOFT_SCRATCH(15));
+	msg &= XE_GUC_RECV_MSG_EXCEPTION |
+		XE_GUC_RECV_MSG_CRASH_DUMP_POSTED;
+	xe_mmio_write32(gt, SOFT_SCRATCH(15), 0);
+
+	if (msg & XE_GUC_RECV_MSG_CRASH_DUMP_POSTED)
+		drm_err(&guc_to_xe(guc)->drm,
+			"Received early GuC crash dump notification!\n");
+
+	if (msg & XE_GUC_RECV_MSG_EXCEPTION)
+		drm_err(&guc_to_xe(guc)->drm,
+			"Received early GuC exception notification!\n");
+}
+
+static void guc_enable_irq(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 events = xe_gt_is_media_type(gt) ?
+		REG_FIELD_PREP(ENGINE0_MASK, GUC_INTR_GUC2HOST)  :
+		REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST);
+
+	/* Primary GuC and media GuC share a single enable bit */
+	xe_mmio_write32(gt, GUC_SG_INTR_ENABLE,
+			REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST));
+
+	/*
+	 * There are separate mask bits for primary and media GuCs, so use
+	 * a RMW operation to avoid clobbering the other GuC's setting.
+	 */
+	xe_mmio_rmw32(gt, GUC_SG_INTR_MASK, events, 0);
+}
+
+int xe_guc_enable_communication(struct xe_guc *guc)
+{
+	int err;
+
+	guc_enable_irq(guc);
+
+	xe_mmio_rmw32(guc_to_gt(guc), PMINTRMSK,
+		      ARAT_EXPIRED_INTRMSK, 0);
+
+	err = xe_guc_ct_enable(&guc->ct);
+	if (err)
+		return err;
+
+	guc_handle_mmio_msg(guc);
+
+	return 0;
+}
+
+int xe_guc_suspend(struct xe_guc *guc)
+{
+	int ret;
+	u32 action[] = {
+		XE_GUC_ACTION_CLIENT_SOFT_RESET,
+	};
+
+	ret = xe_guc_mmio_send(guc, action, ARRAY_SIZE(action));
+	if (ret) {
+		drm_err(&guc_to_xe(guc)->drm,
+			"GuC suspend: CLIENT_SOFT_RESET fail: %d!\n", ret);
+		return ret;
+	}
+
+	xe_guc_sanitize(guc);
+	return 0;
+}
+
+void xe_guc_notify(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	const u32 default_notify_data = 0;
+
+	/*
+	 * Both GUC_HOST_INTERRUPT and MED_GUC_HOST_INTERRUPT can pass
+	 * additional payload data to the GuC but this capability is not
+	 * used by the firmware yet. Use default value in the meantime.
+	 */
+	xe_mmio_write32(gt, guc->notify_reg, default_notify_data);
+}
+
+int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr)
+{
+	u32 action[] = {
+		XE_GUC_ACTION_AUTHENTICATE_HUC,
+		rsa_addr
+	};
+
+	return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action));
+}
+
+int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request,
+			  u32 len, u32 *response_buf)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 header;	/* last value read from reply_reg, also used in error logs */
+	struct xe_reg reply_reg = xe_gt_is_media_type(gt) ?
+		MED_VF_SW_FLAG(0) : VF_SW_FLAG(0);
+	const u32 LAST_INDEX = VF_SW_FLAG_COUNT - 1;
+	int ret;
+	int i;
+
+	BUILD_BUG_ON(VF_SW_FLAG_COUNT != MED_VF_SW_FLAG_COUNT);
+
+	xe_assert(xe, !guc->ct.enabled);
+	xe_assert(xe, len);
+	xe_assert(xe, len <= VF_SW_FLAG_COUNT);
+	xe_assert(xe, len <= MED_VF_SW_FLAG_COUNT);
+	xe_assert(xe, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, request[0]) ==
+		  GUC_HXG_ORIGIN_HOST);
+	xe_assert(xe, FIELD_GET(GUC_HXG_MSG_0_TYPE, request[0]) ==
+		  GUC_HXG_TYPE_REQUEST);
+
+retry:
+	/* Not in critical data-path, just do if else for GT type */
+	if (xe_gt_is_media_type(gt)) {
+		for (i = 0; i < len; ++i)
+			xe_mmio_write32(gt, MED_VF_SW_FLAG(i),
+					request[i]);
+		xe_mmio_read32(gt, MED_VF_SW_FLAG(LAST_INDEX));
+	} else {
+		for (i = 0; i < len; ++i)
+			xe_mmio_write32(gt, VF_SW_FLAG(i),
+					request[i]);
+		xe_mmio_read32(gt, VF_SW_FLAG(LAST_INDEX));
+	}
+
+	xe_guc_notify(guc);
+
+	ret = xe_mmio_wait32(gt, reply_reg, GUC_HXG_MSG_0_ORIGIN,
+			     FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC),
+			     50000, &header, false);
+	if (ret) {
+timeout:
+		drm_err(&xe->drm, "mmio request %#x: no reply %#x\n",
+			request[0], header);
+		return ret;
+	}
+
+	header = xe_mmio_read32(gt, reply_reg);
+	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) ==
+	    GUC_HXG_TYPE_NO_RESPONSE_BUSY) {
+		/*
+		 * Once we got a BUSY reply we must wait again for the final
+		 * response but this time we can't use ORIGIN mask anymore.
+		 * To spot a right change in the reply, we take advantage that
+		 * response SUCCESS and FAILURE differ only by the single bit
+		 * and all other bits are set and can be used as a new mask.
+		 */
+		u32 resp_bits = GUC_HXG_TYPE_RESPONSE_SUCCESS & GUC_HXG_TYPE_RESPONSE_FAILURE;
+		u32 resp_mask = FIELD_PREP(GUC_HXG_MSG_0_TYPE, resp_bits);
+
+		BUILD_BUG_ON(FIELD_MAX(GUC_HXG_MSG_0_TYPE) != GUC_HXG_TYPE_RESPONSE_SUCCESS);
+		BUILD_BUG_ON((GUC_HXG_TYPE_RESPONSE_SUCCESS ^ GUC_HXG_TYPE_RESPONSE_FAILURE) != 1);
+
+		ret = xe_mmio_wait32(gt, reply_reg,  resp_mask, resp_mask,
+				     1000000, &header, false);
+
+		if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) !=
+			     GUC_HXG_ORIGIN_GUC))
+			goto proto;
+		if (unlikely(ret))
+			goto timeout;
+	}
+
+	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) ==
+	    GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
+		u32 reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, header);
+
+		drm_dbg(&xe->drm, "mmio request %#x: retrying, reason %#x\n",
+			request[0], reason);
+		goto retry;
+	}
+
+	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) ==
+	    GUC_HXG_TYPE_RESPONSE_FAILURE) {
+		u32 hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, header);
+		u32 error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, header);
+
+		drm_err(&xe->drm, "mmio request %#x: failure %#x/%#x\n",
+			request[0], error, hint);
+		return -ENXIO;
+	}
+
+	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) !=
+	    GUC_HXG_TYPE_RESPONSE_SUCCESS) {
+proto:
+		drm_err(&xe->drm, "mmio request %#x: unexpected reply %#x\n",
+			request[0], header);
+		return -EPROTO;
+	}
+
+	/* Copy all VF_SW_FLAG_COUNT reply dwords; caller's buffer must hold them */
+	if (response_buf) {
+		response_buf[0] = header;
+
+		for (i = 1; i < VF_SW_FLAG_COUNT; i++) {
+			reply_reg.addr += sizeof(u32);
+			response_buf[i] = xe_mmio_read32(gt, reply_reg);
+		}
+	}
+
+	/* Use data from the GuC response as our return value */
+	return FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, header);
+}
+
+int xe_guc_mmio_send(struct xe_guc *guc, const u32 *request, u32 len)
+{
+	return xe_guc_mmio_send_recv(guc, request, len, NULL);
+}
+
+static int guc_self_cfg(struct xe_guc *guc, u16 key, u16 len, u64 val)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	u32 request[HOST2GUC_SELF_CFG_REQUEST_MSG_LEN] = {
+		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
+			   GUC_ACTION_HOST2GUC_SELF_CFG),
+		FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY, key) |
+		FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN, len),
+		FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32,
+			   lower_32_bits(val)),
+		FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64,
+			   upper_32_bits(val)),
+	};
+	int ret;
+
+	xe_assert(xe, len <= 2);
+	xe_assert(xe, len != 1 || !upper_32_bits(val));
+
+	/* Self config must go over MMIO */
+	ret = xe_guc_mmio_send(guc, request, ARRAY_SIZE(request));
+	/* Negative is an errno; otherwise only a reply value of exactly 1 succeeds */
+	if (unlikely(ret < 0))
+		return ret;
+	if (unlikely(ret > 1))
+		return -EPROTO;
+	if (unlikely(!ret))
+		return -ENOKEY;
+
+	return 0;
+}
+
+int xe_guc_self_cfg32(struct xe_guc *guc, u16 key, u32 val)
+{
+	return guc_self_cfg(guc, key, 1, val);
+}
+
+int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val)
+{
+	return guc_self_cfg(guc, key, 2, val);
+}
+
+void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir)
+{
+	if (iir & GUC_INTR_GUC2HOST)
+		xe_guc_ct_irq_handler(&guc->ct);
+}
+
+void xe_guc_sanitize(struct xe_guc *guc)
+{
+	xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE);
+	xe_guc_ct_disable(&guc->ct);
+	guc->submission_state.enabled = false;
+}
+
+int xe_guc_reset_prepare(struct xe_guc *guc)
+{
+	return xe_guc_submit_reset_prepare(guc);
+}
+
+void xe_guc_reset_wait(struct xe_guc *guc)
+{
+	xe_guc_submit_reset_wait(guc);
+}
+
+void xe_guc_stop_prepare(struct xe_guc *guc)
+{
+	XE_WARN_ON(xe_guc_pc_stop(&guc->pc));
+}
+
+/*
+ * Stop the GuC: CT communication is disabled first, then GuC submission is
+ * stopped.
+ *
+ * Return: the result of xe_guc_submit_stop().
+ */
+int xe_guc_stop(struct xe_guc *guc)
+{
+	xe_guc_ct_disable(&guc->ct);
+
+	return xe_guc_submit_stop(guc);
+}
+
+/*
+ * Kick off xe_guc_pc_start() and then restart GuC submission. A failure of
+ * the former only warns; the result of xe_guc_submit_start() is returned.
+ */
+int xe_guc_start(struct xe_guc *guc)
+{
+	XE_WARN_ON(xe_guc_pc_start(&guc->pc));
+	return xe_guc_submit_start(guc);
+}
+
+void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 status;
+	int err;
+	int i;
+
+	xe_uc_fw_print(&guc->fw, p);
+
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		return;
+
+	status = xe_mmio_read32(gt, GUC_STATUS);
+
+	drm_printf(p, "\nGuC status 0x%08x:\n", status);
+	drm_printf(p, "\tBootrom status = 0x%x\n",
+		   REG_FIELD_GET(GS_BOOTROM_MASK, status));
+	drm_printf(p, "\tuKernel status = 0x%x\n",
+		   REG_FIELD_GET(GS_UKERNEL_MASK, status));
+	drm_printf(p, "\tMIA Core status = 0x%x\n",
+		   REG_FIELD_GET(GS_MIA_MASK, status));
+	drm_printf(p, "\tLog level = %d\n",
+		   xe_guc_log_get_level(&guc->log));
+
+	drm_puts(p, "\nScratch registers:\n");
+	for (i = 0; i < SOFT_SCRATCH_COUNT; i++) {
+		drm_printf(p, "\t%2d: \t0x%x\n",
+			   i, xe_mmio_read32(gt, SOFT_SCRATCH(i)));
+	}
+
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+
+	xe_guc_ct_print(&guc->ct, p, false);
+	xe_guc_submit_print(guc, p);
+}
+
+/**
+ * xe_guc_in_reset() - Detect if GuC MIA is in reset.
+ * @guc: The GuC object
+ *
+ * This function detects runtime resume from d3cold by checking GUC_STATUS:
+ * the GuC does not get reset during d3hot. It is strictly to be called
+ * from the RPM resume handler.
+ *
+ * Return: true if failed to get forcewake or GuC MIA is in Reset,
+ * otherwise false.
+ */
+bool xe_guc_in_reset(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 guc_status;
+
+	/* Failing to take forcewake is reported to the caller as "in reset" */
+	if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT))
+		return true;
+
+	guc_status = xe_mmio_read32(gt, GUC_STATUS);
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+
+	/* Implicit u32 -> bool conversion: true iff the MIA reset bit is set */
+	return guc_status & GS_MIA_IN_RESET;
+}
diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
new file mode 100644
index 000000000000..d3e49e7fd7c3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_H_
+#define _XE_GUC_H_
+
+#include "xe_gt.h"
+#include "xe_guc_types.h"
+#include "xe_hw_engine_types.h"
+#include "xe_macros.h"
+
+struct drm_printer;
+
+int xe_guc_init(struct xe_guc *guc);
+int xe_guc_init_post_hwconfig(struct xe_guc *guc);
+int xe_guc_post_load_init(struct xe_guc *guc);
+int xe_guc_reset(struct xe_guc *guc);
+int xe_guc_upload(struct xe_guc *guc);
+int xe_guc_min_load_for_hwconfig(struct xe_guc *guc);
+int xe_guc_enable_communication(struct xe_guc *guc);
+int xe_guc_suspend(struct xe_guc *guc);
+void xe_guc_notify(struct xe_guc *guc);
+int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr);
+int xe_guc_mmio_send(struct xe_guc *guc, const u32 *request, u32 len);
+int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, u32 len,
+			  u32 *response_buf);
+int xe_guc_self_cfg32(struct xe_guc *guc, u16 key, u32 val);
+int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val);
+void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir);
+void xe_guc_sanitize(struct xe_guc *guc);
+void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p);
+int xe_guc_reset_prepare(struct xe_guc *guc);
+void xe_guc_reset_wait(struct xe_guc *guc);
+void xe_guc_stop_prepare(struct xe_guc *guc);
+int xe_guc_stop(struct xe_guc *guc);
+int xe_guc_start(struct xe_guc *guc);
+bool xe_guc_in_reset(struct xe_guc *guc);
+
+static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class)
+{
+	switch (class) {
+	case XE_ENGINE_CLASS_RENDER:
+		return GUC_RENDER_CLASS;
+	case XE_ENGINE_CLASS_VIDEO_DECODE:
+		return GUC_VIDEO_CLASS;
+	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+		return GUC_VIDEOENHANCE_CLASS;
+	case XE_ENGINE_CLASS_COPY:
+		return GUC_BLITTER_CLASS;
+	case XE_ENGINE_CLASS_COMPUTE:
+		return GUC_COMPUTE_CLASS;
+	case XE_ENGINE_CLASS_OTHER:
+		return GUC_GSC_OTHER_CLASS;
+	default:
+		XE_WARN_ON(class);
+		return -1;
+	}
+}
+
+static inline struct xe_gt *guc_to_gt(struct xe_guc *guc)
+{
+	return container_of(guc, struct xe_gt, uc.guc);
+}
+
+static inline struct xe_device *guc_to_xe(struct xe_guc *guc)
+{
+	return gt_to_xe(guc_to_gt(guc));
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
new file mode 100644
index 000000000000..390e6f1bf4e1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -0,0 +1,672 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_guc_ads.h"
+
+#include <drm/drm_managed.h>
+
+#include "regs/xe_engine_regs.h"
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_guc_regs.h"
+#include "xe_bo.h"
+#include "xe_gt.h"
+#include "xe_gt_ccs_mode.h"
+#include "xe_guc.h"
+#include "xe_hw_engine.h"
+#include "xe_lrc.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_platform_types.h"
+
+/* Slack of a few additional entries per engine */
+#define ADS_REGSET_EXTRA_MAX	8
+
+static struct xe_guc *
+ads_to_guc(struct xe_guc_ads *ads)
+{
+	return container_of(ads, struct xe_guc, ads);
+}
+
+static struct xe_gt *
+ads_to_gt(struct xe_guc_ads *ads)
+{
+	return container_of(ads, struct xe_gt, uc.guc.ads);
+}
+
+static struct xe_device *
+ads_to_xe(struct xe_guc_ads *ads)
+{
+	return gt_to_xe(ads_to_gt(ads));
+}
+
+static struct iosys_map *
+ads_to_map(struct xe_guc_ads *ads)
+{
+	return &ads->bo->vmap;
+}
+
+/* UM Queue parameters: */
+#define GUC_UM_QUEUE_SIZE       (SZ_64K)
+#define GUC_PAGE_RES_TIMEOUT_US (-1)
+
+/*
+ * The Additional Data Struct (ADS) has pointers for different buffers used by
+ * the GuC. One single gem object contains the ADS struct itself (guc_ads) and
+ * all the extra buffers indirectly linked via the ADS struct's entries.
+ *
+ * Layout of the ADS blob allocated for the GuC:
+ *
+ *      +---------------------------------------+ <== base
+ *      | guc_ads                               |
+ *      +---------------------------------------+
+ *      | guc_policies                          |
+ *      +---------------------------------------+
+ *      | guc_gt_system_info                    |
+ *      +---------------------------------------+
+ *      | guc_engine_usage                      |
+ *      +---------------------------------------+
+ *      | guc_um_init_params                    |
+ *      +---------------------------------------+ <== static
+ *      | guc_mmio_reg[countA] (engine 0.0)     |
+ *      | guc_mmio_reg[countB] (engine 0.1)     |
+ *      | guc_mmio_reg[countC] (engine 1.0)     |
+ *      |   ...                                 |
+ *      +---------------------------------------+ <== dynamic
+ *      | padding                               |
+ *      +---------------------------------------+ <== 4K aligned
+ *      | golden contexts                       |
+ *      +---------------------------------------+
+ *      | padding                               |
+ *      +---------------------------------------+ <== 4K aligned
+ *      | capture lists                         |
+ *      +---------------------------------------+
+ *      | padding                               |
+ *      +---------------------------------------+ <== 4K aligned
+ *      | UM queues                             |
+ *      +---------------------------------------+
+ *      | padding                               |
+ *      +---------------------------------------+ <== 4K aligned
+ *      | private data                          |
+ *      +---------------------------------------+
+ *      | padding                               |
+ *      +---------------------------------------+ <== 4K aligned
+ */
+struct __guc_ads_blob {
+	struct guc_ads ads;
+	struct guc_policies policies;
+	struct guc_gt_system_info system_info;
+	struct guc_engine_usage engine_usage;
+	struct guc_um_init_params um_init_params;
+	/* From here on, location is dynamic! Refer to above diagram. */
+	struct guc_mmio_reg regset[];
+} __packed;
+
+#define ads_blob_read(ads_, field_) \
+	xe_map_rd_field(ads_to_xe(ads_), ads_to_map(ads_), 0, \
+			struct __guc_ads_blob, field_)
+
+#define ads_blob_write(ads_, field_, val_)			\
+	xe_map_wr_field(ads_to_xe(ads_), ads_to_map(ads_), 0,	\
+			struct __guc_ads_blob, field_, val_)
+
+#define info_map_write(xe_, map_, field_, val_) \
+	xe_map_wr_field(xe_, map_, 0, struct guc_gt_system_info, field_, val_)
+
+#define info_map_read(xe_, map_, field_) \
+	xe_map_rd_field(xe_, map_, 0, struct guc_gt_system_info, field_)
+
+static size_t guc_ads_regset_size(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+
+	xe_assert(xe, ads->regset_size);
+
+	return ads->regset_size;
+}
+
+static size_t guc_ads_golden_lrc_size(struct xe_guc_ads *ads)
+{
+	return PAGE_ALIGN(ads->golden_lrc_size);
+}
+
+static size_t guc_ads_capture_size(struct xe_guc_ads *ads)
+{
+	/* FIXME: Allocate a proper capture list */
+	return PAGE_ALIGN(PAGE_SIZE);
+}
+
+static size_t guc_ads_um_queues_size(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+
+	if (!xe->info.has_usm)
+		return 0;
+
+	return GUC_UM_QUEUE_SIZE * GUC_UM_HW_QUEUE_MAX;
+}
+
+static size_t guc_ads_private_data_size(struct xe_guc_ads *ads)
+{
+	return PAGE_ALIGN(ads_to_guc(ads)->fw.private_data_size);
+}
+
+static size_t guc_ads_regset_offset(struct xe_guc_ads *ads)
+{
+	return offsetof(struct __guc_ads_blob, regset);
+}
+
+static size_t guc_ads_golden_lrc_offset(struct xe_guc_ads *ads)
+{
+	size_t offset;
+
+	offset = guc_ads_regset_offset(ads) +
+		guc_ads_regset_size(ads);
+
+	return PAGE_ALIGN(offset);
+}
+
+static size_t guc_ads_capture_offset(struct xe_guc_ads *ads)
+{
+	size_t offset;
+
+	offset = guc_ads_golden_lrc_offset(ads) +
+		guc_ads_golden_lrc_size(ads);
+
+	return PAGE_ALIGN(offset);
+}
+
+static size_t guc_ads_um_queues_offset(struct xe_guc_ads *ads)
+{
+	size_t offset;
+
+	offset = guc_ads_capture_offset(ads) +
+		guc_ads_capture_size(ads);
+
+	return PAGE_ALIGN(offset);
+}
+
+static size_t guc_ads_private_data_offset(struct xe_guc_ads *ads)
+{
+	size_t offset;
+
+	offset = guc_ads_um_queues_offset(ads) +
+		guc_ads_um_queues_size(ads);
+
+	return PAGE_ALIGN(offset);
+}
+
+static size_t guc_ads_size(struct xe_guc_ads *ads)
+{
+	return guc_ads_private_data_offset(ads) +
+		guc_ads_private_data_size(ads);
+}
+
+static bool needs_wa_1607983814(struct xe_device *xe)
+{
+	return GRAPHICS_VERx100(xe) < 1250;
+}
+
+static size_t calculate_regset_size(struct xe_gt *gt)
+{
+	struct xe_reg_sr_entry *sr_entry;
+	unsigned long sr_idx;
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	unsigned int count = 0;
+
+	for_each_hw_engine(hwe, gt, id)
+		xa_for_each(&hwe->reg_sr.xa, sr_idx, sr_entry)
+			count++;
+
+	count += ADS_REGSET_EXTRA_MAX * XE_NUM_HW_ENGINES;
+
+	if (needs_wa_1607983814(gt_to_xe(gt)))
+		count += LNCFCMOCS_REG_COUNT;
+
+	return count * sizeof(struct guc_mmio_reg);
+}
+
+static u32 engine_enable_mask(struct xe_gt *gt, enum xe_engine_class class)
+{
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	u32 mask = 0;
+
+	for_each_hw_engine(hwe, gt, id)
+		if (hwe->class == class)
+			mask |= BIT(hwe->instance);
+
+	return mask;
+}
+
+static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+	struct xe_gt *gt = ads_to_gt(ads);
+	size_t total_size = 0, alloc_size, real_size;
+	int class;
+
+	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
+		if (!engine_enable_mask(gt, class))
+			continue;
+
+		real_size = xe_lrc_size(xe, class);
+		alloc_size = PAGE_ALIGN(real_size);
+		total_size += alloc_size;
+	}
+
+	return total_size;
+}
+
+#define MAX_GOLDEN_LRC_SIZE	(SZ_4K * 64)
+
+int xe_guc_ads_init(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+	struct xe_gt *gt = ads_to_gt(ads);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_bo *bo;
+
+	ads->golden_lrc_size = calculate_golden_lrc_size(ads);
+	ads->regset_size = calculate_regset_size(gt);
+
+	bo = xe_managed_bo_create_pin_map(xe, tile, guc_ads_size(ads) + MAX_GOLDEN_LRC_SIZE,
+					  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+					  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	ads->bo = bo;
+
+	return 0;
+}
+
+/**
+ * xe_guc_ads_init_post_hwconfig - initialize ADS post hwconfig load
+ * @ads: Additional data structures object
+ *
+ * Recalculate golden_lrc_size & regset_size as the number of hardware engines
+ * may have changed after the hwconfig was loaded. Also verify the new sizes fit
+ * in the already allocated ADS buffer object.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads)
+{
+	struct xe_gt *gt = ads_to_gt(ads);
+	u32 prev_regset_size = ads->regset_size;
+
+	xe_gt_assert(gt, ads->bo);
+
+	ads->golden_lrc_size = calculate_golden_lrc_size(ads);
+	ads->regset_size = calculate_regset_size(gt);
+
+	/* Compare sums: a shrunken regset must not wrap the unsigned delta */
+	xe_gt_assert(gt, ads->golden_lrc_size + ads->regset_size <=
+		     (size_t)MAX_GOLDEN_LRC_SIZE + prev_regset_size);
+
+	return 0;
+}
+
+static void guc_policies_init(struct xe_guc_ads *ads)
+{
+	ads_blob_write(ads, policies.dpc_promote_time,
+		       GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US);
+	ads_blob_write(ads, policies.max_num_work_items,
+		       GLOBAL_POLICY_MAX_NUM_WI);
+	ads_blob_write(ads, policies.global_flags, 0);
+	ads_blob_write(ads, policies.is_valid, 1);
+}
+
+static void fill_engine_enable_masks(struct xe_gt *gt,
+				     struct iosys_map *info_map)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	info_map_write(xe, info_map, engine_enabled_masks[GUC_RENDER_CLASS],
+		       engine_enable_mask(gt, XE_ENGINE_CLASS_RENDER));
+	info_map_write(xe, info_map, engine_enabled_masks[GUC_BLITTER_CLASS],
+		       engine_enable_mask(gt, XE_ENGINE_CLASS_COPY));
+	info_map_write(xe, info_map, engine_enabled_masks[GUC_VIDEO_CLASS],
+		       engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_DECODE));
+	info_map_write(xe, info_map,
+		       engine_enabled_masks[GUC_VIDEOENHANCE_CLASS],
+		       engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE));
+	info_map_write(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS],
+		       engine_enable_mask(gt, XE_ENGINE_CLASS_COMPUTE));
+	info_map_write(xe, info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS],
+		       engine_enable_mask(gt, XE_ENGINE_CLASS_OTHER));
+}
+
+static void guc_prep_golden_lrc_null(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+	struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
+			offsetof(struct __guc_ads_blob, system_info));
+	u8 guc_class;
+
+	/* '<' as in the other class loops: '<=' would index one past the last class */
+	for (guc_class = 0; guc_class < GUC_MAX_ENGINE_CLASSES; ++guc_class) {
+		if (!info_map_read(xe, &info_map, engine_enabled_masks[guc_class]))
+			continue;
+
+		ads_blob_write(ads, ads.eng_state_size[guc_class],
+			       guc_ads_golden_lrc_size(ads) -
+			       xe_lrc_skip_size(xe));
+		ads_blob_write(ads, ads.golden_context_lrca[guc_class],
+			       xe_bo_ggtt_addr(ads->bo) +
+			       guc_ads_golden_lrc_offset(ads));
+	}
+}
+
+static void guc_mapping_table_init_invalid(struct xe_gt *gt,
+					   struct iosys_map *info_map)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	unsigned int i, j;
+
+	/* Table must be set to invalid values for entries not used */
+	for (i = 0; i < GUC_MAX_ENGINE_CLASSES; ++i)
+		for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; ++j)
+			info_map_write(xe, info_map, mapping_table[i][j],
+				       GUC_MAX_INSTANCES_PER_CLASS);
+}
+
+static void guc_mapping_table_init(struct xe_gt *gt,
+				   struct iosys_map *info_map)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+
+	guc_mapping_table_init_invalid(gt, info_map);
+
+	for_each_hw_engine(hwe, gt, id) {
+		u8 guc_class;
+
+		guc_class = xe_engine_class_to_guc_class(hwe->class);
+		info_map_write(xe, info_map,
+			       mapping_table[guc_class][hwe->logical_instance],
+			       hwe->instance);
+	}
+}
+
+static void guc_capture_list_init(struct xe_guc_ads *ads)
+{
+	int i, j;
+	u32 addr = xe_bo_ggtt_addr(ads->bo) + guc_ads_capture_offset(ads);
+
+	/* FIXME: Populate a proper capture list */
+	for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) {
+		for (j = 0; j < GUC_MAX_ENGINE_CLASSES; j++) {
+			ads_blob_write(ads, ads.capture_instance[i][j], addr);
+			ads_blob_write(ads, ads.capture_class[i][j], addr);
+		}
+
+		ads_blob_write(ads, ads.capture_global[i], addr);
+	}
+}
+
+static void guc_mmio_regset_write_one(struct xe_guc_ads *ads,
+				      struct iosys_map *regset_map,
+				      struct xe_reg reg,
+				      unsigned int n_entry)
+{
+	struct guc_mmio_reg entry = {
+		.offset = reg.addr,
+		.flags = reg.masked ? GUC_REGSET_MASKED : 0,
+	};
+
+	xe_map_memcpy_to(ads_to_xe(ads), regset_map, n_entry * sizeof(entry),
+			 &entry, sizeof(entry));
+}
+
+static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads,
+					  struct iosys_map *regset_map,
+					  struct xe_hw_engine *hwe)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+	struct xe_hw_engine *hwe_rcs_reset_domain =
+		xe_gt_any_hw_engine_by_reset_domain(hwe->gt, XE_ENGINE_CLASS_RENDER);
+	struct xe_reg_sr_entry *entry;
+	unsigned long idx;
+	unsigned int count = 0;
+	const struct {
+		struct xe_reg reg;
+		bool skip;
+	} *e, extra_regs[] = {
+		{ .reg = RING_MODE(hwe->mmio_base),			},
+		{ .reg = RING_HWS_PGA(hwe->mmio_base),			},
+		{ .reg = RING_IMR(hwe->mmio_base),			},
+		{ .reg = RCU_MODE, .skip = hwe != hwe_rcs_reset_domain	},
+		{ .reg = CCS_MODE,
+		  .skip = hwe != hwe_rcs_reset_domain || !xe_gt_ccs_mode_enabled(hwe->gt) },
+	};
+	u32 i;
+
+	BUILD_BUG_ON(ARRAY_SIZE(extra_regs) > ADS_REGSET_EXTRA_MAX);
+
+	xa_for_each(&hwe->reg_sr.xa, idx, entry)
+		guc_mmio_regset_write_one(ads, regset_map, entry->reg, count++);
+
+	for (e = extra_regs; e < extra_regs + ARRAY_SIZE(extra_regs); e++) {
+		if (e->skip)
+			continue;
+
+		guc_mmio_regset_write_one(ads, regset_map, e->reg, count++);
+	}
+
+	/* Wa_1607983814 */
+	if (needs_wa_1607983814(xe) && hwe->class == XE_ENGINE_CLASS_RENDER) {
+		for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) {
+			guc_mmio_regset_write_one(ads, regset_map,
+						  XELP_LNCFCMOCS(i), count++);
+		}
+	}
+
+	return count;
+}
+
+static void guc_mmio_reg_state_init(struct xe_guc_ads *ads)
+{
+	size_t regset_offset = guc_ads_regset_offset(ads);
+	struct xe_gt *gt = ads_to_gt(ads);
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	u32 addr = xe_bo_ggtt_addr(ads->bo) + regset_offset;
+	struct iosys_map regset_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
+							    regset_offset);
+	unsigned int regset_used = 0;
+
+	for_each_hw_engine(hwe, gt, id) {
+		unsigned int count;
+		u8 gc;
+
+		/*
+		 * 1. Write all MMIO entries for this engine to the table. No
+		 * need to worry about fused-off engines or engines with no
+		 * entries in the regset: the reg_state_list has been zeroed
+		 * by xe_guc_ads_populate().
+		 */
+		count = guc_mmio_regset_write(ads, &regset_map, hwe);
+		if (!count)
+			continue;
+
+		/*
+		 * 2. Record in the header (ads.reg_state_list) the address
+		 * location and number of entries.
+		 */
+		gc = xe_engine_class_to_guc_class(hwe->class);
+		ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].address, addr);
+		ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].count, count);
+
+		addr += count * sizeof(struct guc_mmio_reg);
+		iosys_map_incr(&regset_map, count * sizeof(struct guc_mmio_reg));
+
+		regset_used += count * sizeof(struct guc_mmio_reg);
+	}
+
+	xe_gt_assert(gt, regset_used <= ads->regset_size);
+}
+
+static void guc_um_init_params(struct xe_guc_ads *ads)
+{
+	u32 um_queue_offset = guc_ads_um_queues_offset(ads);
+	u64 base_dpa;
+	u32 base_ggtt;
+	int i;
+
+	base_ggtt = xe_bo_ggtt_addr(ads->bo) + um_queue_offset;
+	base_dpa = xe_bo_main_addr(ads->bo, PAGE_SIZE) + um_queue_offset;
+
+	for (i = 0; i < GUC_UM_HW_QUEUE_MAX; ++i) {
+		ads_blob_write(ads, um_init_params.queue_params[i].base_dpa,
+			       base_dpa + (i * GUC_UM_QUEUE_SIZE));
+		ads_blob_write(ads, um_init_params.queue_params[i].base_ggtt_address,
+			       base_ggtt + (i * GUC_UM_QUEUE_SIZE));
+		ads_blob_write(ads, um_init_params.queue_params[i].size_in_bytes,
+			       GUC_UM_QUEUE_SIZE);
+	}
+
+	ads_blob_write(ads, um_init_params.page_response_timeout_in_us,
+		       GUC_PAGE_RES_TIMEOUT_US);
+}
+
+static void guc_doorbell_init(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+	struct xe_gt *gt = ads_to_gt(ads);
+
+	if (GRAPHICS_VER(xe) >= 12 && !IS_DGFX(xe)) {
+		u32 distdbreg =
+			xe_mmio_read32(gt, DIST_DBS_POPULATED);
+
+		ads_blob_write(ads,
+			       system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI],
+			       REG_FIELD_GET(DOORBELLS_PER_SQIDI_MASK, distdbreg) + 1);
+	}
+}
+
+/**
+ * xe_guc_ads_populate_minimal - populate minimal ADS
+ * @ads: Additional data structures object
+ *
+ * This function populates a minimal ADS that does not support submissions but
+ * enough so the GuC can load and the hwconfig table can be read.
+ */
+void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads)
+{
+	struct xe_gt *gt = ads_to_gt(ads);
+	struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
+			offsetof(struct __guc_ads_blob, system_info));
+	u32 base = xe_bo_ggtt_addr(ads->bo);
+
+	xe_gt_assert(gt, ads->bo);
+
+	xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size);
+	guc_policies_init(ads);
+	guc_prep_golden_lrc_null(ads);
+	guc_mapping_table_init_invalid(gt, &info_map);
+	guc_doorbell_init(ads);
+
+	ads_blob_write(ads, ads.scheduler_policies, base +
+		       offsetof(struct __guc_ads_blob, policies));
+	ads_blob_write(ads, ads.gt_system_info, base +
+		       offsetof(struct __guc_ads_blob, system_info));
+	ads_blob_write(ads, ads.private_data, base +
+		       guc_ads_private_data_offset(ads));
+}
+
+void xe_guc_ads_populate(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+	struct xe_gt *gt = ads_to_gt(ads);
+	struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
+			offsetof(struct __guc_ads_blob, system_info));
+	u32 base = xe_bo_ggtt_addr(ads->bo);
+
+	xe_gt_assert(gt, ads->bo);
+
+	xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size);
+	guc_policies_init(ads);
+	fill_engine_enable_masks(gt, &info_map);
+	guc_mmio_reg_state_init(ads);
+	guc_prep_golden_lrc_null(ads);
+	guc_mapping_table_init(gt, &info_map);
+	guc_capture_list_init(ads);
+	guc_doorbell_init(ads);
+
+	if (xe->info.has_usm) {
+		guc_um_init_params(ads);
+		ads_blob_write(ads, ads.um_init_data, base +
+			       offsetof(struct __guc_ads_blob, um_init_params));
+	}
+
+	ads_blob_write(ads, ads.scheduler_policies, base +
+		       offsetof(struct __guc_ads_blob, policies));
+	ads_blob_write(ads, ads.gt_system_info, base +
+		       offsetof(struct __guc_ads_blob, system_info));
+	ads_blob_write(ads, ads.private_data, base +
+		       guc_ads_private_data_offset(ads));
+}
+
+static void guc_populate_golden_lrc(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+	struct xe_gt *gt = ads_to_gt(ads);
+	struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
+			offsetof(struct __guc_ads_blob, system_info));
+	size_t total_size = 0, alloc_size, real_size;
+	u32 addr_ggtt, offset;
+	int class;
+
+	offset = guc_ads_golden_lrc_offset(ads);
+	addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset;
+
+	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
+		u8 guc_class;
+
+		guc_class = xe_engine_class_to_guc_class(class);
+
+		if (!info_map_read(xe, &info_map,
+				   engine_enabled_masks[guc_class]))
+			continue;
+
+		xe_gt_assert(gt, gt->default_lrc[class]);
+
+		real_size = xe_lrc_size(xe, class);
+		alloc_size = PAGE_ALIGN(real_size);
+		total_size += alloc_size;
+
+		/*
+		 * This interface is slightly confusing. We need to pass the
+		 * base address of the full golden context and the size of just
+		 * the engine state, which is the section of the context image
+		 * that starts after the execlists LRC registers. This is
+		 * required to allow the GuC to restore just the engine state
+		 * when a watchdog reset occurs.
+		 * We calculate the engine state size by removing the size of
+		 * what comes before it in the context image (which is identical
+		 * on all engines).
+		 */
+		ads_blob_write(ads, ads.eng_state_size[guc_class],
+			       real_size - xe_lrc_skip_size(xe));
+		ads_blob_write(ads, ads.golden_context_lrca[guc_class],
+			       addr_ggtt);
+
+		xe_map_memcpy_to(xe, ads_to_map(ads), offset,
+				 gt->default_lrc[class], real_size);
+
+		addr_ggtt += alloc_size;
+		offset += alloc_size;
+	}
+
+	xe_gt_assert(gt, total_size == ads->golden_lrc_size);
+}
+
+void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads)
+{
+	guc_populate_golden_lrc(ads);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.h b/drivers/gpu/drm/xe/xe_guc_ads.h
new file mode 100644
index 000000000000..138ef6267671
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ads.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_ADS_H_
+#define _XE_GUC_ADS_H_
+
+#include "xe_guc_ads_types.h"
+
+int xe_guc_ads_init(struct xe_guc_ads *ads);
+int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads);
+void xe_guc_ads_populate(struct xe_guc_ads *ads);
+void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads);
+void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ads_types.h b/drivers/gpu/drm/xe/xe_guc_ads_types.h
new file mode 100644
index 000000000000..4afe44bece4b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ads_types.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_ADS_TYPES_H_
+#define _XE_GUC_ADS_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_bo;
+
+/**
+ * struct xe_guc_ads - GuC additional data structures (ADS)
+ */
+struct xe_guc_ads {
+	/** @bo: XE BO for GuC ads blob */
+	struct xe_bo *bo;
+	/** @golden_lrc_size: golden LRC size */
+	size_t golden_lrc_size;
+	/** @regset_size: size of register set passed to GuC for save/restore */
+	u32 regset_size;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
new file mode 100644
index 000000000000..24a33fa36496
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -0,0 +1,1320 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_guc_ct.h"
+
+#include <linux/bitfield.h>
+#include <linux/circ_buf.h>
+#include <linux/delay.h>
+
+#include <drm/drm_managed.h>
+
+#include "abi/guc_actions_abi.h"
+#include "abi/guc_klvs_abi.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_pagefault.h"
+#include "xe_gt_tlb_invalidation.h"
+#include "xe_guc.h"
+#include "xe_guc_submit.h"
+#include "xe_map.h"
+#include "xe_pm.h"
+#include "xe_trace.h"
+
+/* Used when a CT send wants to block and / or receive data */
+struct g2h_fence {
+	u32 *response_buffer;	/* optional destination for the response payload */
+	u32 seqno;		/* xarray key; ~0 until one is assigned at send time */
+	u16 response_len;	/* dwords copied into response_buffer */
+	u16 error;		/* only written when fail is set */
+	u16 hint;		/* only written when fail is set */
+	u16 reason;		/* only written when retry is set */
+	bool retry;
+	bool fail;
+	bool done;		/* set once the matching G2H response was parsed */
+};
+
+static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer)
+{
+	g2h_fence->seqno = ~0x0; /* sentinel: no xarray slot assigned yet */
+	g2h_fence->response_buffer = response_buffer;
+	g2h_fence->response_len = 0;
+	g2h_fence->done = false;
+	g2h_fence->retry = false;
+	g2h_fence->fail = false;
+}
+
+static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence)
+{
+	return g2h_fence->seqno == ~0x0; /* still holds the g2h_fence_init() sentinel */
+}
+
+/* Map an embedded CT object back to the GuC that contains it */
+static struct xe_guc *ct_to_guc(struct xe_guc_ct *ct)
+{
+	return container_of(ct, struct xe_guc, ct);
+}
+
+/* Map an embedded CT object back to the GT that contains it (via uc.guc.ct) */
+static struct xe_gt *ct_to_gt(struct xe_guc_ct *ct)
+{
+	return container_of(ct, struct xe_gt, uc.guc.ct);
+}
+
+/* Resolve the xe device a CT object belongs to, going through its GT */
+static struct xe_device *ct_to_xe(struct xe_guc_ct *ct)
+{
+	return gt_to_xe(ct_to_gt(ct));
+}
+
+/**
+ * DOC: GuC CTB Blob
+ *
+ * We allocate single blob to hold both CTB descriptors and buffers:
+ *
+ *      +--------+-----------------------------------------------+------+
+ *      | offset | contents                                      | size |
+ *      +========+===============================================+======+
+ *      | 0x0000 | H2G CTB Descriptor (send)                     |      |
+ *      +--------+-----------------------------------------------+  4K  |
+ *      | 0x0800 | G2H CTB Descriptor (g2h)                      |      |
+ *      +--------+-----------------------------------------------+------+
+ *      | 0x1000 | H2G CT Buffer (send)                          | n*4K |
+ *      |        |                                               |      |
+ *      +--------+-----------------------------------------------+------+
+ *      | 0x1000 | G2H CT Buffer (g2h)                           | m*4K |
+ *      | + n*4K |                                               |      |
+ *      +--------+-----------------------------------------------+------+
+ *
+ * Size of each ``CT Buffer`` must be multiple of 4K.
+ * We don't expect too many messages in flight at any time, unless we are
+ * using the GuC submission. In that case each request requires a minimum
+ * 2 dwords which gives us a maximum 256 queued requests. Hopefully this is
+ * enough space to avoid backpressure on the driver. We increase the size
+ * of the receive buffer (relative to the send) to ensure a G2H response
+ * CTB has a landing spot.
+ */
+
+#define CTB_DESC_SIZE		ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K)
+#define CTB_H2G_BUFFER_SIZE	(SZ_4K)
+#define CTB_G2H_BUFFER_SIZE	(4 * CTB_H2G_BUFFER_SIZE)
+#define G2H_ROOM_BUFFER_SIZE	(CTB_G2H_BUFFER_SIZE / 4)
+
+/* Total CT blob size: two descriptors followed by the H2G and G2H buffers */
+static size_t guc_ct_size(void)
+{
+	return CTB_DESC_SIZE * 2 + CTB_H2G_BUFFER_SIZE + CTB_G2H_BUFFER_SIZE;
+}
+
+static void guc_ct_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_guc_ct *ct = arg;
+
+	xa_destroy(&ct->fence_lookup); /* drmm action registered by xe_guc_ct_init() */
+}
+
+static void g2h_worker_func(struct work_struct *w);
+
+static void primelockdep(struct xe_guc_ct *ct)
+{
+	/* Record for lockdep that ct->lock may be taken under fs reclaim */
+	if (IS_ENABLED(CONFIG_LOCKDEP)) {
+		fs_reclaim_acquire(GFP_KERNEL);
+		might_lock(&ct->lock);
+		fs_reclaim_release(GFP_KERNEL);
+	}
+}
+
+int xe_guc_ct_init(struct xe_guc_ct *ct)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct xe_gt *gt = ct_to_gt(ct);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_bo *bo;
+	int err;
+
+	xe_assert(xe, !(guc_ct_size() % PAGE_SIZE));
+
+	/* drmm_mutex_init() registers a managed release action and so can fail */
+	err = drmm_mutex_init(&xe->drm, &ct->lock);
+	if (err)
+		return err;
+
+	spin_lock_init(&ct->fast_lock);
+	xa_init(&ct->fence_lookup);
+	INIT_WORK(&ct->g2h_worker, g2h_worker_func);
+	init_waitqueue_head(&ct->wq);
+	init_waitqueue_head(&ct->g2h_fence_wq);
+
+	primelockdep(ct);
+
+	bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(),
+					  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+					  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	ct->bo = bo;
+	/* guc_ct_fini() destroys the fence lookup xarray when the device goes away */
+	return drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct);
+}
+
+#define desc_read(xe_, guc_ctb__, field_)			\
+	xe_map_rd_field(xe_, &guc_ctb__->desc, 0,		\
+			struct guc_ct_buffer_desc, field_)
+
+#define desc_write(xe_, guc_ctb__, field_, val_)		\
+	xe_map_wr_field(xe_, &guc_ctb__->desc, 0,		\
+			struct guc_ct_buffer_desc, field_, val_)
+
+/* Reset the host-side H2G tracking state and zero its descriptor in the blob */
+static void guc_ct_ctb_h2g_init(struct xe_device *xe, struct guc_ctb *h2g,
+				struct iosys_map *map)
+{
+	struct guc_ctb_info *info = &h2g->info;
+
+	info->broken = false;
+	info->size = CTB_H2G_BUFFER_SIZE / sizeof(u32);
+	info->resv_space = 0;
+	info->head = 0;
+	info->tail = 0;
+	info->space = CIRC_SPACE(info->tail, info->head, info->size) - info->resv_space;
+
+	h2g->desc = *map;
+	h2g->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2);
+	xe_map_memset(xe, &h2g->desc, 0, 0, sizeof(struct guc_ct_buffer_desc));
+}
+
+/* Reset the host-side G2H tracking state and zero its descriptor in the blob */
+static void guc_ct_ctb_g2h_init(struct xe_device *xe, struct guc_ctb *g2h,
+				struct iosys_map *map)
+{
+	struct guc_ctb_info *info = &g2h->info;
+
+	info->broken = false;
+	info->size = CTB_G2H_BUFFER_SIZE / sizeof(u32);
+	info->resv_space = G2H_ROOM_BUFFER_SIZE / sizeof(u32);
+	info->head = 0;
+	info->tail = 0;
+	info->space = CIRC_SPACE(info->tail, info->head, info->size) - info->resv_space;
+
+	/* The G2H descriptor is the second one; its buffer follows the H2G buffer */
+	g2h->desc = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE);
+	g2h->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2 + CTB_H2G_BUFFER_SIZE);
+	xe_map_memset(xe, &g2h->desc, 0, 0, sizeof(struct guc_ct_buffer_desc));
+}
+
+/*
+ * Register the H2G channel with the GuC through self-config KLVs: the GGTT
+ * address of its descriptor, the GGTT address of its command buffer and the
+ * buffer size in bytes. Stops at the first failing KLV and returns its error.
+ */
+static int guc_ct_ctb_h2g_register(struct xe_guc_ct *ct)
+{
+	struct xe_guc *guc = ct_to_guc(ct);
+	u32 desc_addr = xe_bo_ggtt_addr(ct->bo);
+	u32 ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2;
+	u32 size = ct->ctbs.h2g.info.size * sizeof(u32);
+	int err;
+
+	err = xe_guc_self_cfg64(guc, GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY,
+				desc_addr);
+	if (err)
+		return err;
+
+	err = xe_guc_self_cfg64(guc, GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY,
+				ctb_addr);
+	if (err)
+		return err;
+
+	/* The size is sent last, after both addresses were accepted */
+	return xe_guc_self_cfg32(guc, GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY, size);
+}
+
+/*
+ * Register the G2H channel with the GuC through self-config KLVs: the GGTT
+ * address of its descriptor, the GGTT address of its command buffer and the
+ * buffer size in bytes. Stops at the first failing KLV and returns its error.
+ */
+static int guc_ct_ctb_g2h_register(struct xe_guc_ct *ct)
+{
+	struct xe_guc *guc = ct_to_guc(ct);
+	u32 desc_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE;
+	u32 ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2 +
+		CTB_H2G_BUFFER_SIZE;
+	u32 size = ct->ctbs.g2h.info.size * sizeof(u32);
+	int err;
+
+	err = xe_guc_self_cfg64(guc, GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY,
+				desc_addr);
+	if (err)
+		return err;
+
+	err = xe_guc_self_cfg64(guc, GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY,
+				ctb_addr);
+	if (err)
+		return err;
+
+	/* The size is sent last, after both addresses were accepted */
+	return xe_guc_self_cfg32(guc, GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY, size);
+}
+
+static int guc_ct_control_toggle(struct xe_guc_ct *ct, bool enable)
+{
+	u32 control = enable ? GUC_CTB_CONTROL_ENABLE : GUC_CTB_CONTROL_DISABLE;
+	u32 request[HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN] = {
+		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_HOST2GUC_CONTROL_CTB),
+		FIELD_PREP(HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL, control),
+	};
+	int ret;
+
+	/* A positive return is unexpected here and is reported as -EPROTO */
+	ret = xe_guc_mmio_send(ct_to_guc(ct), request, ARRAY_SIZE(request));
+	return ret > 0 ? -EPROTO : ret;
+}
+
+/*
+ * Reset both CTBs, register them with the GuC and, once the GuC has accepted
+ * the enable request, mark the channel usable and wake anyone waiting on it.
+ */
+int xe_guc_ct_enable(struct xe_guc_ct *ct)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	int err;
+
+	xe_assert(xe, !ct->enabled);
+
+	guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap);
+	guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap);
+
+	/* Each step only runs if all the previous ones succeeded */
+	err = guc_ct_ctb_h2g_register(ct);
+	if (!err)
+		err = guc_ct_ctb_g2h_register(ct);
+	if (!err)
+		err = guc_ct_control_toggle(ct, true);
+	if (err) {
+		drm_err(&xe->drm, "Failed to enable CT (%d)\n", err);
+		return err;
+	}
+
+	/* Lock nesting: ct->lock (mutex) outside, fast_lock (spinlock) inside */
+	mutex_lock(&ct->lock);
+	spin_lock_irq(&ct->fast_lock);
+	ct->g2h_outstanding = 0;
+	ct->enabled = true;
+	spin_unlock_irq(&ct->fast_lock);
+	mutex_unlock(&ct->lock);
+
+	/* Full barrier before waking the sleepers on ct->wq */
+	smp_mb();
+	wake_up_all(&ct->wq);
+	drm_dbg(&xe->drm, "GuC CT communication channel enabled\n");
+
+	return 0;
+}
+
+void xe_guc_ct_disable(struct xe_guc_ct *ct)
+{
+	mutex_lock(&ct->lock); /* Serialise dequeue_one_g2h() */
+	spin_lock_irq(&ct->fast_lock); /* Serialise CT fast-path */
+	ct->enabled = false; /* Finally disable CT communication */
+	spin_unlock_irq(&ct->fast_lock);
+	mutex_unlock(&ct->lock);
+
+	xa_destroy(&ct->fence_lookup); /* Forget every fence still in the lookup */
+}
+
+static bool h2g_has_room(struct xe_guc_ct *ct, u32 cmd_len)
+{
+	struct guc_ctb *h2g = &ct->ctbs.h2g;
+
+	lockdep_assert_held(&ct->lock);
+
+	/* Cached space suffices: no need to read the descriptor */
+	if (cmd_len <= h2g->info.space)
+		return true;
+
+	/* Otherwise refresh head from the descriptor and recompute the space */
+	h2g->info.head = desc_read(ct_to_xe(ct), h2g, head);
+	h2g->info.space = CIRC_SPACE(h2g->info.tail, h2g->info.head,
+				     h2g->info.size) - h2g->info.resv_space;
+
+	return cmd_len <= h2g->info.space;
+}
+
+static bool g2h_has_room(struct xe_guc_ct *ct, u32 g2h_len)
+{
+	/* fast_lock is only required (and checked) when G2H space is requested */
+	if (g2h_len) {
+		lockdep_assert_held(&ct->fast_lock);
+		return g2h_len < ct->ctbs.g2h.info.space;
+	}
+	return true;
+}
+
+static int has_room(struct xe_guc_ct *ct, u32 cmd_len, u32 g2h_len)
+{
+	lockdep_assert_held(&ct->lock);
+
+	/* G2H space is checked first; H2G is only consulted if that passes */
+	if (g2h_has_room(ct, g2h_len) && h2g_has_room(ct, cmd_len))
+		return 0;
+	return -EBUSY;
+}
+
+static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len)
+{
+	lockdep_assert_held(&ct->lock);
+	ct->ctbs.h2g.info.space -= cmd_len; /* consume cached H2G space (dwords) */
+}
+
+static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
+{
+	xe_assert(ct_to_xe(ct), g2h_len <= ct->ctbs.g2h.info.space);
+
+	if (!g2h_len)
+		return; /* nothing to reserve */
+
+	lockdep_assert_held(&ct->fast_lock);
+	ct->g2h_outstanding += num_g2h;
+	ct->ctbs.g2h.info.space -= g2h_len;
+}
+
+static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
+{
+	lockdep_assert_held(&ct->fast_lock);
+	xe_assert(ct_to_xe(ct), ct->ctbs.g2h.info.space + g2h_len <=
+		  ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space);
+	/* One fewer G2H outstanding; hand its reserved dwords back */
+	ct->g2h_outstanding--;
+	ct->ctbs.g2h.info.space += g2h_len;
+}
+
+static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
+{
+	spin_lock_irq(&ct->fast_lock); /* for callers not already holding fast_lock */
+	__g2h_release_space(ct, g2h_len);
+	spin_unlock_irq(&ct->fast_lock);
+}
+
+#define H2G_CT_HEADERS (GUC_CTB_HDR_LEN + 1) /* one DW CTB header and one DW HxG header */
+
+static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
+		     u32 ct_fence_value, bool want_response)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct guc_ctb *h2g = &ct->ctbs.h2g;
+	u32 cmd[H2G_CT_HEADERS];
+	u32 tail = h2g->info.tail;
+	u32 full_len;
+	struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds,
+							 tail * sizeof(u32));
+
+	full_len = len + GUC_CTB_HDR_LEN;
+
+	lockdep_assert_held(&ct->lock);
+	xe_assert(xe, full_len <= GUC_CTB_MSG_MAX_LEN);
+	xe_assert(xe, tail <= h2g->info.size);
+
+	/* Command will wrap, zero fill (NOPs), return and check credits again */
+	if (tail + full_len > h2g->info.size) {
+		xe_map_memset(xe, &map, 0, 0,
+			      (h2g->info.size - tail) * sizeof(u32));
+		h2g_reserve_space(ct, (h2g->info.size - tail));
+		h2g->info.tail = 0;
+		desc_write(xe, h2g, tail, h2g->info.tail);
+
+		return -EAGAIN;
+	}
+
+	/*
+	 * dw0: CT header (including fence)
+	 * dw1: HXG header (including action code)
+	 * dw2+: action data
+	 */
+	cmd[0] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) |
+		FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) |
+		FIELD_PREP(GUC_CTB_MSG_0_FENCE, ct_fence_value);
+	if (want_response) {
+		cmd[1] =
+			FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+			FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
+				   GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
+	} else {
+		cmd[1] =
+			FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) |
+			FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
+				   GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
+	}
+
+	/* The HXG header in cmd[1] replaces action[0], so skip that dword: */
+	--len;
+	++action;
+
+	/* Write the H2G, ensuring it is visible before the descriptor update */
+	xe_map_memcpy_to(xe, &map, 0, cmd, H2G_CT_HEADERS * sizeof(u32));
+	xe_map_memcpy_to(xe, &map, H2G_CT_HEADERS * sizeof(u32), action, len * sizeof(u32));
+	xe_device_wmb(xe);
+
+	/* Update local copies (tail and cached space) */
+	h2g->info.tail = (tail + full_len) % h2g->info.size;
+	h2g_reserve_space(ct, full_len);
+
+	/* Update descriptor */
+	desc_write(xe, h2g, tail, h2g->info.tail);
+
+	trace_xe_guc_ctb_h2g(ct_to_gt(ct)->info.id, *(action - 1), full_len,
+			     desc_read(xe, h2g, head), h2g->info.tail);
+
+	return 0;
+}
+
+static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
+				u32 len, u32 g2h_len, u32 num_g2h,
+				struct g2h_fence *g2h_fence)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	int ret;
+
+	xe_assert(xe, !g2h_len || !g2h_fence);
+	xe_assert(xe, !num_g2h || !g2h_fence);
+	xe_assert(xe, !g2h_len || num_g2h);
+	xe_assert(xe, g2h_len || !num_g2h);
+	lockdep_assert_held(&ct->lock);
+
+	if (unlikely(ct->ctbs.h2g.info.broken)) {
+		ret = -EPIPE;
+		goto out;
+	}
+
+	if (unlikely(!ct->enabled)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	if (g2h_fence) {
+		g2h_len = GUC_CTB_HXG_MSG_MAX_LEN; /* room for a maximum-size response */
+		num_g2h = 1;
+
+		if (g2h_fence_needs_alloc(g2h_fence)) {
+			void *ptr;
+
+			g2h_fence->seqno = (ct->fence_seqno++ & 0xffff); /* kept to 16 bits */
+			ptr = xa_store(&ct->fence_lookup,
+				       g2h_fence->seqno,
+				       g2h_fence, GFP_ATOMIC);
+			if (IS_ERR(ptr)) {
+				ret = PTR_ERR(ptr); /* returned as-is; seqno stays assigned */
+				goto out;
+			}
+		}
+	}
+
+	if (g2h_len)
+		spin_lock_irq(&ct->fast_lock); /* guards the G2H space accounting */
+retry:
+	ret = has_room(ct, len + GUC_CTB_HDR_LEN, g2h_len);
+	if (unlikely(ret))
+		goto out_unlock;
+
+	ret = h2g_write(ct, action, len, g2h_fence ? g2h_fence->seqno : 0,
+			!!g2h_fence);
+	if (unlikely(ret)) {
+		if (ret == -EAGAIN)
+			goto retry; /* tail wrapped to 0: re-check room, write again */
+		goto out_unlock;
+	}
+
+	__g2h_reserve_space(ct, g2h_len, num_g2h);
+	xe_guc_notify(ct_to_guc(ct));
+out_unlock:
+	if (g2h_len)
+		spin_unlock_irq(&ct->fast_lock);
+out:
+	return ret;
+}
+
+static void kick_reset(struct xe_guc_ct *ct)
+{
+	xe_gt_reset_async(ct_to_gt(ct)); /* request a reset of the GT owning this CT */
+}
+
+static int dequeue_one_g2h(struct xe_guc_ct *ct);
+
+static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
+			      u32 g2h_len, u32 num_g2h,
+			      struct g2h_fence *g2h_fence)
+{
+	struct drm_device *drm = &ct_to_xe(ct)->drm;
+	struct drm_printer p = drm_info_printer(drm->dev);
+	unsigned int sleep_period_ms = 1;
+	int ret;
+
+	xe_assert(ct_to_xe(ct), !g2h_len || !g2h_fence);
+	lockdep_assert_held(&ct->lock);
+	xe_device_assert_mem_access(ct_to_xe(ct));
+
+try_again:
+	ret = __guc_ct_send_locked(ct, action, len, g2h_len, num_g2h,
+				   g2h_fence);
+
+	/*
+	 * We wait to try to restore credits for about 1 second before bailing.
+	 * In the case of H2G credits we have no choice but just to wait for the
+	 * GuC to consume H2Gs in the channel so we use a wait / sleep loop. In
+	 * the case of G2H we process any G2H in the channel, hopefully freeing
+	 * credits as we consume the G2H messages.
+	 */
+	if (unlikely(ret == -EBUSY &&
+		     !h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) {
+		struct guc_ctb *h2g = &ct->ctbs.h2g;
+
+		if (sleep_period_ms == 1024) /* 1 + 2 + ... + 512 ms already slept */
+			goto broken;
+
+		trace_xe_guc_ct_h2g_flow_control(h2g->info.head, h2g->info.tail,
+						 h2g->info.size,
+						 h2g->info.space,
+						 len + GUC_CTB_HDR_LEN);
+		msleep(sleep_period_ms);
+		sleep_period_ms <<= 1;
+
+		goto try_again;
+	} else if (unlikely(ret == -EBUSY)) {
+		struct xe_device *xe = ct_to_xe(ct);
+		struct guc_ctb *g2h = &ct->ctbs.g2h;
+
+		trace_xe_guc_ct_g2h_flow_control(g2h->info.head,
+						 desc_read(xe, g2h, tail),
+						 g2h->info.size,
+						 g2h->info.space,
+						 g2h_fence ?
+						 GUC_CTB_HXG_MSG_MAX_LEN :
+						 g2h_len);
+
+#define g2h_avail(ct)	\
+	(desc_read(ct_to_xe(ct), (&ct->ctbs.g2h), tail) != ct->ctbs.g2h.info.head)
+		if (!wait_event_timeout(ct->wq, !ct->g2h_outstanding ||
+					g2h_avail(ct), HZ))
+			goto broken;
+#undef g2h_avail
+
+		if (dequeue_one_g2h(ct) < 0)
+			goto broken;
+
+		goto try_again;
+	}
+
+	return ret;
+
+broken:
+	/* Kernel log messages must be newline terminated */
+	drm_err(drm, "No forward process on H2G, reset required\n");
+	xe_guc_ct_print(ct, &p, true);
+	ct->ctbs.h2g.info.broken = true;
+	return -EDEADLK; /* the xe_guc_ct_send*() wrappers kick a GT reset on this */
+}
+
+static int guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
+		       u32 g2h_len, u32 num_g2h, struct g2h_fence *g2h_fence)
+{
+	int ret;
+
+	xe_assert(ct_to_xe(ct), !g2h_len || !g2h_fence);
+
+	mutex_lock(&ct->lock); /* guc_ct_send_locked() asserts ct->lock is held */
+	ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence);
+	mutex_unlock(&ct->lock);
+
+	return ret;
+}
+
+int xe_guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
+		   u32 g2h_len, u32 num_g2h)
+{
+	int err = guc_ct_send(ct, action, len, g2h_len, num_g2h, NULL);
+
+	/* -EDEADLK: the send path gave up on the channel, so reset the GT */
+	if (err == -EDEADLK)
+		kick_reset(ct);
+
+	return err;
+}
+
+int xe_guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
+			  u32 g2h_len, u32 num_g2h)
+{
+	int err = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, NULL);
+
+	/* -EDEADLK: the send path gave up on the channel, so reset the GT */
+	if (err == -EDEADLK)
+		kick_reset(ct);
+
+	return err;
+}
+
+int xe_guc_ct_send_g2h_handler(struct xe_guc_ct *ct, const u32 *action, u32 len)
+{
+	int err;
+
+	lockdep_assert_held(&ct->lock);
+	/* No G2H space is reserved for this send (g2h_len == num_g2h == 0) */
+	err = guc_ct_send_locked(ct, action, len, 0, 0, NULL);
+	if (err == -EDEADLK)
+		kick_reset(ct);
+
+	return err;
+}
+
+/*
+ * Check if a GT reset is in progress or will occur and if GT reset brought the
+ * CT back up. Randomly picking 5 seconds for an upper limit to do a GT reset.
+ */
+static bool retry_failure(struct xe_guc_ct *ct, int ret)
+{
+	long left;
+
+	if (ret != -EDEADLK && ret != -EPIPE && ret != -ENODEV)
+		return false;
+
+#define ct_alive(ct)	\
+	(ct->enabled && !ct->ctbs.h2g.info.broken && !ct->ctbs.g2h.info.broken)
+	left = wait_event_interruptible_timeout(ct->wq, ct_alive(ct), HZ * 5);
+#undef ct_alive
+	return left != 0; /* only a plain timeout (0) means "do not retry" */
+}
+
+static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
+			    u32 *response_buffer, bool no_fail)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct g2h_fence g2h_fence;
+	int ret = 0;
+
+	/*
+	 * We use a fence to implement blocking sends / receiving response data.
+	 * The seqno of the fence is sent in the H2G, returned in the G2H, and
+	 * an xarray is used as storage media with the seqno being the key.
+	 * Fields in the fence hold success, failure, retry status and the
+	 * response data. Safe to allocate on the stack as the xarray is the
+	 * only reference and it cannot be present after this function exits.
+	 */
+retry:
+	g2h_fence_init(&g2h_fence, response_buffer);
+retry_same_fence:
+	ret = guc_ct_send(ct, action, len, 0, 0, &g2h_fence);
+	if (unlikely(ret == -ENOMEM)) {
+		void *ptr;
+
+		/* Retry allocation /w GFP_KERNEL */
+		ptr = xa_store(&ct->fence_lookup,
+			       g2h_fence.seqno,
+			       &g2h_fence, GFP_KERNEL);
+		if (IS_ERR(ptr))
+			return PTR_ERR(ptr);
+
+		goto retry_same_fence;
+	} else if (unlikely(ret)) {
+		if (ret == -EDEADLK)
+			kick_reset(ct);
+
+		if (no_fail && retry_failure(ct, ret))
+			goto retry_same_fence;
+
+		if (!g2h_fence_needs_alloc(&g2h_fence))
+			xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
+
+		return ret;
+	}
+
+	ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ);
+	if (!ret) {
+		drm_err(&xe->drm, "Timed out wait for G2H, fence %u, action %04x\n",
+			g2h_fence.seqno, action[0]);
+		xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
+		return -ETIME;
+	}
+
+	if (g2h_fence.retry) {
+		drm_warn(&xe->drm, "Send retry, action 0x%04x, reason %d\n",
+			 action[0], g2h_fence.reason);
+		goto retry;
+	}
+	if (g2h_fence.fail) {
+		drm_err(&xe->drm, "Send failed, action 0x%04x, error %d, hint %d\n",
+			action[0], g2h_fence.error, g2h_fence.hint);
+		ret = -EIO;
+	}
+
+	return ret > 0 ? 0 : ret; /* ret > 0 is the remaining wait time: success */
+}
+
+int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
+			u32 *response_buffer)
+{
+	return guc_ct_send_recv(ct, action, len, response_buffer, false); /* !no_fail */
+}
+
+int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action,
+				u32 len, u32 *response_buffer)
+{
+	return guc_ct_send_recv(ct, action, len, response_buffer, true); /* no_fail */
+}
+
+static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+	u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]);
+
+	lockdep_assert_held(&ct->lock);
+
+	/* These "done" events give len dwords of G2H space back */
+	/* Every other event releases nothing here */
+	if (action == XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE ||
+	    action == XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE ||
+	    action == XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE ||
+	    action == XE_GUC_ACTION_TLB_INVALIDATION_DONE)
+		g2h_release_space(ct, len);
+
+	return 0;
+}
+
+static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	u32 response_len = len - GUC_CTB_MSG_MIN_LEN;
+	u32 fence = FIELD_GET(GUC_CTB_MSG_0_FENCE, msg[0]);
+	u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]);
+	struct g2h_fence *g2h_fence;
+
+	lockdep_assert_held(&ct->lock);
+
+	g2h_fence = xa_erase(&ct->fence_lookup, fence);
+	if (unlikely(!g2h_fence)) {
+		/* Don't tear down channel, as send could've timed out */
+		drm_warn(&xe->drm, "G2H fence (%u) not found!\n", fence);
+		g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
+		return 0;
+	}
+
+	xe_assert(xe, fence == g2h_fence->seqno);
+
+	if (type == GUC_HXG_TYPE_RESPONSE_FAILURE) {
+		g2h_fence->fail = true;
+		g2h_fence->error =
+			FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, msg[1]);
+		g2h_fence->hint =
+			FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, msg[1]);
+	} else if (type == GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
+		g2h_fence->retry = true;
+		g2h_fence->reason =
+			FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, msg[1]);
+	} else if (g2h_fence->response_buffer) {
+		g2h_fence->response_len = response_len;
+		memcpy(g2h_fence->response_buffer, msg + GUC_CTB_MSG_MIN_LEN,
+		       response_len * sizeof(u32));
+	}
+
+	g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); /* size reserved at send time */
+
+	g2h_fence->done = true;
+	smp_mb(); /* full barrier between the fence updates and the wake-up */
+
+	wake_up_all(&ct->g2h_fence_wq);
+
+	return 0;
+}
+
+/*
+ * Validate the HXG header of a G2H message and hand it to the event or
+ * response parser. An unexpected origin or type marks the G2H channel broken.
+ */
+static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	u32 hxg, origin, type;
+
+	lockdep_assert_held(&ct->lock);
+
+	hxg = msg[1];
+	origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg);
+	type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg);
+
+	if (unlikely(origin != GUC_HXG_ORIGIN_GUC)) {
+		drm_err(&xe->drm,
+			"G2H channel broken on read, origin=%d, reset required\n",
+			origin);
+		ct->ctbs.g2h.info.broken = true;
+
+		return -EPROTO;
+	}
+
+	switch (type) {
+	case GUC_HXG_TYPE_EVENT:
+		return parse_g2h_event(ct, msg, len);
+	case GUC_HXG_TYPE_RESPONSE_SUCCESS:
+	case GUC_HXG_TYPE_RESPONSE_FAILURE:
+	case GUC_HXG_TYPE_NO_RESPONSE_RETRY:
+		return parse_g2h_response(ct, msg, len);
+	default:
+		break;
+	}
+
+	drm_err(&xe->drm,
+		"G2H channel broken on read, type=%d, reset required\n", type);
+	ct->ctbs.g2h.info.broken = true;
+
+	return -EOPNOTSUPP;
+}
+
+static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct xe_guc *guc = ct_to_guc(ct);
+	u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]);
+	u32 *payload = msg + GUC_CTB_HXG_MSG_MIN_LEN;
+	u32 adj_len = len - GUC_CTB_HXG_MSG_MIN_LEN;
+	int ret = 0;
+
+	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]) != GUC_HXG_TYPE_EVENT)
+		return 0; /* only events are dispatched to handlers here */
+
+	switch (action) {
+	case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
+		ret = xe_guc_sched_done_handler(guc, payload, adj_len);
+		break;
+	case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
+		ret = xe_guc_deregister_done_handler(guc, payload, adj_len);
+		break;
+	case XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION:
+		ret = xe_guc_exec_queue_reset_handler(guc, payload, adj_len);
+		break;
+	case XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION:
+		ret = xe_guc_exec_queue_reset_failure_handler(guc, payload,
+							      adj_len);
+		break;
+	case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE:
+		/* Selftest only at the moment */
+		break;
+	case XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION:
+	case XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE:
+		/* FIXME: Handle this */
+		break;
+	case XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR:
+		ret = xe_guc_exec_queue_memory_cat_error_handler(guc, payload,
+								 adj_len);
+		break;
+	case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
+		ret = xe_guc_pagefault_handler(guc, payload, adj_len);
+		break;
+	case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+		ret = xe_guc_tlb_invalidation_done_handler(guc, payload,
+							   adj_len);
+		break;
+	case XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY:
+		ret = xe_guc_access_counter_notify_handler(guc, payload,
+							   adj_len);
+		break;
+	default:
+		drm_err(&xe->drm, "unexpected action 0x%04x\n", action);
+	}
+
+	if (ret)
+		drm_err(&xe->drm, "action 0x%04x failed processing, ret=%d\n",
+			action, ret);
+
+	return 0; /* handler failures are logged above, never propagated */
+}
+
+static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct guc_ctb *g2h = &ct->ctbs.g2h;
+	u32 tail, head, len;
+	s32 avail;
+	u32 action;
+
+	lockdep_assert_held(&ct->fast_lock);
+
+	if (!ct->enabled)
+		return -ENODEV;
+
+	if (g2h->info.broken)
+		return -EPIPE;
+
+	/* Calculate DW available to read */
+	tail = desc_read(xe, g2h, tail);
+	avail = tail - g2h->info.head;
+	if (unlikely(avail == 0))
+		return 0;
+
+	if (avail < 0)
+		avail += g2h->info.size;
+
+	/* Read header */
+	xe_map_memcpy_from(xe, msg, &g2h->cmds, sizeof(u32) * g2h->info.head,
+			   sizeof(u32));
+	len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, msg[0]) + GUC_CTB_MSG_MIN_LEN;
+	if (len > avail) {
+		drm_err(&xe->drm,
+			"G2H channel broken on read, avail=%d, len=%d, reset required\n",
+			avail, len);
+		g2h->info.broken = true;
+
+		return -EPROTO;
+	}
+
+	head = (g2h->info.head + 1) % g2h->info.size; /* first dword after the header */
+	avail = len - 1; /* from here on: dwords still to copy */
+
+	/* Read G2H message */
+	if (avail + head > g2h->info.size) {
+		u32 avail_til_wrap = g2h->info.size - head;
+
+		xe_map_memcpy_from(xe, msg + 1,
+				   &g2h->cmds, sizeof(u32) * head,
+				   avail_til_wrap * sizeof(u32));
+		xe_map_memcpy_from(xe, msg + 1 + avail_til_wrap,
+				   &g2h->cmds, 0,
+				   (avail - avail_til_wrap) * sizeof(u32));
+	} else {
+		xe_map_memcpy_from(xe, msg + 1,
+				   &g2h->cmds, sizeof(u32) * head,
+				   avail * sizeof(u32));
+	}
+
+	action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]);
+
+	if (fast_path) {
+		if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]) != GUC_HXG_TYPE_EVENT)
+			return 0; /* head not advanced: message stays queued */
+
+		switch (action) {
+		case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
+		case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+			break;	/* Process these in fast-path */
+		default:
+			return 0; /* head not advanced: message stays queued */
+		}
+	}
+
+	/* Update local / descriptor head */
+	g2h->info.head = (head + avail) % g2h->info.size;
+	desc_write(xe, g2h, head, g2h->info.head);
+
+	trace_xe_guc_ctb_g2h(ct_to_gt(ct)->info.id, action, len,
+			     g2h->info.head, tail);
+
+	return len;
+}
+
+/* Handle one message that g2h_read() released to the fast path */
+static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct xe_guc *guc = ct_to_guc(ct);
+	u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]);
+	u32 *payload = msg + GUC_CTB_HXG_MSG_MIN_LEN;
+	u32 adj_len = len - GUC_CTB_HXG_MSG_MIN_LEN;
+	int ret = 0;
+
+	switch (action) {
+	case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
+		ret = xe_guc_pagefault_handler(guc, payload, adj_len);
+		break;
+	case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+		__g2h_release_space(ct, len); /* asserts fast_lock is held */
+		ret = xe_guc_tlb_invalidation_done_handler(guc, payload, adj_len);
+		break;
+	default:
+		drm_warn(&xe->drm, "NOT_POSSIBLE\n"); /* log lines need a newline */
+	}
+
+	if (ret)
+		drm_err(&xe->drm, "action 0x%04x failed processing, ret=%d\n",
+			action, ret);
+}
+
+/**
+ * xe_guc_ct_fast_path - process critical G2H in the IRQ handler
+ * @ct: GuC CT object
+ *
+ * Anything related to page faults is critical for performance, process these
+ * critical G2H in the IRQ. This is safe as these handlers either just wake up
+ * waiters or queue another worker.
+ */
+void xe_guc_ct_fast_path(struct xe_guc_ct *ct)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	bool ongoing = xe_device_mem_access_get_if_ongoing(xe);
+	int len;
+
+	/*
+	 * Without a mem_access reference, only carry on when a PM callback task
+	 * is recorded.
+	 */
+	if (!ongoing && !xe_pm_read_callback_task(xe))
+		return;
+
+	/* Drain every message g2h_read() is willing to hand to the fast path */
+	spin_lock(&ct->fast_lock);
+	while ((len = g2h_read(ct, ct->fast_msg, true)) > 0)
+		g2h_fast_path(ct, ct->fast_msg, len);
+	spin_unlock(&ct->fast_lock);
+	if (ongoing)
+		xe_device_mem_access_put(xe);
+}
+
+/* Returns less than zero on error, 0 on done, 1 on more available */
+static int dequeue_one_g2h(struct xe_guc_ct *ct)
+{
+	int len, err;
+
+	lockdep_assert_held(&ct->lock);
+
+	/* Pull one message into ct->msg; fast_lock only covers the read itself */
+	spin_lock_irq(&ct->fast_lock);
+	len = g2h_read(ct, ct->msg, false);
+	spin_unlock_irq(&ct->fast_lock);
+	/* Zero means nothing was read, negative is an error from g2h_read() */
+	if (len <= 0)
+		return len;
+
+	/* Parse first; only dispatch to handlers if parsing succeeded */
+	err = parse_g2h_msg(ct, ct->msg, len);
+	if (likely(err >= 0))
+		err = process_g2h_msg(ct, ct->msg, len);
+	if (unlikely(err < 0))
+		return err;
+
+	return 1;
+}
+
+static void g2h_worker_func(struct work_struct *w)
+{
+	struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, g2h_worker);
+	struct xe_device *xe = ct_to_xe(ct);
+	bool ongoing;
+	int ret;
+
+	/*
+	 * Normal users must always hold mem_access.ref around CT calls. However
+	 * during the runtime pm callbacks we rely on CT to talk to the GuC, but
+	 * at this stage we can't rely on mem_access.ref and even the
+	 * callback_task will be different than current.  For such cases we just
+	 * need to ensure we always process the responses from any blocking
+	 * ct_send requests or where we otherwise expect some response when
+	 * initiated from those callbacks (which will need to wait for the below
+	 * dequeue_one_g2h()).  The dequeue_one_g2h() will gracefully fail if
+	 * the device has suspended to the point that the CT communication has
+	 * been disabled.
+	 *
+	 * If we are inside the runtime pm callback, we can be the only task
+	 * still issuing CT requests (since that requires having the
+	 * mem_access.ref).  It seems like it might in theory be possible to
+	 * receive unsolicited events from the GuC just as we are
+	 * suspending-resuming, but those will currently anyway be lost when
+	 * eventually exiting from suspend, hence no need to wake up the device
+	 * here. If we ever need something stronger than get_if_ongoing() then
+	 * we need to be careful with blocking the pm callbacks from getting CT
+	 * responses, if the worker here is blocked on those callbacks
+	 * completing, creating a deadlock.
+	 */
+	ongoing = xe_device_mem_access_get_if_ongoing(xe);
+	if (!ongoing && !xe_pm_read_callback_task(xe))
+		return;
+
+	do {
+		mutex_lock(&ct->lock);
+		ret = dequeue_one_g2h(ct);
+		mutex_unlock(&ct->lock);
+
+		/* Broken-channel errors: dump the CT state and reset the GT */
+		if (unlikely(ret == -EPROTO || ret == -EOPNOTSUPP)) {
+			struct drm_printer p = drm_info_printer(xe->drm.dev);
+
+			xe_guc_ct_print(ct, &p, false);
+			kick_reset(ct);
+		}
+	} while (ret == 1);
+	if (ongoing)
+		xe_device_mem_access_put(xe);
+}
+
+/*
+ * Copy descriptor, host info and the head..tail commands; cmds may end up NULL.
+ */
+static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb,
+				     struct guc_ctb_snapshot *snapshot, bool atomic)
+{
+	u32 size = ctb->info.size;
+	u32 head, tail;
+
+	xe_map_memcpy_from(xe, &snapshot->desc, &ctb->desc, 0,
+			   sizeof(struct guc_ct_buffer_desc));
+	memcpy(&snapshot->info, &ctb->info, sizeof(struct guc_ctb_info));
+	head = snapshot->desc.head;
+	tail = snapshot->desc.tail;
+
+	snapshot->cmds = kmalloc_array(size, sizeof(u32),
+				       atomic ? GFP_ATOMIC : GFP_KERNEL);
+	if (!snapshot->cmds) {
+		drm_err(&xe->drm, "Skipping CTB commands snapshot. Only CTB info will be available.\n");
+		return;
+	}
+
+	/*
+	 * head/tail come from the shared descriptor; an index outside the ring
+	 * would overrun cmds[] or never end the walk, so keep only the info.
+	 */
+	if (head >= size || tail >= size) {
+		drm_err(&xe->drm, "CTB head/tail out of range, skipping commands snapshot.\n");
+		kfree(snapshot->cmds);
+		snapshot->cmds = NULL;
+		return;
+	}
+
+	/* Copy only the dwords from head up to tail, wrapping at the ring size */
+	for (; head != tail; head = (head + 1) % size)
+		snapshot->cmds[head] = xe_map_rd(xe, &ctb->cmds, head * sizeof(u32), u32);
+}
+
+static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot,
+				   struct drm_printer *p)
+{
+	u32 head, tail, size = snapshot->info.size;
+
+	drm_printf(p, "\tsize: %d\n", snapshot->info.size);
+	drm_printf(p, "\tresv_space: %d\n", snapshot->info.resv_space);
+	drm_printf(p, "\thead: %d\n", snapshot->info.head);
+	drm_printf(p, "\ttail: %d\n", snapshot->info.tail);
+	drm_printf(p, "\tspace: %d\n", snapshot->info.space);
+	drm_printf(p, "\tbroken: %d\n", snapshot->info.broken);
+	drm_printf(p, "\thead (memory): %d\n", snapshot->desc.head);
+	drm_printf(p, "\ttail (memory): %d\n", snapshot->desc.tail);
+	drm_printf(p, "\tstatus (memory): 0x%x\n", snapshot->desc.status);
+
+	head = snapshot->desc.head;
+	tail = snapshot->desc.tail;
+	/* cmds[] has info.size entries: never walk it with bogus indices */
+	if (!snapshot->cmds || head >= size || tail >= size)
+		return;
+
+	while (head != tail) {
+		drm_printf(p, "\tcmd[%d]: 0x%08x\n", head,
+			   snapshot->cmds[head]);
+		++head;
+		if (head == size)
+			head = 0;
+	}
+}
+
+static void guc_ctb_snapshot_free(struct guc_ctb_snapshot *snapshot)
+{
+	kfree(snapshot->cmds);
+}
+
+/**
+ * xe_guc_ct_snapshot_capture - Take a quick snapshot of the CT state.
+ * @ct: GuC CT object.
+ * @atomic: Boolean to indicate if this is called from atomic context like
+ * reset or CTB handler or from some regular path like debugfs.
+ *
+ * This can be printed out in a later stage like during dev_coredump
+ * analysis.
+ *
+ * Returns: a GuC CT snapshot object that must be freed by the caller
+ * by using `xe_guc_ct_snapshot_free`, or NULL if the allocation failed.
+ */
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct,
+						      bool atomic)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct xe_guc_ct_snapshot *snapshot;
+
+	snapshot = kzalloc(sizeof(*snapshot),
+			   atomic ? GFP_ATOMIC : GFP_KERNEL);
+
+	if (!snapshot) {
+		drm_err(&xe->drm, "Skipping CTB snapshot entirely.\n");
+		return NULL;
+	}
+
+	if (ct->enabled) {
+		snapshot->ct_enabled = true;
+		snapshot->g2h_outstanding = READ_ONCE(ct->g2h_outstanding);
+		guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g,
+					 &snapshot->h2g, atomic);
+		guc_ctb_snapshot_capture(xe, &ct->ctbs.g2h,
+					 &snapshot->g2h, atomic);
+	}
+
+	return snapshot;
+}
+
+/**
+ * xe_guc_ct_snapshot_print - Print out a given GuC CT snapshot.
+ * @snapshot: GuC CT snapshot object.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function prints out a given GuC CT snapshot object.
+ */
+void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
+			      struct drm_printer *p)
+{
+	if (!snapshot)
+		return;
+
+	if (snapshot->ct_enabled) {
+		drm_puts(p, "\nH2G CTB (all sizes in DW):\n");
+		guc_ctb_snapshot_print(&snapshot->h2g, p);
+
+		drm_puts(p, "\nG2H CTB (all sizes in DW):\n");
+		guc_ctb_snapshot_print(&snapshot->g2h, p);
+
+		drm_printf(p, "\tg2h outstanding: %d\n",
+			   snapshot->g2h_outstanding);
+	} else {
+		drm_puts(p, "\nCT disabled\n");
+	}
+}
+
+/**
+ * xe_guc_ct_snapshot_free - Free all allocated objects for a given snapshot.
+ * @snapshot: GuC CT snapshot object.
+ *
+ * This function frees all the memory that needed to be allocated at capture
+ * time.
+ */
+void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot)
+{
+	if (!snapshot)
+		return;
+
+	guc_ctb_snapshot_free(&snapshot->h2g);
+	guc_ctb_snapshot_free(&snapshot->g2h);
+	kfree(snapshot);
+}
+
+/**
+ * xe_guc_ct_print - GuC CT Print.
+ * @ct: GuC CT.
+ * @p: drm_printer where it will be printed out.
+ * @atomic: Boolean to indicate if this is called from atomic context like
+ * reset or CTB handler or from some regular path like debugfs.
+ *
+ * This function quickly captures a snapshot and immediately prints it out.
+ */
+void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic)
+{
+	struct xe_guc_ct_snapshot *snapshot;
+
+	snapshot = xe_guc_ct_snapshot_capture(ct, atomic);
+	xe_guc_ct_snapshot_print(snapshot, p);
+	xe_guc_ct_snapshot_free(snapshot);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
new file mode 100644
index 000000000000..f15f8a4857e0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ct.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_CT_H_
+#define _XE_GUC_CT_H_
+
+#include "xe_guc_ct_types.h"
+
+struct drm_printer;
+
+int xe_guc_ct_init(struct xe_guc_ct *ct);
+int xe_guc_ct_enable(struct xe_guc_ct *ct);
+void xe_guc_ct_disable(struct xe_guc_ct *ct);
+void xe_guc_ct_fast_path(struct xe_guc_ct *ct);
+
+struct xe_guc_ct_snapshot *
+xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, bool atomic);
+void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
+			      struct drm_printer *p);
+void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot);
+void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic);
+
+static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct)
+{
+	wake_up_all(&ct->wq);
+	if (ct->enabled)
+		queue_work(system_unbound_wq, &ct->g2h_worker);
+	xe_guc_ct_fast_path(ct);
+}
+
+/* Basic CT send / receives */
+int xe_guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
+		   u32 g2h_len, u32 num_g2h);
+int xe_guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
+			  u32 g2h_len, u32 num_g2h);
+int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
+			u32 *response_buffer);
+static inline int
+xe_guc_ct_send_block(struct xe_guc_ct *ct, const u32 *action, u32 len)
+{
+	return xe_guc_ct_send_recv(ct, action, len, NULL);
+}
+
+/* This is the only version of the send CT you can call from a G2H handler */
+int xe_guc_ct_send_g2h_handler(struct xe_guc_ct *ct, const u32 *action,
+			       u32 len);
+
+/* Can't fail because a GT reset is in progress */
+int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action,
+				u32 len, u32 *response_buffer);
+static inline int
+xe_guc_ct_send_block_no_fail(struct xe_guc_ct *ct, const u32 *action, u32 len)
+{
+	return xe_guc_ct_send_recv_no_fail(ct, action, len, NULL);
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h
new file mode 100644
index 000000000000..d814d4ee3fc6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_CT_TYPES_H_
+#define _XE_GUC_CT_TYPES_H_
+
+#include <linux/interrupt.h>
+#include <linux/iosys-map.h>
+#include <linux/spinlock_types.h>
+#include <linux/wait.h>
+#include <linux/xarray.h>
+
+#include "abi/guc_communication_ctb_abi.h"
+
+struct xe_bo;
+
+/**
+ * struct guc_ctb_info - GuC command transport buffer (CTB) info
+ */
+struct guc_ctb_info {
+	/** @size: size of CTB commands (DW) */
+	u32 size;
+	/** @resv_space: reserved space of CTB commands (DW) */
+	u32 resv_space;
+	/** @head: head of CTB commands (DW) */
+	u32 head;
+	/** @tail: tail of CTB commands (DW) */
+	u32 tail;
+	/** @space: space in CTB commands (DW) */
+	u32 space;
+	/** @broken: channel broken */
+	bool broken;
+};
+
+/**
+ * struct guc_ctb - GuC command transport buffer (CTB)
+ */
+struct guc_ctb {
+	/** @desc: dma buffer map for CTB descriptor */
+	struct iosys_map desc;
+	/** @cmds: dma buffer map for CTB commands */
+	struct iosys_map cmds;
+	/** @info: CTB info */
+	struct guc_ctb_info info;
+};
+
+/**
+ * struct guc_ctb_snapshot - GuC command transport buffer (CTB) snapshot
+ */
+struct guc_ctb_snapshot {
+	/** @desc: snapshot of the CTB descriptor */
+	struct guc_ct_buffer_desc desc;
+	/** @cmds: snapshot of the CTB commands */
+	u32 *cmds;
+	/** @info: snapshot of the CTB info */
+	struct guc_ctb_info info;
+};
+
+/**
+ * struct xe_guc_ct_snapshot - GuC command transport (CT) snapshot
+ */
+struct xe_guc_ct_snapshot {
+	/** @ct_enabled: CT enabled info at capture time. */
+	bool ct_enabled;
+	/** @g2h_outstanding: G2H outstanding info at the capture time */
+	u32 g2h_outstanding;
+	/** @g2h: G2H CTB snapshot */
+	struct guc_ctb_snapshot g2h;
+	/** @h2g: H2G CTB snapshot */
+	struct guc_ctb_snapshot h2g;
+};
+
+/**
+ * struct xe_guc_ct - GuC command transport (CT) layer
+ *
+ * Includes a pair of CT buffers for bi-directional communication and tracking
+ * for the H2G and G2H requests sent and received through the buffers.
+ */
+struct xe_guc_ct {
+	/** @bo: XE BO for CT */
+	struct xe_bo *bo;
+	/** @lock: protects everything in CT layer */
+	struct mutex lock;
+	/** @fast_lock: protects G2H channel and credits */
+	spinlock_t fast_lock;
+	/** @ctbs: buffers for sending and receiving commands */
+	struct {
+		/** @ctbs.h2g: Host to GuC (H2G, send) channel */
+		struct guc_ctb h2g;
+		/** @ctbs.g2h: GuC to Host (G2H, receive) channel */
+		struct guc_ctb g2h;
+	} ctbs;
+	/** @g2h_outstanding: number of outstanding G2H */
+	u32 g2h_outstanding;
+	/** @g2h_worker: worker to process G2H messages */
+	struct work_struct g2h_worker;
+	/** @enabled: CT enabled */
+	bool enabled;
+	/** @fence_seqno: G2H fence seqno - 16 bits used by CT */
+	u32 fence_seqno;
+	/** @fence_lookup: G2H fence lookup */
+	struct xarray fence_lookup;
+	/** @wq: wait queue used for reliable CT sends and freeing G2H credits */
+	wait_queue_head_t wq;
+	/** @g2h_fence_wq: wait queue used for G2H fencing */
+	wait_queue_head_t g2h_fence_wq;
+	/** @msg: Message buffer */
+	u32 msg[GUC_CTB_MSG_MAX_LEN];
+	/** @fast_msg: Message buffer */
+	u32 fast_msg[GUC_CTB_MSG_MAX_LEN];
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c
new file mode 100644
index 000000000000..ffd7d53bcc42
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_guc_debugfs.h"
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_log.h"
+#include "xe_macros.h"
+
+static struct xe_guc *node_to_guc(struct drm_info_node *node)
+{
+	return node->info_ent->data;
+}
+
+static int guc_info(struct seq_file *m, void *data)
+{
+	struct xe_guc *guc = node_to_guc(m->private);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_device_mem_access_get(xe);
+	xe_guc_print_info(guc, &p);
+	xe_device_mem_access_put(xe);
+
+	return 0;
+}
+
+static int guc_log(struct seq_file *m, void *data)
+{
+	struct xe_guc *guc = node_to_guc(m->private);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_device_mem_access_get(xe);
+	xe_guc_log_print(&guc->log, &p);
+	xe_device_mem_access_put(xe);
+
+	return 0;
+}
+
+static const struct drm_info_list debugfs_list[] = {
+	{"guc_info", guc_info, 0},
+	{"guc_log", guc_log, 0},
+};
+
+void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent)
+{
+	struct drm_minor *minor = guc_to_xe(guc)->drm.primary;
+	struct drm_info_list *local;
+	int i;
+
+#define DEBUGFS_SIZE	(ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list))
+	local = drmm_kmalloc(&guc_to_xe(guc)->drm, DEBUGFS_SIZE, GFP_KERNEL);
+	if (!local)
+		return;
+
+	memcpy(local, debugfs_list, DEBUGFS_SIZE);
+#undef DEBUGFS_SIZE
+
+	for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i)
+		local[i].data = guc;
+
+	drm_debugfs_create_files(local,
+				 ARRAY_SIZE(debugfs_list),
+				 parent, minor);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.h b/drivers/gpu/drm/xe/xe_guc_debugfs.h
new file mode 100644
index 000000000000..4756dff26fca
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_DEBUGFS_H_
+#define _XE_GUC_DEBUGFS_H_
+
+struct dentry;
+struct xe_guc;
+
+void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h
new file mode 100644
index 000000000000..4c39f01e4f52
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_ENGINE_TYPES_H_
+#define _XE_GUC_ENGINE_TYPES_H_
+
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
+#include "xe_gpu_scheduler_types.h"
+
+struct dma_fence;
+struct xe_exec_queue;
+
+/**
+ * struct xe_guc_exec_queue - GuC specific state for an xe_exec_queue
+ */
+struct xe_guc_exec_queue {
+	/** @q: Backpointer to parent xe_exec_queue */
+	struct xe_exec_queue *q;
+	/** @sched: GPU scheduler for this xe_exec_queue */
+	struct xe_gpu_scheduler sched;
+	/** @entity: Scheduler entity for this xe_exec_queue */
+	struct xe_sched_entity entity;
+	/**
+	 * @static_msgs: Static messages for this xe_exec_queue, used when
+	 * a message needs to be sent through the GPU scheduler but memory
+	 * allocations are not allowed.
+	 */
+#define MAX_STATIC_MSG_TYPE	3
+	struct xe_sched_msg static_msgs[MAX_STATIC_MSG_TYPE];
+	/** @lr_tdr: long running TDR worker */
+	struct work_struct lr_tdr;
+	/** @fini_async: do final fini async from this worker */
+	struct work_struct fini_async;
+	/** @resume_time: time of last resume */
+	u64 resume_time;
+	/** @state: GuC specific state for this xe_exec_queue */
+	atomic_t state;
+	/** @wqi_head: work queue item head */
+	u32 wqi_head;
+	/** @wqi_tail: work queue item tail */
+	u32 wqi_tail;
+	/** @id: GuC id for this exec_queue */
+	u16 id;
+	/** @suspend_wait: wait queue used to wait on pending suspends */
+	wait_queue_head_t suspend_wait;
+	/** @suspend_pending: a suspend of the exec_queue is pending */
+	bool suspend_pending;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
new file mode 100644
index 000000000000..4dd5a88a7826
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -0,0 +1,361 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_FWIF_H
+#define _XE_GUC_FWIF_H
+
+#include <linux/bits.h>
+
+#include "abi/guc_klvs_abi.h"
+
+#define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET	4
+#define G2H_LEN_DW_DEREGISTER_CONTEXT		3
+#define G2H_LEN_DW_TLB_INVALIDATE		3
+
+#define GUC_CONTEXT_DISABLE		0
+#define GUC_CONTEXT_ENABLE		1
+
+#define GUC_CLIENT_PRIORITY_KMD_HIGH	0
+#define GUC_CLIENT_PRIORITY_HIGH	1
+#define GUC_CLIENT_PRIORITY_KMD_NORMAL	2
+#define GUC_CLIENT_PRIORITY_NORMAL	3
+#define GUC_CLIENT_PRIORITY_NUM		4
+
+#define GUC_RENDER_ENGINE		0
+#define GUC_VIDEO_ENGINE		1
+#define GUC_BLITTER_ENGINE		2
+#define GUC_VIDEOENHANCE_ENGINE		3
+#define GUC_VIDEO_ENGINE2		4
+#define GUC_MAX_ENGINES_NUM		(GUC_VIDEO_ENGINE2 + 1)
+
+#define GUC_RENDER_CLASS		0
+#define GUC_VIDEO_CLASS			1
+#define GUC_VIDEOENHANCE_CLASS		2
+#define GUC_BLITTER_CLASS		3
+#define GUC_COMPUTE_CLASS		4
+#define GUC_GSC_OTHER_CLASS		5
+#define GUC_LAST_ENGINE_CLASS		GUC_GSC_OTHER_CLASS
+#define GUC_MAX_ENGINE_CLASSES		16
+#define GUC_MAX_INSTANCES_PER_CLASS	32
+
+/* Helper for context registration H2G */
+struct guc_ctxt_registration_info {
+	u32 flags;
+	u32 context_idx;
+	u32 engine_class;
+	u32 engine_submit_mask;
+	u32 wq_desc_lo;
+	u32 wq_desc_hi;
+	u32 wq_base_lo;
+	u32 wq_base_hi;
+	u32 wq_size;
+	u32 hwlrca_lo;
+	u32 hwlrca_hi;
+};
+#define CONTEXT_REGISTRATION_FLAG_KMD	BIT(0)
+
+/* 32-bit KLV structure as used by policy updates and others */
+struct guc_klv_generic_dw_t {
+	u32 kl;
+	u32 value;
+} __packed;
+
+/* Format of the UPDATE_CONTEXT_POLICIES H2G data packet */
+struct guc_update_exec_queue_policy_header {
+	u32 action;
+	u32 guc_id;
+} __packed;
+
+struct guc_update_exec_queue_policy {
+	struct guc_update_exec_queue_policy_header header;
+	struct guc_klv_generic_dw_t klv[GUC_CONTEXT_POLICIES_KLV_NUM_IDS];
+} __packed;
+
+/* GUC_CTL_* - Parameters for loading the GuC */
+#define GUC_CTL_LOG_PARAMS		0
+#define   GUC_LOG_VALID			BIT(0)
+#define   GUC_LOG_NOTIFY_ON_HALF_FULL	BIT(1)
+#define   GUC_LOG_CAPTURE_ALLOC_UNITS	BIT(2)
+#define   GUC_LOG_LOG_ALLOC_UNITS	BIT(3)
+#define   GUC_LOG_CRASH_SHIFT		4
+#define   GUC_LOG_CRASH_MASK		(0x3 << GUC_LOG_CRASH_SHIFT)
+#define   GUC_LOG_DEBUG_SHIFT		6
+#define   GUC_LOG_DEBUG_MASK	        (0xF << GUC_LOG_DEBUG_SHIFT)
+#define   GUC_LOG_CAPTURE_SHIFT		10
+#define   GUC_LOG_CAPTURE_MASK	        (0x3 << GUC_LOG_CAPTURE_SHIFT)
+#define   GUC_LOG_BUF_ADDR_SHIFT	12
+
+#define GUC_CTL_WA			1
+#define   GUC_WA_GAM_CREDITS		BIT(10)
+#define   GUC_WA_DUAL_QUEUE		BIT(11)
+#define   GUC_WA_RCS_RESET_BEFORE_RC6	BIT(13)
+#define   GUC_WA_CONTEXT_ISOLATION	BIT(15)
+#define   GUC_WA_PRE_PARSER		BIT(14)
+#define   GUC_WA_HOLD_CCS_SWITCHOUT	BIT(17)
+#define   GUC_WA_POLLCS			BIT(18)
+#define   GUC_WA_RENDER_RST_RC6_EXIT	BIT(19)
+#define   GUC_WA_RCS_REGS_IN_CCS_REGS_LIST	BIT(21)
+
+#define GUC_CTL_FEATURE			2
+#define   GUC_CTL_ENABLE_SLPC		BIT(2)
+#define   GUC_CTL_DISABLE_SCHEDULER	BIT(14)
+
+#define GUC_CTL_DEBUG			3
+#define   GUC_LOG_VERBOSITY_SHIFT	0
+#define   GUC_LOG_VERBOSITY_LOW		(0 << GUC_LOG_VERBOSITY_SHIFT)
+#define   GUC_LOG_VERBOSITY_MED		(1 << GUC_LOG_VERBOSITY_SHIFT)
+#define   GUC_LOG_VERBOSITY_HIGH	(2 << GUC_LOG_VERBOSITY_SHIFT)
+#define   GUC_LOG_VERBOSITY_ULTRA	(3 << GUC_LOG_VERBOSITY_SHIFT)
+#define	  GUC_LOG_VERBOSITY_MIN		0
+#define	  GUC_LOG_VERBOSITY_MAX		3
+#define	  GUC_LOG_VERBOSITY_MASK	0x0000000f
+#define	  GUC_LOG_DESTINATION_MASK	(3 << 4)
+#define   GUC_LOG_DISABLED		(1 << 6)
+#define   GUC_PROFILE_ENABLED		(1 << 7)
+
+#define GUC_CTL_ADS			4
+#define   GUC_ADS_ADDR_SHIFT		1
+#define   GUC_ADS_ADDR_MASK		(0xFFFFF << GUC_ADS_ADDR_SHIFT)
+
+#define GUC_CTL_DEVID			5
+
+#define GUC_CTL_MAX_DWORDS		14
+
+/* Scheduling policy settings */
+
+#define GLOBAL_POLICY_MAX_NUM_WI 15
+
+/* Don't reset an engine upon preemption failure */
+#define GLOBAL_POLICY_DISABLE_ENGINE_RESET				BIT(0)
+
+#define GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US 500000
+
+struct guc_policies {
+	u32 submission_queue_depth[GUC_MAX_ENGINE_CLASSES];
+	/*
+	 * In micro seconds. How much time to allow before DPC processing is
+	 * called back via interrupt (to prevent DPC queue drain starving).
+	 * Typically 1000s of micro seconds (example only, not granularity).
+	 */
+	u32 dpc_promote_time;
+
+	/* Must be set to take these new values. */
+	u32 is_valid;
+
+	/*
+	 * Max number of WIs to process per call. A large value may keep CS
+	 * idle.
+	 */
+	u32 max_num_work_items;
+
+	u32 global_flags;
+	u32 reserved[4];
+} __packed;
+
+/* GuC MMIO reg state struct */
+struct guc_mmio_reg {
+	u32 offset;
+	u32 value;
+	u32 flags;
+	u32 mask;
+#define GUC_REGSET_MASKED		BIT(0)
+#define GUC_REGSET_MASKED_WITH_VALUE	BIT(2)
+#define GUC_REGSET_RESTORE_ONLY		BIT(3)
+} __packed;
+
+/* GuC register sets */
+struct guc_mmio_reg_set {
+	u32 address;
+	u16 count;
+	u16 reserved;
+} __packed;
+
+/* Generic GT SysInfo data types */
+#define GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED		0
+#define GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK	1
+#define GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI	2
+#define GUC_GENERIC_GT_SYSINFO_MAX			16
+
+/* HW info */
+struct guc_gt_system_info {
+	u8 mapping_table[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+	u32 engine_enabled_masks[GUC_MAX_ENGINE_CLASSES];
+	u32 generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_MAX];
+} __packed;
+
+enum {
+	GUC_CAPTURE_LIST_INDEX_PF = 0,
+	GUC_CAPTURE_LIST_INDEX_VF = 1,
+	GUC_CAPTURE_LIST_INDEX_MAX = 2,
+};
+
+/* GuC Additional Data Struct */
+struct guc_ads {
+	struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+	u32 reserved0;
+	u32 scheduler_policies;
+	u32 gt_system_info;
+	u32 reserved1;
+	u32 control_data;
+	u32 golden_context_lrca[GUC_MAX_ENGINE_CLASSES];
+	u32 eng_state_size[GUC_MAX_ENGINE_CLASSES];
+	u32 private_data;
+	u32 um_init_data;
+	u32 capture_instance[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES];
+	u32 capture_class[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES];
+	u32 capture_global[GUC_CAPTURE_LIST_INDEX_MAX];
+	u32 reserved[14];
+} __packed;
+
+/* Engine usage stats */
+struct guc_engine_usage_record {
+	u32 current_context_index;
+	u32 last_switch_in_stamp;
+	u32 reserved0;
+	u32 total_runtime;
+	u32 reserved1[4];
+} __packed;
+
+struct guc_engine_usage {
+	struct guc_engine_usage_record engines[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+} __packed;
+
+/* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */
+enum xe_guc_recv_message {
+	XE_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1),
+	XE_GUC_RECV_MSG_EXCEPTION = BIT(30),
+};
+
+/* Page fault structures */
+struct access_counter_desc {
+	u32 dw0;
+#define ACCESS_COUNTER_TYPE	BIT(0)
+#define ACCESS_COUNTER_SUBG_LO	GENMASK(31, 1)
+
+	u32 dw1;
+#define ACCESS_COUNTER_SUBG_HI	BIT(0)
+#define ACCESS_COUNTER_RSVD0	GENMASK(2, 1)
+#define ACCESS_COUNTER_ENG_INSTANCE	GENMASK(8, 3)
+#define ACCESS_COUNTER_ENG_CLASS	GENMASK(11, 9)
+#define ACCESS_COUNTER_ASID	GENMASK(31, 12)
+
+	u32 dw2;
+#define ACCESS_COUNTER_VFID	GENMASK(5, 0)
+#define ACCESS_COUNTER_RSVD1	GENMASK(7, 6)
+#define ACCESS_COUNTER_GRANULARITY	GENMASK(10, 8)
+#define ACCESS_COUNTER_RSVD2	GENMASK(16, 11)
+#define ACCESS_COUNTER_VIRTUAL_ADDR_RANGE_LO	GENMASK(31, 17)
+
+	u32 dw3;
+#define ACCESS_COUNTER_VIRTUAL_ADDR_RANGE_HI	GENMASK(31, 0)
+} __packed;
+
+enum guc_um_queue_type {
+	GUC_UM_HW_QUEUE_PAGE_FAULT = 0,
+	GUC_UM_HW_QUEUE_PAGE_FAULT_RESPONSE,
+	GUC_UM_HW_QUEUE_ACCESS_COUNTER,
+	GUC_UM_HW_QUEUE_MAX
+};
+
+struct guc_um_queue_params {
+	u64 base_dpa;
+	u32 base_ggtt_address;
+	u32 size_in_bytes;
+	u32 rsvd[4];
+} __packed;
+
+struct guc_um_init_params {
+	u64 page_response_timeout_in_us;
+	u32 rsvd[6];
+	struct guc_um_queue_params queue_params[GUC_UM_HW_QUEUE_MAX];
+} __packed;
+
+enum xe_guc_fault_reply_type {
+	PFR_ACCESS = 0,
+	PFR_ENGINE,
+	PFR_VFID,
+	PFR_ALL,
+	PFR_INVALID
+};
+
+enum xe_guc_response_desc_type {
+	TLB_INVALIDATION_DESC = 0,
+	FAULT_RESPONSE_DESC
+};
+
+struct xe_guc_pagefault_desc {
+	u32 dw0;
+#define PFD_FAULT_LEVEL		GENMASK(2, 0)
+#define PFD_SRC_ID		GENMASK(10, 3)
+#define PFD_RSVD_0		GENMASK(17, 11)
+#define XE2_PFD_TRVA_FAULT	BIT(18)
+#define PFD_ENG_INSTANCE	GENMASK(24, 19)
+#define PFD_ENG_CLASS		GENMASK(27, 25)
+#define PFD_PDATA_LO		GENMASK(31, 28)
+
+	u32 dw1;
+#define PFD_PDATA_HI		GENMASK(11, 0)
+#define PFD_PDATA_HI_SHIFT	4
+#define PFD_ASID		GENMASK(31, 12)
+
+	u32 dw2;
+#define PFD_ACCESS_TYPE		GENMASK(1, 0)
+#define PFD_FAULT_TYPE		GENMASK(3, 2)
+#define PFD_VFID		GENMASK(9, 4)
+#define PFD_RSVD_1		GENMASK(11, 10)
+#define PFD_VIRTUAL_ADDR_LO	GENMASK(31, 12)
+#define PFD_VIRTUAL_ADDR_LO_SHIFT 12
+
+	u32 dw3;
+#define PFD_VIRTUAL_ADDR_HI	GENMASK(31, 0)
+#define PFD_VIRTUAL_ADDR_HI_SHIFT 32
+} __packed;
+
+struct xe_guc_pagefault_reply {
+	u32 dw0;
+#define PFR_VALID		BIT(0)
+#define PFR_SUCCESS		BIT(1)
+#define PFR_REPLY		GENMASK(4, 2)
+#define PFR_RSVD_0		GENMASK(9, 5)
+#define PFR_DESC_TYPE		GENMASK(11, 10)
+#define PFR_ASID		GENMASK(31, 12)
+
+	u32 dw1;
+#define PFR_VFID		GENMASK(5, 0)
+#define PFR_RSVD_1		BIT(6)
+#define PFR_ENG_INSTANCE	GENMASK(12, 7)
+#define PFR_ENG_CLASS		GENMASK(15, 13)
+#define PFR_PDATA		GENMASK(31, 16)
+
+	u32 dw2;
+#define PFR_RSVD_2		GENMASK(31, 0)
+} __packed;
+
+struct xe_guc_acc_desc {
+	u32 dw0;
+#define ACC_TYPE	BIT(0)
+#define ACC_TRIGGER	0
+#define ACC_NOTIFY	1
+#define ACC_SUBG_LO	GENMASK(31, 1)
+
+	u32 dw1;
+#define ACC_SUBG_HI	BIT(0)
+#define ACC_RSVD0	GENMASK(2, 1)
+#define ACC_ENG_INSTANCE	GENMASK(8, 3)
+#define ACC_ENG_CLASS	GENMASK(11, 9)
+#define ACC_ASID	GENMASK(31, 12)
+
+	u32 dw2;
+#define ACC_VFID	GENMASK(5, 0)
+#define ACC_RSVD1	GENMASK(7, 6)
+#define ACC_GRANULARITY	GENMASK(10, 8)
+#define ACC_RSVD2	GENMASK(16, 11)
+#define ACC_VIRTUAL_ADDR_RANGE_LO	GENMASK(31, 17)
+
+	u32 dw3;
+#define ACC_VIRTUAL_ADDR_RANGE_HI	GENMASK(31, 0)
+} __packed;
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
new file mode 100644
index 000000000000..2a13a00917f8
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_guc_hwconfig.h"
+
+#include <drm/drm_managed.h>
+
+#include "abi/guc_actions_abi.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_map.h"
+
+static int send_get_hwconfig(struct xe_guc *guc, u32 ggtt_addr, u32 size)
+{
+	u32 action[] = {
+		XE_GUC_ACTION_GET_HWCONFIG,
+		lower_32_bits(ggtt_addr),
+		upper_32_bits(ggtt_addr),
+		size,
+	};
+
+	return xe_guc_mmio_send(guc, action, ARRAY_SIZE(action));
+}
+
+static int guc_hwconfig_size(struct xe_guc *guc, u32 *size)
+{
+	int ret = send_get_hwconfig(guc, 0, 0);
+
+	if (ret < 0)
+		return ret;
+
+	*size = ret;
+	return 0;
+}
+
+static int guc_hwconfig_copy(struct xe_guc *guc)
+{
+	int ret = send_get_hwconfig(guc, xe_bo_ggtt_addr(guc->hwconfig.bo),
+				    guc->hwconfig.size);
+
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+int xe_guc_hwconfig_init(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_bo *bo;
+	u32 size;
+	int err;
+
+	/* Initialization already done */
+	if (guc->hwconfig.bo)
+		return 0;
+
+	/*
+	 * hwconfig is the same across GTs, so only GT0 needs to be configured
+	 */
+	if (gt->info.id != XE_GT0)
+		return 0;
+
+	/* Only ADL_P and DG2+ support the hwconfig table */
+	if (GRAPHICS_VERx100(xe) < 1255 && xe->info.platform != XE_ALDERLAKE_P)
+		return 0;
+
+	err = guc_hwconfig_size(guc, &size);
+	if (err)
+		return err;
+	if (!size)
+		return -EINVAL;
+
+	bo = xe_managed_bo_create_pin_map(xe, tile, PAGE_ALIGN(size),
+					  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+					  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+	guc->hwconfig.bo = bo;
+	guc->hwconfig.size = size;
+
+	return guc_hwconfig_copy(guc);
+}
+
+u32 xe_guc_hwconfig_size(struct xe_guc *guc)
+{
+	return !guc->hwconfig.bo ? 0 : guc->hwconfig.size;
+}
+
+void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+
+	XE_WARN_ON(!guc->hwconfig.bo);
+
+	xe_map_memcpy_from(xe, dst, &guc->hwconfig.bo->vmap, 0,
+			   guc->hwconfig.size);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.h b/drivers/gpu/drm/xe/xe_guc_hwconfig.h
new file mode 100644
index 000000000000..b5794d641900
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_HWCONFIG_H_
+#define _XE_GUC_HWCONFIG_H_
+
+#include <linux/types.h>
+
+struct xe_guc;
+
+int xe_guc_hwconfig_init(struct xe_guc *guc);
+u32 xe_guc_hwconfig_size(struct xe_guc *guc);
+void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
new file mode 100644
index 000000000000..bcd2f4d34081
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_log.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_guc_log.h"
+
+#include <drm/drm_managed.h>
+
+#include "xe_bo.h"
+#include "xe_gt.h"
+#include "xe_map.h"
+#include "xe_module.h"
+
+static struct xe_gt *
+log_to_gt(struct xe_guc_log *log)
+{
+	return container_of(log, struct xe_gt, uc.guc.log);
+}
+
+static struct xe_device *
+log_to_xe(struct xe_guc_log *log)
+{
+	return gt_to_xe(log_to_gt(log));
+}
+
+static size_t guc_log_size(void)
+{
+	/*
+	 *  GuC Log buffer Layout
+	 *
+	 *  +===============================+ 00B
+	 *  |    Crash dump state header    |
+	 *  +-------------------------------+ 32B
+	 *  |      Debug state header       |
+	 *  +-------------------------------+ 64B
+	 *  |     Capture state header      |
+	 *  +-------------------------------+ 96B
+	 *  |                               |
+	 *  +===============================+ PAGE_SIZE (4KB)
+	 *  |        Crash Dump logs        |
+	 *  +===============================+ + CRASH_SIZE
+	 *  |          Debug logs           |
+	 *  +===============================+ + DEBUG_SIZE
+	 *  |         Capture logs          |
+	 *  +===============================+ + CAPTURE_SIZE
+	 */
+	return PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE +
+		CAPTURE_BUFFER_SIZE;
+}
+
+void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p)
+{
+	struct xe_device *xe = log_to_xe(log);
+	size_t size;
+	int i, j;
+
+	xe_assert(xe, log->bo);
+
+	size = log->bo->size;
+
+#define DW_PER_READ		128
+	xe_assert(xe, !(size % (DW_PER_READ * sizeof(u32))));
+	for (i = 0; i < size / sizeof(u32); i += DW_PER_READ) {
+		u32 read[DW_PER_READ];
+
+		xe_map_memcpy_from(xe, read, &log->bo->vmap, i * sizeof(u32),
+				   DW_PER_READ * sizeof(u32));
+#define DW_PER_PRINT		4
+		for (j = 0; j < DW_PER_READ / DW_PER_PRINT; ++j) {
+			u32 *print = read + j * DW_PER_PRINT;
+
+			drm_printf(p, "0x%08x 0x%08x 0x%08x 0x%08x\n",
+				   *(print + 0), *(print + 1),
+				   *(print + 2), *(print + 3));
+		}
+	}
+}
+
+int xe_guc_log_init(struct xe_guc_log *log)
+{
+	struct xe_device *xe = log_to_xe(log);
+	struct xe_tile *tile = gt_to_tile(log_to_gt(log));
+	struct xe_bo *bo;
+
+	bo = xe_managed_bo_create_pin_map(xe, tile, guc_log_size(),
+					  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+					  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	xe_map_memset(xe, &bo->vmap, 0, 0, guc_log_size());
+	log->bo = bo;
+	log->level = xe_modparam.guc_log_level;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h
new file mode 100644
index 000000000000..2d25ab28b4b3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_log.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_LOG_H_
+#define _XE_GUC_LOG_H_
+
+#include "xe_guc_log_types.h"
+
+struct drm_printer;
+
+#if IS_ENABLED(CONFIG_DRM_XE_LARGE_GUC_BUFFER)
+#define CRASH_BUFFER_SIZE       SZ_1M
+#define DEBUG_BUFFER_SIZE       SZ_8M
+#define CAPTURE_BUFFER_SIZE     SZ_2M
+#else
+#define CRASH_BUFFER_SIZE	SZ_8K
+#define DEBUG_BUFFER_SIZE	SZ_64K
+#define CAPTURE_BUFFER_SIZE	SZ_16K
+#endif
+/*
+ * While we're using plain log level in i915, GuC controls are much more...
+ * "elaborate"? We have a couple of bits for verbosity, separate bit for actual
+ * log enabling, and separate bit for default logging - which "conveniently"
+ * ignores the enable bit.
+ */
+#define GUC_LOG_LEVEL_DISABLED		0
+#define GUC_LOG_LEVEL_NON_VERBOSE	1
+#define GUC_LOG_LEVEL_IS_ENABLED(x)	((x) > GUC_LOG_LEVEL_DISABLED)
+#define GUC_LOG_LEVEL_IS_VERBOSE(x)	((x) > GUC_LOG_LEVEL_NON_VERBOSE)
+#define GUC_LOG_LEVEL_TO_VERBOSITY(x) ({		\
+	typeof(x) _x = (x);				\
+	GUC_LOG_LEVEL_IS_VERBOSE(_x) ? _x - 2 : 0;	\
+})
+#define GUC_VERBOSITY_TO_LOG_LEVEL(x)	((x) + 2)
+#define GUC_LOG_LEVEL_MAX GUC_VERBOSITY_TO_LOG_LEVEL(GUC_LOG_VERBOSITY_MAX)
+
+int xe_guc_log_init(struct xe_guc_log *log);
+void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p);
+
+static inline u32
+xe_guc_log_get_level(struct xe_guc_log *log)
+{
+	return log->level;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_log_types.h b/drivers/gpu/drm/xe/xe_guc_log_types.h
new file mode 100644
index 000000000000..125080d138a7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_log_types.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_LOG_TYPES_H_
+#define _XE_GUC_LOG_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_bo;
+
+/**
+ * struct xe_guc_log - GuC log
+ */
+struct xe_guc_log {
+	/** @level: GuC log level */
+	u32 level;
+	/** @bo: XE BO for GuC log */
+	struct xe_bo *bo;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
new file mode 100644
index 000000000000..d91702592520
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -0,0 +1,1002 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_guc_pc.h"
+
+#include <linux/delay.h>
+
+#include <drm/drm_managed.h>
+
+#include "abi/guc_actions_abi.h"
+#include "abi/guc_actions_slpc_abi.h"
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_idle.h"
+#include "xe_gt_sysfs.h"
+#include "xe_gt_types.h"
+#include "xe_guc_ct.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_pcode.h"
+
+#define MCHBAR_MIRROR_BASE_SNB	0x140000
+
+#define RP_STATE_CAP		XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5998)
+#define   RP0_MASK		REG_GENMASK(7, 0)
+#define   RP1_MASK		REG_GENMASK(15, 8)
+#define   RPN_MASK		REG_GENMASK(23, 16)
+
+#define FREQ_INFO_REC	XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5ef0)
+#define   RPE_MASK		REG_GENMASK(15, 8)
+
+#define GT_PERF_STATUS		XE_REG(0x1381b4)
+#define   CAGF_MASK	REG_GENMASK(19, 11)
+
+#define GT_FREQUENCY_MULTIPLIER	50
+#define GT_FREQUENCY_SCALER	3
+
+/**
+ * DOC: GuC Power Conservation (PC)
+ *
+ * GuC Power Conservation (PC) supports multiple features for the most
+ * efficient and performant use of the GT when GuC submission is enabled,
+ * including frequency management, Render-C states management, and various
+ * algorithms for power balancing.
+ *
+ * Single Loop Power Conservation (SLPC) is the name given to the suite of
+ * connected power conservation features in the GuC firmware. The firmware
+ * exposes a programming interface to the host for the control of SLPC.
+ *
+ * Frequency management:
+ * =====================
+ *
+ * Xe driver enables SLPC with all of its default features and frequency
+ * selection, which varies per platform.
+ *
+ * Render-C States:
+ * ================
+ *
+ * Render-C states is also a GuC PC feature that is now enabled in Xe for
+ * all platforms.
+ *
+ */
+
+static struct xe_guc *
+pc_to_guc(struct xe_guc_pc *pc)
+{
+	return container_of(pc, struct xe_guc, pc);
+}
+
+static struct xe_device *
+pc_to_xe(struct xe_guc_pc *pc)
+{
+	struct xe_guc *guc = pc_to_guc(pc);
+	struct xe_gt *gt = container_of(guc, struct xe_gt, uc.guc);
+
+	return gt_to_xe(gt);
+}
+
+static struct xe_gt *
+pc_to_gt(struct xe_guc_pc *pc)
+{
+	return container_of(pc, struct xe_gt, uc.guc.pc);
+}
+
+static struct iosys_map *
+pc_to_maps(struct xe_guc_pc *pc)
+{
+	return &pc->bo->vmap;
+}
+
+#define slpc_shared_data_read(pc_, field_) \
+	xe_map_rd_field(pc_to_xe(pc_), pc_to_maps(pc_), 0, \
+			struct slpc_shared_data, field_)
+
+#define slpc_shared_data_write(pc_, field_, val_) \
+	xe_map_wr_field(pc_to_xe(pc_), pc_to_maps(pc_), 0, \
+			struct slpc_shared_data, field_, val_)
+
+#define SLPC_EVENT(id, count) \
+	(FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID, id) | \
+	 FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, count))
+
+static int wait_for_pc_state(struct xe_guc_pc *pc,
+			     enum slpc_global_state state)
+{
+	int timeout_us = 5000; /* roughly 5ms, but no need for precision */
+	int slept, wait = 10;
+
+	xe_device_assert_mem_access(pc_to_xe(pc));
+
+	for (slept = 0; slept < timeout_us;) {
+		if (slpc_shared_data_read(pc, header.global_state) == state)
+			return 0;
+
+		usleep_range(wait, wait << 1);
+		slept += wait;
+		wait <<= 1;
+		if (slept + wait > timeout_us)
+			wait = timeout_us - slept;
+	}
+
+	return -ETIMEDOUT;
+}
+
+static int pc_action_reset(struct xe_guc_pc *pc)
+{
+	struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
+	int ret;
+	u32 action[] = {
+		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+		SLPC_EVENT(SLPC_EVENT_RESET, 2),
+		xe_bo_ggtt_addr(pc->bo),
+		0,
+	};
+
+	ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
+	if (ret)
+		drm_err(&pc_to_xe(pc)->drm, "GuC PC reset: %pe\n", ERR_PTR(ret));
+
+	return ret;
+}
+
+static int pc_action_shutdown(struct xe_guc_pc *pc)
+{
+	struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
+	int ret;
+	u32 action[] = {
+		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+		SLPC_EVENT(SLPC_EVENT_SHUTDOWN, 2),
+		xe_bo_ggtt_addr(pc->bo),
+		0,
+	};
+
+	ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
+	if (ret)
+		drm_err(&pc_to_xe(pc)->drm, "GuC PC shutdown %pe\n",
+			ERR_PTR(ret));
+
+	return ret;
+}
+
+static int pc_action_query_task_state(struct xe_guc_pc *pc)
+{
+	struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
+	int ret;
+	u32 action[] = {
+		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+		SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2),
+		xe_bo_ggtt_addr(pc->bo),
+		0,
+	};
+
+	if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING))
+		return -EAGAIN;
+
+	/* Blocking here to ensure the results are ready before reading them */
+	ret = xe_guc_ct_send_block(ct, action, ARRAY_SIZE(action));
+	if (ret)
+		drm_err(&pc_to_xe(pc)->drm,
+			"GuC PC query task state failed: %pe\n", ERR_PTR(ret));
+
+	return ret;
+}
+
+static int pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value)
+{
+	struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
+	int ret;
+	u32 action[] = {
+		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+		SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
+		id,
+		value,
+	};
+
+	if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING))
+		return -EAGAIN;
+
+	ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
+	if (ret)
+		drm_err(&pc_to_xe(pc)->drm, "GuC PC set param failed: %pe\n",
+			ERR_PTR(ret));
+
+	return ret;
+}
+
+static int pc_action_setup_gucrc(struct xe_guc_pc *pc, u32 mode)
+{
+	struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
+	u32 action[] = {
+		XE_GUC_ACTION_SETUP_PC_GUCRC,
+		mode,
+	};
+	int ret;
+
+	ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
+	if (ret)
+		drm_err(&pc_to_xe(pc)->drm, "GuC RC enable failed: %pe\n",
+			ERR_PTR(ret));
+	return ret;
+}
+
+static u32 decode_freq(u32 raw)
+{
+	return DIV_ROUND_CLOSEST(raw * GT_FREQUENCY_MULTIPLIER,
+				 GT_FREQUENCY_SCALER);
+}
+
+static u32 encode_freq(u32 freq)
+{
+	return DIV_ROUND_CLOSEST(freq * GT_FREQUENCY_SCALER,
+				 GT_FREQUENCY_MULTIPLIER);
+}
+
+static u32 pc_get_min_freq(struct xe_guc_pc *pc)
+{
+	u32 freq;
+
+	freq = FIELD_GET(SLPC_MIN_UNSLICE_FREQ_MASK,
+			 slpc_shared_data_read(pc, task_state_data.freq));
+
+	return decode_freq(freq);
+}
+
+static void pc_set_manual_rp_ctrl(struct xe_guc_pc *pc, bool enable)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	u32 state = enable ? RPSWCTL_ENABLE : RPSWCTL_DISABLE;
+
+	/* Allow/Disallow punit to process software freq requests */
+	xe_mmio_write32(gt, RP_CONTROL, state);
+}
+
+static void pc_set_cur_freq(struct xe_guc_pc *pc, u32 freq)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	u32 rpnswreq;
+
+	pc_set_manual_rp_ctrl(pc, true);
+
+	/* Req freq is in units of 16.66 Mhz */
+	rpnswreq = REG_FIELD_PREP(REQ_RATIO_MASK, encode_freq(freq));
+	xe_mmio_write32(gt, RPNSWREQ, rpnswreq);
+
+	/* Sleep for a small time to allow pcode to respond */
+	usleep_range(100, 300);
+
+	pc_set_manual_rp_ctrl(pc, false);
+}
+
+static int pc_set_min_freq(struct xe_guc_pc *pc, u32 freq)
+{
+	/*
+	 * Let's only check for the rpn-rp0 range. If max < min,
+	 * min becomes a fixed request.
+	 */
+	if (freq < pc->rpn_freq || freq > pc->rp0_freq)
+		return -EINVAL;
+
+	/*
+	 * GuC policy is to elevate minimum frequency to the efficient levels
+	 * Our goal is to have the admin choices respected.
+	 */
+	pc_action_set_param(pc, SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY,
+			    freq < pc->rpe_freq);
+
+	return pc_action_set_param(pc,
+				   SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+				   freq);
+}
+
+static u32 pc_get_max_freq(struct xe_guc_pc *pc)
+{
+	u32 freq;
+
+	freq = FIELD_GET(SLPC_MAX_UNSLICE_FREQ_MASK,
+			 slpc_shared_data_read(pc, task_state_data.freq));
+
+	return decode_freq(freq);
+}
+
+static int pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
+{
+	/*
+	 * Let's only check for the rpn-rp0 range. If max < min,
+	 * min becomes a fixed request.
+	 * Also, overclocking is not supported.
+	 */
+	if (freq < pc->rpn_freq || freq > pc->rp0_freq)
+		return -EINVAL;
+
+	return pc_action_set_param(pc,
+				   SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ,
+				   freq);
+}
+
+static void mtl_update_rpe_value(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	u32 reg;
+
+	if (xe_gt_is_media_type(gt))
+		reg = xe_mmio_read32(gt, MTL_MPE_FREQUENCY);
+	else
+		reg = xe_mmio_read32(gt, MTL_GT_RPE_FREQUENCY);
+
+	pc->rpe_freq = decode_freq(REG_FIELD_GET(MTL_RPE_MASK, reg));
+}
+
+static void tgl_update_rpe_value(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 reg;
+
+	/*
+	 * For PVC we still need to use fused RP1 as the approximation for RPe
+	 * For other platforms than PVC we get the resolved RPe directly from
+	 * PCODE at a different register
+	 */
+	if (xe->info.platform == XE_PVC)
+		reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP);
+	else
+		reg = xe_mmio_read32(gt, FREQ_INFO_REC);
+
+	pc->rpe_freq = REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
+}
+
+static void pc_update_rp_values(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (GRAPHICS_VERx100(xe) >= 1270)
+		mtl_update_rpe_value(pc);
+	else
+		tgl_update_rpe_value(pc);
+
+	/*
+	 * RPe is decided at runtime by PCODE. In the rare case where that's
+	 * smaller than the fused min, we will trust the PCODE and use that
+	 * as our minimum one.
+	 */
+	pc->rpn_freq = min(pc->rpn_freq, pc->rpe_freq);
+}
+
+/**
+ * xe_guc_pc_get_act_freq - Get Actual running frequency
+ * @pc: The GuC PC
+ *
+ * Returns: The Actual running frequency. Which might be 0 if GT is in Render-C sleep state (RC6).
+ */
+u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 act;
+
+	xe_device_mem_access_get(xe);
+
+	/* The reported actual frequency is 0 while the GT sits in RC6. */
+	if (GRAPHICS_VERx100(xe) >= 1270) {
+		act = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1);
+		act = REG_FIELD_GET(MTL_CAGF_MASK, act);
+	} else {
+		act = xe_mmio_read32(gt, GT_PERF_STATUS);
+		act = REG_FIELD_GET(CAGF_MASK, act);
+	}
+
+	act = decode_freq(act);
+
+	xe_device_mem_access_put(xe);
+
+	return act;
+}
+
+/**
+ * xe_guc_pc_get_cur_freq - Get Current requested frequency
+ * @pc: The GuC PC
+ * @freq: A pointer to a u32 where the freq value will be returned
+ *
+ * Returns: 0 on success,
+ *         the xe_force_wake_get() error if forcewake could not be taken.
+ */
+int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	int ret;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	/*
+	 * GuC SLPC plays with cur freq request when GuCRC is enabled
+	 * Block RC6 for a more reliable read.
+	 */
+	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (ret)
+		goto out;
+
+	*freq = xe_mmio_read32(gt, RPNSWREQ);
+
+	*freq = REG_FIELD_GET(REQ_RATIO_MASK, *freq);
+	*freq = decode_freq(*freq);
+
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+out:
+	xe_device_mem_access_put(gt_to_xe(gt));
+	return ret;
+}
+
+/**
+ * xe_guc_pc_get_rp0_freq - Get the RP0 freq
+ * @pc: The GuC PC
+ *
+ * Returns: RP0 freq.
+ */
+u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc)
+{
+	return pc->rp0_freq;
+}
+
+/**
+ * xe_guc_pc_get_rpe_freq - Get the RPe freq
+ * @pc: The GuC PC
+ *
+ * Returns: RPe freq.
+ */
+u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	struct xe_device *xe = gt_to_xe(gt);
+
+	xe_device_mem_access_get(xe);
+	pc_update_rp_values(pc);
+	xe_device_mem_access_put(xe);
+
+	return pc->rpe_freq;
+}
+
+/**
+ * xe_guc_pc_get_rpn_freq - Get the RPn freq
+ * @pc: The GuC PC
+ *
+ * Returns: RPn freq.
+ */
+u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc)
+{
+	return pc->rpn_freq;
+}
+
+/**
+ * xe_guc_pc_get_min_freq - Get the min operational frequency
+ * @pc: The GuC PC
+ * @freq: A pointer to a u32 where the freq value will be returned
+ *
+ * Returns: 0 on success,
+ *         -EAGAIN if GuC PC not ready (likely in middle of a reset).
+ */
+int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	int ret;
+
+	xe_device_mem_access_get(pc_to_xe(pc));
+	mutex_lock(&pc->freq_lock);
+	if (!pc->freq_ready) {
+		/* Might be in the middle of a gt reset */
+		ret = -EAGAIN;
+		goto out;
+	}
+
+	/*
+	 * GuC SLPC plays with min freq request when GuCRC is enabled
+	 * Block RC6 for a more reliable read.
+	 */
+	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (ret)
+		goto out;
+
+	ret = pc_action_query_task_state(pc);
+	if (ret)
+		goto fw;
+
+	*freq = pc_get_min_freq(pc);
+
+fw:
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+out:
+	mutex_unlock(&pc->freq_lock);
+	xe_device_mem_access_put(pc_to_xe(pc));
+	return ret;
+}
+
+/**
+ * xe_guc_pc_set_min_freq - Set the minimal operational frequency
+ * @pc: The GuC PC
+ * @freq: The selected minimal frequency
+ *
+ * Returns: 0 on success,
+ *         -EAGAIN if GuC PC not ready (likely in middle of a reset),
+ *         -EINVAL if value out of bounds.
+ */
+int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq)
+{
+	int ret;
+
+	xe_device_mem_access_get(pc_to_xe(pc));
+	mutex_lock(&pc->freq_lock);
+	if (!pc->freq_ready) {
+		/* Might be in the middle of a gt reset */
+		ret = -EAGAIN;
+		goto out;
+	}
+
+	ret = pc_set_min_freq(pc, freq);
+	if (ret)
+		goto out;
+
+	pc->user_requested_min = freq;
+
+out:
+	mutex_unlock(&pc->freq_lock);
+	xe_device_mem_access_put(pc_to_xe(pc));
+
+	return ret;
+}
+
+/**
+ * xe_guc_pc_get_max_freq - Get Maximum operational frequency
+ * @pc: The GuC PC
+ * @freq: A pointer to a u32 where the freq value will be returned
+ *
+ * Returns: 0 on success,
+ *         -EAGAIN if GuC PC not ready (likely in middle of a reset).
+ */
+int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq)
+{
+	int ret;
+
+	xe_device_mem_access_get(pc_to_xe(pc));
+	mutex_lock(&pc->freq_lock);
+	if (!pc->freq_ready) {
+		/* Might be in the middle of a gt reset */
+		ret = -EAGAIN;
+		goto out;
+	}
+
+	ret = pc_action_query_task_state(pc);
+	if (ret)
+		goto out;
+
+	*freq = pc_get_max_freq(pc);
+
+out:
+	mutex_unlock(&pc->freq_lock);
+	xe_device_mem_access_put(pc_to_xe(pc));
+	return ret;
+}
+
+/**
+ * xe_guc_pc_set_max_freq - Set the maximum operational frequency
+ * @pc: The GuC PC
+ * @freq: The selected maximum frequency value
+ *
+ * Returns: 0 on success,
+ *         -EAGAIN if GuC PC not ready (likely in middle of a reset),
+ *         -EINVAL if value out of bounds.
+ */
+int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
+{
+	int ret;
+
+	xe_device_mem_access_get(pc_to_xe(pc));
+	mutex_lock(&pc->freq_lock);
+	if (!pc->freq_ready) {
+		/* Might be in the middle of a gt reset */
+		ret = -EAGAIN;
+		goto out;
+	}
+
+	ret = pc_set_max_freq(pc, freq);
+	if (ret)
+		goto out;
+
+	pc->user_requested_max = freq;
+
+out:
+	mutex_unlock(&pc->freq_lock);
+	xe_device_mem_access_put(pc_to_xe(pc));
+	return ret;
+}
+
+/**
+ * xe_guc_pc_c_status - get the current GT C state
+ * @pc: XE_GuC_PC instance
+ */
+enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	u32 reg, gt_c_state;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+
+	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
+		reg = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1);
+		gt_c_state = REG_FIELD_GET(MTL_CC_MASK, reg);
+	} else {
+		reg = xe_mmio_read32(gt, GT_CORE_STATUS);
+		gt_c_state = REG_FIELD_GET(RCN_MASK, reg);
+	}
+
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	switch (gt_c_state) {
+	case GT_C6:
+		return GT_IDLE_C6;
+	case GT_C0:
+		return GT_IDLE_C0;
+	default:
+		return GT_IDLE_UNKNOWN;
+	}
+}
+
+/**
+ * xe_guc_pc_rc6_residency - rc6 residency counter
+ * @pc: Xe_GuC_PC instance
+ */
+u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	u32 reg;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	reg = xe_mmio_read32(gt, GT_GFX_RC6);
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	return reg;
+}
+
+/**
+ * xe_guc_pc_mc6_residency - mc6 residency counter
+ * @pc: Xe_GuC_PC instance
+ */
+u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	u64 reg;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	reg = xe_mmio_read32(gt, MTL_MEDIA_MC6);
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	return reg;
+}
+
+static void mtl_init_fused_rp_values(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	u32 reg;
+
+	xe_device_assert_mem_access(pc_to_xe(pc));
+
+	if (xe_gt_is_media_type(gt))
+		reg = xe_mmio_read32(gt, MTL_MEDIAP_STATE_CAP);
+	else
+		reg = xe_mmio_read32(gt, MTL_RP_STATE_CAP);
+
+	pc->rp0_freq = decode_freq(REG_FIELD_GET(MTL_RP0_CAP_MASK, reg));
+
+	pc->rpn_freq = decode_freq(REG_FIELD_GET(MTL_RPN_CAP_MASK, reg));
+}
+
+static void tgl_init_fused_rp_values(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 reg;
+
+	xe_device_assert_mem_access(pc_to_xe(pc));
+
+	if (xe->info.platform == XE_PVC)
+		reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP);
+	else
+		reg = xe_mmio_read32(gt, RP_STATE_CAP);
+	pc->rp0_freq = REG_FIELD_GET(RP0_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
+	pc->rpn_freq = REG_FIELD_GET(RPN_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
+}
+
+static void pc_init_fused_rp_values(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (GRAPHICS_VERx100(xe) >= 1270)
+		mtl_init_fused_rp_values(pc);
+	else
+		tgl_init_fused_rp_values(pc);
+}
+
+/**
+ * xe_guc_pc_init_early - Initialize RPx values and request a higher GT
+ * frequency to allow faster GuC load times
+ * @pc: Xe_GuC_PC instance
+ */
+void xe_guc_pc_init_early(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+	pc_init_fused_rp_values(pc);
+	pc_set_cur_freq(pc, pc->rp0_freq);
+}
+
+static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
+{
+	int ret;
+
+	lockdep_assert_held(&pc->freq_lock);
+
+	ret = pc_action_query_task_state(pc);
+	if (ret)
+		return ret;
+
+	/*
+	 * GuC defaults to some RPmax that is not actually achievable without
+	 * overclocking. Let's adjust it to the Hardware RP0, which is the
+	 * regular maximum. A failure to apply the clamp is reported to caller.
+	 */
+	if (pc_get_max_freq(pc) > pc->rp0_freq)
+		ret = pc_set_max_freq(pc, pc->rp0_freq);
+
+	/*
+	 * Same thing happens for Server platforms where min is listed as
+	 * RPMax. Skipped when the max clamp already failed.
+	 */
+	if (!ret && pc_get_min_freq(pc) > pc->rp0_freq)
+		ret = pc_set_min_freq(pc, pc->rp0_freq);
+
+	return ret;
+}
+
+static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
+{
+	int ret = 0;
+
+	lockdep_assert_held(&pc->freq_lock);
+
+	if (pc->user_requested_min != 0) {
+		ret = pc_set_min_freq(pc, pc->user_requested_min);
+		if (ret)
+			return ret;
+	}
+
+	if (pc->user_requested_max != 0) {
+		ret = pc_set_max_freq(pc, pc->user_requested_max);
+		if (ret)
+			return ret;
+	}
+
+	return ret;
+}
+
+/**
+ * xe_guc_pc_gucrc_disable - Disable GuC RC
+ * @pc: Xe_GuC_PC instance
+ *
+ * Disables GuC RC by taking control of RC6 back from GuC.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc)
+{
+	struct xe_device *xe = pc_to_xe(pc);
+	struct xe_gt *gt = pc_to_gt(pc);
+	int ret = 0;
+
+	if (xe->info.skip_guc_pc)
+		return 0;
+
+	xe_device_mem_access_get(pc_to_xe(pc));
+
+	ret = pc_action_setup_gucrc(pc, XE_GUCRC_HOST_CONTROL);
+	if (ret)
+		goto out;
+
+	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (ret)
+		goto out;
+
+	xe_gt_idle_disable_c6(gt);
+
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+
+out:
+	xe_device_mem_access_put(pc_to_xe(pc));
+	return ret;
+}
+
+static void pc_init_pcode_freq(struct xe_guc_pc *pc)
+{
+	u32 min = DIV_ROUND_CLOSEST(pc->rpn_freq, GT_FREQUENCY_MULTIPLIER);
+	u32 max = DIV_ROUND_CLOSEST(pc->rp0_freq, GT_FREQUENCY_MULTIPLIER);
+
+	XE_WARN_ON(xe_pcode_init_min_freq_table(pc_to_gt(pc), min, max));
+}
+
+static int pc_init_freqs(struct xe_guc_pc *pc)
+{
+	int ret;
+
+	mutex_lock(&pc->freq_lock);
+
+	ret = pc_adjust_freq_bounds(pc);
+	if (ret)
+		goto out;
+
+	ret = pc_adjust_requested_freq(pc);
+	if (ret)
+		goto out;
+
+	pc_update_rp_values(pc);
+
+	pc_init_pcode_freq(pc);
+
+	/*
+	 * The frequencies are really ready for use only after the user
+	 * requested ones got restored.
+	 */
+	pc->freq_ready = true;
+
+out:
+	mutex_unlock(&pc->freq_lock);
+	return ret;
+}
+
+/**
+ * xe_guc_pc_start - Start GuC's Power Conservation component
+ * @pc: Xe_GuC_PC instance
+ */
+int xe_guc_pc_start(struct xe_guc_pc *pc)
+{
+	struct xe_device *xe = pc_to_xe(pc);
+	struct xe_gt *gt = pc_to_gt(pc);
+	u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
+	int ret;
+
+	xe_gt_assert(gt, xe_device_uc_enabled(xe));
+
+	xe_device_mem_access_get(pc_to_xe(pc));
+
+	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (ret)
+		goto out_fail_force_wake;
+
+	if (xe->info.skip_guc_pc) {
+		if (xe->info.platform != XE_PVC)
+			xe_gt_idle_enable_c6(gt);
+
+		/* Request max possible since dynamic freq mgmt is not enabled */
+		pc_set_cur_freq(pc, UINT_MAX);
+
+		ret = 0;
+		goto out;
+	}
+	/* The BO may be in VRAM (I/O memory): clear it via the iosys_map */
+	xe_map_memset(xe, &pc->bo->vmap, 0, 0, size);
+	slpc_shared_data_write(pc, header.size, size);
+
+	ret = pc_action_reset(pc);
+	if (ret)
+		goto out;
+
+	if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) {
+		drm_err(&pc_to_xe(pc)->drm, "GuC PC Start failed\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	ret = pc_init_freqs(pc);
+	if (ret)
+		goto out;
+
+	if (xe->info.platform == XE_PVC) {
+		xe_guc_pc_gucrc_disable(pc);
+		ret = 0;
+		goto out;
+	}
+
+	ret = pc_action_setup_gucrc(pc, XE_GUCRC_FIRMWARE_CONTROL);
+
+out:
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+out_fail_force_wake:
+	xe_device_mem_access_put(pc_to_xe(pc));
+	return ret;
+}
+
+/**
+ * xe_guc_pc_stop - Stop GuC's Power Conservation component
+ * @pc: Xe_GuC_PC instance
+ */
+int xe_guc_pc_stop(struct xe_guc_pc *pc)
+{
+	struct xe_device *xe = pc_to_xe(pc);
+	int ret;
+
+	xe_device_mem_access_get(pc_to_xe(pc));
+
+	if (xe->info.skip_guc_pc) {
+		xe_gt_idle_disable_c6(pc_to_gt(pc));
+		ret = 0;
+		goto out;
+	}
+
+	mutex_lock(&pc->freq_lock);
+	pc->freq_ready = false;
+	mutex_unlock(&pc->freq_lock);
+
+	ret = pc_action_shutdown(pc);
+	if (ret)
+		goto out;
+
+	if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_NOT_RUNNING)) {
+		drm_err(&pc_to_xe(pc)->drm, "GuC PC Shutdown failed\n");
+		ret = -EIO;
+	}
+
+out:
+	xe_device_mem_access_put(pc_to_xe(pc));
+	return ret;
+}
+
+/**
+ * xe_guc_pc_fini - Finalize GuC's Power Conservation component
+ * @pc: Xe_GuC_PC instance
+ */
+void xe_guc_pc_fini(struct xe_guc_pc *pc)
+{
+	struct xe_device *xe = pc_to_xe(pc);
+
+	if (xe->info.skip_guc_pc) {
+		xe_device_mem_access_get(xe);
+		xe_gt_idle_disable_c6(pc_to_gt(pc));
+		xe_device_mem_access_put(xe);
+		return;
+	}
+
+	XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
+	XE_WARN_ON(xe_guc_pc_stop(pc));
+	mutex_destroy(&pc->freq_lock);
+}
+
+/**
+ * xe_guc_pc_init - Initialize GuC's Power Conservation component
+ * @pc: Xe_GuC_PC instance
+ */
+int xe_guc_pc_init(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_bo *bo;
+	u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
+
+	if (xe->info.skip_guc_pc)
+		return 0;
+
+	mutex_init(&pc->freq_lock);
+
+	bo = xe_managed_bo_create_pin_map(xe, tile, size,
+					  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+					  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	pc->bo = bo;
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
new file mode 100644
index 000000000000..cecad8e9300b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_pc.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_PC_H_
+#define _XE_GUC_PC_H_
+
+#include "xe_guc_pc_types.h"
+
+int xe_guc_pc_init(struct xe_guc_pc *pc);
+void xe_guc_pc_fini(struct xe_guc_pc *pc);
+int xe_guc_pc_start(struct xe_guc_pc *pc);
+int xe_guc_pc_stop(struct xe_guc_pc *pc);
+int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc);
+
+u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc);
+int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq);
+u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc);
+u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc);
+u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc);
+int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq);
+int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq);
+int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq);
+int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq);
+
+enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc);
+u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc);
+u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc);
+void xe_guc_pc_init_early(struct xe_guc_pc *pc);
+#endif /* _XE_GUC_PC_H_ */
diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h
new file mode 100644
index 000000000000..2afd0dbc3542
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_PC_TYPES_H_
+#define _XE_GUC_PC_TYPES_H_
+
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+/**
+ * struct xe_guc_pc - GuC Power Conservation (PC)
+ */
+struct xe_guc_pc {
+	/** @bo: GGTT buffer object that is shared with GuC PC */
+	struct xe_bo *bo;
+	/** @rp0_freq: HW RP0 frequency - The Maximum one */
+	u32 rp0_freq;
+	/** @rpe_freq: HW RPe frequency - The Efficient one */
+	u32 rpe_freq;
+	/** @rpn_freq: HW RPN frequency - The Minimum one */
+	u32 rpn_freq;
+	/** @user_requested_min: Stash the minimum requested freq by user */
+	u32 user_requested_min;
+	/** @user_requested_max: Stash the maximum requested freq by user */
+	u32 user_requested_max;
+	/** @freq_lock: Let's protect the frequencies */
+	struct mutex freq_lock;
+	/** @freq_ready: Only handle freq changes, if they are really ready */
+	bool freq_ready;
+};
+
+#endif	/* _XE_GUC_PC_TYPES_H_ */
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
new file mode 100644
index 000000000000..f22ae717b0b2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -0,0 +1,1987 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_guc_submit.h"
+
+#include <linux/bitfield.h>
+#include <linux/bitmap.h>
+#include <linux/circ_buf.h>
+#include <linux/delay.h>
+#include <linux/dma-fence-array.h>
+
+#include <drm/drm_managed.h>
+
+#include "abi/guc_actions_abi.h"
+#include "abi/guc_klvs_abi.h"
+#include "regs/xe_lrc_layout.h"
+#include "xe_assert.h"
+#include "xe_devcoredump.h"
+#include "xe_device.h"
+#include "xe_exec_queue.h"
+#include "xe_force_wake.h"
+#include "xe_gpu_scheduler.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_exec_queue_types.h"
+#include "xe_guc_submit_types.h"
+#include "xe_hw_engine.h"
+#include "xe_hw_fence.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_map.h"
+#include "xe_mocs.h"
+#include "xe_ring_ops_types.h"
+#include "xe_sched_job.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+
+static struct xe_guc *
+exec_queue_to_guc(struct xe_exec_queue *q)
+{
+	return &q->gt->uc.guc;
+}
+
+/*
+ * Helpers for engine state, using an atomic as some of the bits can transition
+ * at the same time (e.g. a suspend can be happening at the same time as
+ * schedule engine done being processed).
+ */
+#define EXEC_QUEUE_STATE_REGISTERED		(1 << 0)
+#define ENGINE_STATE_ENABLED		(1 << 1)
+#define EXEC_QUEUE_STATE_PENDING_ENABLE	(1 << 2)
+#define EXEC_QUEUE_STATE_PENDING_DISABLE	(1 << 3)
+#define EXEC_QUEUE_STATE_DESTROYED		(1 << 4)
+#define ENGINE_STATE_SUSPENDED		(1 << 5)
+#define EXEC_QUEUE_STATE_RESET		(1 << 6)
+#define ENGINE_STATE_KILLED		(1 << 7)
+
+static bool exec_queue_registered(struct xe_exec_queue *q)
+{
+	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED;
+}
+
+static void set_exec_queue_registered(struct xe_exec_queue *q)
+{
+	atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
+}
+
+static void clear_exec_queue_registered(struct xe_exec_queue *q)
+{
+	atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
+}
+
+static bool exec_queue_enabled(struct xe_exec_queue *q)
+{
+	return atomic_read(&q->guc->state) & ENGINE_STATE_ENABLED;
+}
+
+static void set_exec_queue_enabled(struct xe_exec_queue *q)
+{
+	atomic_or(ENGINE_STATE_ENABLED, &q->guc->state);
+}
+
+static void clear_exec_queue_enabled(struct xe_exec_queue *q)
+{
+	atomic_and(~ENGINE_STATE_ENABLED, &q->guc->state);
+}
+
+static bool exec_queue_pending_enable(struct xe_exec_queue *q)
+{
+	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE;
+}
+
+static void set_exec_queue_pending_enable(struct xe_exec_queue *q)
+{
+	atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
+}
+
+static void clear_exec_queue_pending_enable(struct xe_exec_queue *q)
+{
+	atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
+}
+
+static bool exec_queue_pending_disable(struct xe_exec_queue *q)
+{
+	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE;
+}
+
+static void set_exec_queue_pending_disable(struct xe_exec_queue *q)
+{
+	atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
+}
+
+static void clear_exec_queue_pending_disable(struct xe_exec_queue *q)
+{
+	atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
+}
+
+static bool exec_queue_destroyed(struct xe_exec_queue *q)
+{
+	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED;
+}
+
+static void set_exec_queue_destroyed(struct xe_exec_queue *q)
+{
+	atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
+}
+
+static bool exec_queue_banned(struct xe_exec_queue *q)
+{
+	return (q->flags & EXEC_QUEUE_FLAG_BANNED);
+}
+
+static void set_exec_queue_banned(struct xe_exec_queue *q)
+{
+	q->flags |= EXEC_QUEUE_FLAG_BANNED;
+}
+
+static bool exec_queue_suspended(struct xe_exec_queue *q)
+{
+	return atomic_read(&q->guc->state) & ENGINE_STATE_SUSPENDED;
+}
+
+static void set_exec_queue_suspended(struct xe_exec_queue *q)
+{
+	atomic_or(ENGINE_STATE_SUSPENDED, &q->guc->state);
+}
+
+static void clear_exec_queue_suspended(struct xe_exec_queue *q)
+{
+	atomic_and(~ENGINE_STATE_SUSPENDED, &q->guc->state);
+}
+
+static bool exec_queue_reset(struct xe_exec_queue *q)
+{
+	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET;
+}
+
+static void set_exec_queue_reset(struct xe_exec_queue *q)
+{
+	atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state);
+}
+
+static bool exec_queue_killed(struct xe_exec_queue *q)
+{
+	return atomic_read(&q->guc->state) & ENGINE_STATE_KILLED;
+}
+
+static void set_exec_queue_killed(struct xe_exec_queue *q)
+{
+	atomic_or(ENGINE_STATE_KILLED, &q->guc->state);
+}
+
+static bool exec_queue_killed_or_banned(struct xe_exec_queue *q)
+{
+	return exec_queue_killed(q) || exec_queue_banned(q);
+}
+
+#ifdef CONFIG_PROVE_LOCKING
+static int alloc_submit_wq(struct xe_guc *guc)
+{
+	int i;
+
+	for (i = 0; i < NUM_SUBMIT_WQ; ++i) {
+		guc->submission_state.submit_wq_pool[i] =
+			alloc_ordered_workqueue("submit_wq", 0);
+		if (!guc->submission_state.submit_wq_pool[i])
+			goto err_free;
+	}
+
+	return 0;
+
+err_free:
+	while (i)
+		destroy_workqueue(guc->submission_state.submit_wq_pool[--i]);
+
+	return -ENOMEM;
+}
+
+static void free_submit_wq(struct xe_guc *guc)
+{
+	int i;
+
+	for (i = 0; i < NUM_SUBMIT_WQ; ++i)
+		destroy_workqueue(guc->submission_state.submit_wq_pool[i]);
+}
+
+static struct workqueue_struct *get_submit_wq(struct xe_guc *guc)
+{
+	int idx = guc->submission_state.submit_wq_idx++ % NUM_SUBMIT_WQ;
+
+	return guc->submission_state.submit_wq_pool[idx];
+}
+#else
+static int alloc_submit_wq(struct xe_guc *guc)
+{
+	return 0;
+}
+
+static void free_submit_wq(struct xe_guc *guc)
+{
+
+}
+
+static struct workqueue_struct *get_submit_wq(struct xe_guc *guc)
+{
+	return NULL;
+}
+#endif
+
+static void guc_submit_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_guc *guc = arg;
+
+	xa_destroy(&guc->submission_state.exec_queue_lookup);
+	ida_destroy(&guc->submission_state.guc_ids);
+	bitmap_free(guc->submission_state.guc_ids_bitmap);
+	free_submit_wq(guc);
+	mutex_destroy(&guc->submission_state.lock);
+}
+
+#define GUC_ID_MAX		65535
+#define GUC_ID_NUMBER_MLRC	4096
+#define GUC_ID_NUMBER_SLRC	(GUC_ID_MAX - GUC_ID_NUMBER_MLRC)
+#define GUC_ID_START_MLRC	GUC_ID_NUMBER_SLRC
+
+static const struct xe_exec_queue_ops guc_exec_queue_ops;
+
+static void primelockdep(struct xe_guc *guc)
+{
+	if (!IS_ENABLED(CONFIG_LOCKDEP))
+		return;
+
+	fs_reclaim_acquire(GFP_KERNEL);
+
+	mutex_lock(&guc->submission_state.lock);
+	might_lock(&guc->submission_state.suspend.lock);
+	mutex_unlock(&guc->submission_state.lock);
+
+	fs_reclaim_release(GFP_KERNEL);
+}
+
+/**
+ * xe_guc_submit_init() - Set up the GuC submission state
+ * @guc: GuC instance whose submission_state is initialized
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int xe_guc_submit_init(struct xe_guc *guc)
+{
+	int err;
+
+	guc->submission_state.guc_ids_bitmap =
+		bitmap_zalloc(GUC_ID_NUMBER_MLRC, GFP_KERNEL);
+	if (!guc->submission_state.guc_ids_bitmap)
+		return -ENOMEM;
+
+	err = alloc_submit_wq(guc);
+	if (err) {
+		bitmap_free(guc->submission_state.guc_ids_bitmap);
+		return err;
+	}
+
+	guc_to_gt(guc)->exec_queue_ops = &guc_exec_queue_ops;
+
+	mutex_init(&guc->submission_state.lock);
+	xa_init(&guc->submission_state.exec_queue_lookup);
+	ida_init(&guc->submission_state.guc_ids);
+
+	spin_lock_init(&guc->submission_state.suspend.lock);
+	guc->submission_state.suspend.context = dma_fence_context_alloc(1);
+
+	primelockdep(guc);
+
+	return drmm_add_action_or_reset(&guc_to_xe(guc)->drm, guc_submit_fini, guc);
+}
+
+static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
+{
+	int i;
+
+	lockdep_assert_held(&guc->submission_state.lock);
+
+	for (i = 0; i < xa_count; ++i)
+		xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);
+
+	if (xe_exec_queue_is_parallel(q))
+		bitmap_release_region(guc->submission_state.guc_ids_bitmap,
+				      q->guc->id - GUC_ID_START_MLRC,
+				      order_base_2(q->width));
+	else
+		ida_simple_remove(&guc->submission_state.guc_ids, q->guc->id);
+}
+
+/* Allocate a GuC ID for @q and add its q->width lookup entries; 0 or -errno. */
+static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
+{
+	int ret, i;
+
+	/*
+	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
+	 * worst case user gets -ENOMEM on engine create and has to try again.
+	 *
+	 * FIXME: Have caller pre-alloc or post-alloc /w GFP_KERNEL to prevent
+	 * failure.
+	 */
+	lockdep_assert_held(&guc->submission_state.lock);
+
+	if (xe_exec_queue_is_parallel(q)) {
+		void *bitmap = guc->submission_state.guc_ids_bitmap;
+
+		ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC,
+					      order_base_2(q->width));
+	} else {
+		ret = ida_simple_get(&guc->submission_state.guc_ids, 0,
+				     GUC_ID_NUMBER_SLRC, GFP_NOWAIT);
+	}
+	if (ret < 0)
+		return ret;
+
+	q->guc->id = ret;
+	if (xe_exec_queue_is_parallel(q))
+		q->guc->id += GUC_ID_START_MLRC;
+
+	for (i = 0; i < q->width; ++i) {
+		/*
+		 * xa_store() failures are XArray error entries: use xa_err().
+		 */
+		ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup,
+				      q->guc->id + i, q, GFP_NOWAIT));
+		if (ret)
+			goto err_release;
+	}
+
+	return 0;
+
+err_release:
+	__release_guc_id(guc, q, i);
+
+	return ret;
+}
+
+static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
+{
+	mutex_lock(&guc->submission_state.lock);
+	__release_guc_id(guc, q, q->width);
+	mutex_unlock(&guc->submission_state.lock);
+}
+
+struct exec_queue_policy {
+	u32 count;
+	struct guc_update_exec_queue_policy h2g;
+};
+
+static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy)
+{
+	size_t bytes = sizeof(policy->h2g.header) +
+		       (sizeof(policy->h2g.klv[0]) * policy->count);
+
+	return bytes / sizeof(u32);
+}
+
+static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy,
+					      u16 guc_id)
+{
+	policy->h2g.header.action =
+		XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
+	policy->h2g.header.guc_id = guc_id;
+	policy->count = 0;
+}
+
+#define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \
+static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \
+					   u32 data) \
+{ \
+	XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
+\
+	policy->h2g.klv[policy->count].kl = \
+		FIELD_PREP(GUC_KLV_0_KEY, \
+			   GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
+		FIELD_PREP(GUC_KLV_0_LEN, 1); \
+	policy->h2g.klv[policy->count].value = data; \
+	policy->count++; \
+}
+
+MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
+MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
+MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
+#undef MAKE_EXEC_QUEUE_POLICY_ADD
+
+static const int xe_exec_queue_prio_to_guc[] = {
+	[XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
+	[XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
+	[XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
+	[XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
+};
+
+static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
+{
+	struct exec_queue_policy policy;
+	struct xe_device *xe = guc_to_xe(guc);
+	enum xe_exec_queue_priority prio = q->sched_props.priority;
+	u32 timeslice_us = q->sched_props.timeslice_us;
+	u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;
+
+	xe_assert(xe, exec_queue_registered(q));
+
+	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
+	__guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
+	__guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us);
+	__guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us);
+
+	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
+		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
+}
+
+static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q)
+{
+	struct exec_queue_policy policy;
+
+	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
+	__guc_exec_queue_policy_add_preemption_timeout(&policy, 1);
+
+	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
+		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
+}
+
+#define parallel_read(xe_, map_, field_) \
+	xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
+			field_)
+#define parallel_write(xe_, map_, field_, val_) \
+	xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
+			field_, val_)
+
+static void __register_mlrc_engine(struct xe_guc *guc,
+				   struct xe_exec_queue *q,
+				   struct guc_ctxt_registration_info *info)
+{
+#define MAX_MLRC_REG_SIZE      (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
+	struct xe_device *xe = guc_to_xe(guc);
+	u32 action[MAX_MLRC_REG_SIZE];
+	int len = 0;
+	int i;
+
+	xe_assert(xe, xe_exec_queue_is_parallel(q));
+
+	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
+	action[len++] = info->flags;
+	action[len++] = info->context_idx;
+	action[len++] = info->engine_class;
+	action[len++] = info->engine_submit_mask;
+	action[len++] = info->wq_desc_lo;
+	action[len++] = info->wq_desc_hi;
+	action[len++] = info->wq_base_lo;
+	action[len++] = info->wq_base_hi;
+	action[len++] = info->wq_size;
+	action[len++] = q->width;
+	action[len++] = info->hwlrca_lo;
+	action[len++] = info->hwlrca_hi;
+
+	for (i = 1; i < q->width; ++i) {
+		struct xe_lrc *lrc = q->lrc + i;
+
+		action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
+		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
+	}
+
+	xe_assert(xe, len <= MAX_MLRC_REG_SIZE);
+#undef MAX_MLRC_REG_SIZE
+
+	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
+}
+
+static void __register_engine(struct xe_guc *guc,
+			      struct guc_ctxt_registration_info *info)
+{
+	u32 action[] = {
+		XE_GUC_ACTION_REGISTER_CONTEXT,
+		info->flags,
+		info->context_idx,
+		info->engine_class,
+		info->engine_submit_mask,
+		info->wq_desc_lo,
+		info->wq_desc_hi,
+		info->wq_base_lo,
+		info->wq_base_hi,
+		info->wq_size,
+		info->hwlrca_lo,
+		info->hwlrca_hi,
+	};
+
+	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
+}
+
+static void register_engine(struct xe_exec_queue *q)
+{
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_lrc *lrc = q->lrc;
+	struct guc_ctxt_registration_info info;
+
+	xe_assert(xe, !exec_queue_registered(q));
+
+	memset(&info, 0, sizeof(info));
+	info.context_idx = q->guc->id;
+	info.engine_class = xe_engine_class_to_guc_class(q->class);
+	info.engine_submit_mask = q->logical_mask;
+	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
+	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
+	info.flags = CONTEXT_REGISTRATION_FLAG_KMD;
+
+	if (xe_exec_queue_is_parallel(q)) {
+		u32 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
+		struct iosys_map map = xe_lrc_parallel_map(lrc);
+
+		info.wq_desc_lo = lower_32_bits(ggtt_addr +
+			offsetof(struct guc_submit_parallel_scratch, wq_desc));
+		info.wq_desc_hi = upper_32_bits(ggtt_addr +
+			offsetof(struct guc_submit_parallel_scratch, wq_desc));
+		info.wq_base_lo = lower_32_bits(ggtt_addr +
+			offsetof(struct guc_submit_parallel_scratch, wq[0]));
+		info.wq_base_hi = upper_32_bits(ggtt_addr +
+			offsetof(struct guc_submit_parallel_scratch, wq[0]));
+		info.wq_size = WQ_SIZE;
+
+		q->guc->wqi_head = 0;
+		q->guc->wqi_tail = 0;
+		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
+		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
+	}
+
+	/*
+	 * We must keep a reference for LR engines while the engine is
+	 * registered with the GuC, as their jobs signal immediately and we
+	 * can't destroy an engine while the GuC has a reference to it.
+	 */
+	if (xe_exec_queue_is_lr(q))
+		xe_exec_queue_get(q);
+
+	set_exec_queue_registered(q);
+	trace_xe_exec_queue_register(q);
+	if (xe_exec_queue_is_parallel(q))
+		__register_mlrc_engine(guc, q, &info);
+	else
+		__register_engine(guc, &info);
+	init_policies(guc, q);
+}
+
+static u32 wq_space_until_wrap(struct xe_exec_queue *q)
+{
+	return (WQ_SIZE - q->guc->wqi_tail);
+}
+
+static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
+{
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct iosys_map map = xe_lrc_parallel_map(q->lrc);
+	unsigned int sleep_period_ms = 1;
+
+#define AVAILABLE_SPACE \
+	CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
+	if (wqi_size > AVAILABLE_SPACE) {
+try_again:
+		q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
+		if (wqi_size > AVAILABLE_SPACE) {
+			if (sleep_period_ms == 1024) {
+				xe_gt_reset_async(q->gt);
+				return -ENODEV;
+			}
+
+			msleep(sleep_period_ms);
+			sleep_period_ms <<= 1;
+			goto try_again;
+		}
+	}
+#undef AVAILABLE_SPACE
+
+	return 0;
+}
+
+static int wq_noop_append(struct xe_exec_queue *q)
+{
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct iosys_map map = xe_lrc_parallel_map(q->lrc);
+	u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1;
+
+	if (wq_wait_for_space(q, wq_space_until_wrap(q)))
+		return -ENODEV;
+
+	xe_assert(xe, FIELD_FIT(WQ_LEN_MASK, len_dw));
+
+	parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
+		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
+		       FIELD_PREP(WQ_LEN_MASK, len_dw));
+	q->guc->wqi_tail = 0;
+
+	return 0;
+}
+
+static void wq_item_append(struct xe_exec_queue *q)
+{
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct iosys_map map = xe_lrc_parallel_map(q->lrc);
+#define WQ_HEADER_SIZE	4	/* Includes 1 LRC address too */
+	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
+	u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
+	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
+	int i = 0, j;
+
+	if (wqi_size > wq_space_until_wrap(q)) {
+		if (wq_noop_append(q))
+			return;
+	}
+	if (wq_wait_for_space(q, wqi_size))
+		return;
+
+	wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
+		FIELD_PREP(WQ_LEN_MASK, len_dw);
+	wqi[i++] = xe_lrc_descriptor(q->lrc);
+	wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
+		FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc->ring.tail / sizeof(u64));
+	wqi[i++] = 0;
+	for (j = 1; j < q->width; ++j) {
+		struct xe_lrc *lrc = q->lrc + j;
+
+		wqi[i++] = lrc->ring.tail / sizeof(u64);
+	}
+
+	xe_assert(xe, i == wqi_size / sizeof(u32));
+
+	iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
+				      wq[q->guc->wqi_tail / sizeof(u32)]));
+	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
+	q->guc->wqi_tail += wqi_size;
+	xe_assert(xe, q->guc->wqi_tail <= WQ_SIZE);
+
+	xe_device_wmb(xe);
+
+	map = xe_lrc_parallel_map(q->lrc);
+	parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail);
+}
+
+#define RESUME_PENDING	~0x0ull
+static void submit_exec_queue(struct xe_exec_queue *q)
+{
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_lrc *lrc = q->lrc;
+	u32 action[3];
+	u32 g2h_len = 0;
+	u32 num_g2h = 0;
+	int len = 0;
+	bool extra_submit = false;
+
+	xe_assert(xe, exec_queue_registered(q));
+
+	if (xe_exec_queue_is_parallel(q))
+		wq_item_append(q);
+	else
+		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
+
+	if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
+		return;
+
+	if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) {
+		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
+		action[len++] = q->guc->id;
+		action[len++] = GUC_CONTEXT_ENABLE;
+		g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
+		num_g2h = 1;
+		if (xe_exec_queue_is_parallel(q))
+			extra_submit = true;
+
+		q->guc->resume_time = RESUME_PENDING;
+		set_exec_queue_pending_enable(q);
+		set_exec_queue_enabled(q);
+		trace_xe_exec_queue_scheduling_enable(q);
+	} else {
+		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
+		action[len++] = q->guc->id;
+		trace_xe_exec_queue_submit(q);
+	}
+
+	xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);
+
+	if (extra_submit) {
+		len = 0;
+		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
+		action[len++] = q->guc->id;
+		trace_xe_exec_queue_submit(q);
+
+		xe_guc_ct_send(&guc->ct, action, len, 0, 0);
+	}
+}
+
+static struct dma_fence *
+guc_exec_queue_run_job(struct drm_sched_job *drm_job)
+{
+	struct xe_sched_job *job = to_xe_sched_job(drm_job);
+	struct xe_exec_queue *q = job->q;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+	bool lr = xe_exec_queue_is_lr(q);
+
+	xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
+		  exec_queue_banned(q) || exec_queue_suspended(q));
+
+	trace_xe_sched_job_run(job);
+
+	if (!exec_queue_killed_or_banned(q) && !xe_sched_job_is_error(job)) {
+		if (!exec_queue_registered(q))
+			register_engine(q);
+		if (!lr)	/* LR jobs are emitted in the exec IOCTL */
+			q->ring_ops->emit_job(job);
+		submit_exec_queue(q);
+	}
+
+	if (lr) {
+		xe_sched_job_set_error(job, -EOPNOTSUPP);
+		return NULL;
+	} else if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) {
+		return job->fence;
+	} else {
+		return dma_fence_get(job->fence);
+	}
+}
+
+static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
+{
+	struct xe_sched_job *job = to_xe_sched_job(drm_job);
+
+	trace_xe_sched_job_free(job);
+	xe_sched_job_put(job);
+}
+
+static int guc_read_stopped(struct xe_guc *guc)
+{
+	return atomic_read(&guc->submission_state.stopped);
+}
+
+#define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable)			\
+	u32 action[] = {						\
+		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,			\
+		q->guc->id,						\
+		GUC_CONTEXT_##enable_disable,				\
+	}
+
+static void disable_scheduling_deregister(struct xe_guc *guc,
+					  struct xe_exec_queue *q)
+{
+	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
+	struct xe_device *xe = guc_to_xe(guc);
+	int ret;
+
+	set_min_preemption_timeout(guc, q);
+	smp_rmb();
+	ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_enable(q) ||
+				 guc_read_stopped(guc), HZ * 5);
+	if (!ret) {
+		struct xe_gpu_scheduler *sched = &q->guc->sched;
+
+		drm_warn(&xe->drm, "Pending enable failed to respond");
+		xe_sched_submission_start(sched);
+		xe_gt_reset_async(q->gt);
+		xe_sched_tdr_queue_imm(sched);
+		return;
+	}
+
+	clear_exec_queue_enabled(q);
+	set_exec_queue_pending_disable(q);
+	set_exec_queue_destroyed(q);
+	trace_xe_exec_queue_scheduling_disable(q);
+
+	/*
+	 * Reserve space for both G2H here as the 2nd G2H is sent from a G2H
+	 * handler and we are not allowed to reserve G2H space in handlers.
+	 */
+	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
+		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
+		       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
+}
+
+static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p);
+
+#if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE)
+static void simple_error_capture(struct xe_exec_queue *q)
+{
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct drm_printer p = drm_err_printer("");
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	u32 adj_logical_mask = q->logical_mask;
+	u32 width_mask = (0x1 << q->width) - 1;
+	int i;
+	bool cookie;
+
+	if (q->vm && !q->vm->error_capture.capture_once) {
+		q->vm->error_capture.capture_once = true;
+		cookie = dma_fence_begin_signalling();
+		for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
+			if (adj_logical_mask & BIT(i)) {
+				adj_logical_mask |= width_mask << i;
+				i += q->width;
+			} else {
+				++i;
+			}
+		}
+
+		xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+		xe_guc_ct_print(&guc->ct, &p, true);
+		guc_exec_queue_print(q, &p);
+		for_each_hw_engine(hwe, guc_to_gt(guc), id) {
+			if (hwe->class != q->hwe->class ||
+			    !(BIT(hwe->logical_instance) & adj_logical_mask))
+				continue;
+			xe_hw_engine_print(hwe, &p);
+		}
+		xe_analyze_vm(&p, q->vm, q->gt->info.id);
+		xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+		dma_fence_end_signalling(cookie);
+	}
+}
+#else
+static void simple_error_capture(struct xe_exec_queue *q)
+{
+}
+#endif
+
+static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
+{
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+
+	/* Wake up any xe_wait_user_fence ioctl waiters if the exec queue is reset */
+	wake_up_all(&xe->ufence_wq);
+
+	if (xe_exec_queue_is_lr(q))
+		queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr);
+	else
+		xe_sched_tdr_queue_imm(&q->guc->sched);
+}
+
+/* lr_tdr work: ban a long-running exec queue and disable/deregister it if needed */
+static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
+{
+	struct xe_guc_exec_queue *ge =
+		container_of(w, struct xe_guc_exec_queue, lr_tdr);
+	struct xe_exec_queue *q = ge->q;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gpu_scheduler *sched = &ge->sched;
+
+	xe_assert(xe, xe_exec_queue_is_lr(q));
+	trace_xe_exec_queue_lr_cleanup(q);
+
+	/* Kill the run_job / process_msg entry points */
+	xe_sched_submission_stop(sched);
+
+	/*
+	 * Engine state now mostly stable, disable scheduling / deregister if
+	 * needed. This cleanup routine might be called multiple times, where
+	 * the actual async engine deregister drops the final engine ref.
+	 * Calling disable_scheduling_deregister will mark the engine as
+	 * destroyed and fire off the CT requests to disable scheduling /
+	 * deregister, which we only want to do once. We also don't want to mark
+	 * the engine as pending_disable again as this may race with the
+	 * xe_guc_deregister_done_handler() which treats it as an unexpected
+	 * state.
+	 */
+	if (exec_queue_registered(q) && !exec_queue_destroyed(q)) {
+		int ret;
+
+		set_exec_queue_banned(q);
+		disable_scheduling_deregister(guc, q);
+
+		/*
+		 * Must wait for scheduling to be disabled before signalling
+		 * any fences, if GT broken the GT reset code should signal us.
+		 */
+		ret = wait_event_timeout(guc->ct.wq,
+					 !exec_queue_pending_disable(q) ||
+					 guc_read_stopped(guc), HZ * 5);
+		if (!ret) {
+			drm_warn(&xe->drm, "Schedule disable failed to respond");
+			xe_sched_submission_start(sched);
+			xe_gt_reset_async(q->gt);
+			return;
+		}
+	}
+
+	xe_sched_submission_start(sched);
+}
+
+static enum drm_gpu_sched_stat
+guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
+{
+	struct xe_sched_job *job = to_xe_sched_job(drm_job);
+	struct xe_sched_job *tmp_job;
+	struct xe_exec_queue *q = job->q;
+	struct xe_gpu_scheduler *sched = &q->guc->sched;
+	struct xe_device *xe = guc_to_xe(exec_queue_to_guc(q));
+	int err = -ETIME;
+	int i = 0;
+
+	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
+		xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_KERNEL));
+		xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)));
+
+		drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
+			   xe_sched_job_seqno(job), q->guc->id, q->flags);
+		simple_error_capture(q);
+		xe_devcoredump(q);
+	} else {
+		drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx",
+			 xe_sched_job_seqno(job), q->guc->id, q->flags);
+	}
+	trace_xe_sched_job_timedout(job);
+
+	/* Kill the run_job entry point */
+	xe_sched_submission_stop(sched);
+
+	/*
+	 * Kernel jobs should never fail, nor should VM jobs. If they do,
+	 * something has gone wrong and the GT needs a reset.
+	 */
+	if (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
+	    (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q))) {
+		if (!xe_sched_invalidate_job(job, 2)) {
+			xe_sched_add_pending_job(sched, job);
+			xe_sched_submission_start(sched);
+			xe_gt_reset_async(q->gt);
+			goto out;
+		}
+	}
+
+	/* Engine state now stable, disable scheduling if needed */
+	if (exec_queue_registered(q)) {
+		struct xe_guc *guc = exec_queue_to_guc(q);
+		int ret;
+
+		if (exec_queue_reset(q))
+			err = -EIO;
+		set_exec_queue_banned(q);
+		if (!exec_queue_destroyed(q)) {
+			xe_exec_queue_get(q);
+			disable_scheduling_deregister(guc, q);
+		}
+
+		/*
+		 * Must wait for scheduling to be disabled before signalling
+		 * any fences, if GT broken the GT reset code should signal us.
+		 *
+		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
+		 * error) messages which can cause the schedule disable to get
+		 * lost. If this occurs, trigger a GT reset to recover.
+		 */
+		smp_rmb();
+		ret = wait_event_timeout(guc->ct.wq,
+					 !exec_queue_pending_disable(q) ||
+					 guc_read_stopped(guc), HZ * 5);
+		if (!ret || guc_read_stopped(guc)) {
+			drm_warn(&xe->drm, "Schedule disable failed to respond");
+			xe_sched_add_pending_job(sched, job);
+			xe_sched_submission_start(sched);
+			xe_gt_reset_async(q->gt);
+			xe_sched_tdr_queue_imm(sched);
+			goto out;
+		}
+	}
+
+	/* Stop fence signaling */
+	xe_hw_fence_irq_stop(q->fence_irq);
+
+	/*
+	 * Fence state now stable, stop / start scheduler which cleans up any
+	 * fences that are complete
+	 */
+	xe_sched_add_pending_job(sched, job);
+	xe_sched_submission_start(sched);
+	xe_guc_exec_queue_trigger_cleanup(q);
+
+	/* Mark all outstanding jobs as bad, thus completing them */
+	spin_lock(&sched->base.job_list_lock);
+	list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list)
+		xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED);
+	spin_unlock(&sched->base.job_list_lock);
+
+	/* Start fence signaling */
+	xe_hw_fence_irq_start(q->fence_irq);
+
+out:
+	return DRM_GPU_SCHED_STAT_NOMINAL;
+}
+
+static void __guc_exec_queue_fini_async(struct work_struct *w)
+{
+	struct xe_guc_exec_queue *ge =
+		container_of(w, struct xe_guc_exec_queue, fini_async);
+	struct xe_exec_queue *q = ge->q;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+
+	trace_xe_exec_queue_destroy(q);
+
+	if (xe_exec_queue_is_lr(q))
+		cancel_work_sync(&ge->lr_tdr);
+	release_guc_id(guc, q);
+	xe_sched_entity_fini(&ge->entity);
+	xe_sched_fini(&ge->sched);
+
+	kfree(ge);
+	xe_exec_queue_fini(q);
+}
+
+static void guc_exec_queue_fini_async(struct xe_exec_queue *q)
+{
+	INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async);
+
+	/* We must block on kernel engines so slabs are empty on driver unload */
+	if (q->flags & EXEC_QUEUE_FLAG_PERMANENT)
+		__guc_exec_queue_fini_async(&q->guc->fini_async);
+	else
+		queue_work(system_wq, &q->guc->fini_async);
+}
+
+static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q)
+{
+	/*
+	 * Might be done from within the GPU scheduler, need to do async as we
+	 * fini the scheduler when the engine is fini'd, the scheduler can't
+	 * complete fini within itself (circular dependency). Async resolves
+	 * this and we don't really care when everything is fini'd, just that it
+	 * is.
+	 */
+	guc_exec_queue_fini_async(q);
+}
+
+static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
+{
+	struct xe_exec_queue *q = msg->private_data;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+
+	xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
+	trace_xe_exec_queue_cleanup_entity(q);
+
+	if (exec_queue_registered(q))
+		disable_scheduling_deregister(guc, q);
+	else
+		__guc_exec_queue_fini(guc, q);
+}
+
+static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
+{
+	return !exec_queue_killed_or_banned(q) && exec_queue_registered(q);
+}
+
+static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg)
+{
+	struct xe_exec_queue *q = msg->private_data;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+
+	if (guc_exec_queue_allowed_to_change_state(q))
+		init_policies(guc, q);
+	kfree(msg);
+}
+
+static void suspend_fence_signal(struct xe_exec_queue *q)
+{
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+
+	xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) ||
+		  guc_read_stopped(guc));
+	xe_assert(xe, q->guc->suspend_pending);
+
+	q->guc->suspend_pending = false;
+	smp_wmb();
+	wake_up(&q->guc->suspend_wait);
+}
+
+/*
+ * Handle a SUSPEND message.
+ *
+ * If the queue may change state, is not already suspended and is currently
+ * enabled, a scheduling-disable request is sent to the GuC (unless submission
+ * has been stopped in the meantime). Otherwise, if a suspend is pending, the
+ * queue is simply marked suspended and the waiters are signalled.
+ */
+static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
+{
+	struct xe_exec_queue *q = msg->private_data;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+
+	if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) &&
+	    exec_queue_enabled(q)) {
+		/*
+		 * Wait until an in-flight resume is no longer pending
+		 * (resume_time is set to RESUME_PENDING by the resume message
+		 * handler) or until submission is stopped.
+		 */
+		wait_event(guc->ct.wq, q->guc->resume_time != RESUME_PENDING ||
+			   guc_read_stopped(guc));
+
+		if (!guc_read_stopped(guc)) {
+			/* Provides the 'action' array sent below. */
+			MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
+			s64 since_resume_ms =
+				ktime_ms_delta(ktime_get(),
+					       q->guc->resume_time);
+			s64 wait_ms = q->vm->preempt.min_run_period_ms -
+				since_resume_ms;
+
+			/*
+			 * Sleep for whatever is left of the VM's minimum run
+			 * period since the last resume. A resume_time of zero
+			 * skips the sleep.
+			 */
+			if (wait_ms > 0 && q->guc->resume_time)
+				msleep(wait_ms);
+
+			set_exec_queue_suspended(q);
+			clear_exec_queue_enabled(q);
+			set_exec_queue_pending_disable(q);
+			trace_xe_exec_queue_scheduling_disable(q);
+
+			xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
+				       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
+		}
+	} else if (q->guc->suspend_pending) {
+		/* Nothing to send to the GuC; just complete the suspend. */
+		set_exec_queue_suspended(q);
+		suspend_fence_signal(q);
+	}
+}
+
+/*
+ * Handle a RESUME message.
+ *
+ * When the queue may change state, mark the resume as pending, update the
+ * state bits and send a scheduling-enable request to the GuC. Otherwise only
+ * the suspended state bit is cleared.
+ */
+static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
+{
+	struct xe_exec_queue *q = msg->private_data;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+
+	if (guc_exec_queue_allowed_to_change_state(q)) {
+		/* Provides the 'action' array sent below. */
+		MAKE_SCHED_CONTEXT_ACTION(q, ENABLE);
+
+		/* Replaced with a timestamp by xe_guc_sched_done_handler(). */
+		q->guc->resume_time = RESUME_PENDING;
+		clear_exec_queue_suspended(q);
+		set_exec_queue_pending_enable(q);
+		set_exec_queue_enabled(q);
+		trace_xe_exec_queue_scheduling_enable(q);
+
+		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
+			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
+	} else {
+		clear_exec_queue_suspended(q);
+	}
+}
+
+/* Message opcodes understood by guc_exec_queue_process_msg(). */
+#define CLEANUP		1	/* Non-zero values to catch uninitialized msg */
+#define SET_SCHED_PROPS	2
+#define SUSPEND		3
+#define RESUME		4
+
+/*
+ * xe_sched_backend_ops.process_msg hook: dispatch a scheduler message to the
+ * handler matching msg->opcode. An unknown opcode only triggers a warning.
+ */
+static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
+{
+	trace_xe_sched_msg_recv(msg);
+
+	switch (msg->opcode) {
+	case CLEANUP:
+		__guc_exec_queue_process_msg_cleanup(msg);
+		break;
+	case SET_SCHED_PROPS:
+		__guc_exec_queue_process_msg_set_sched_props(msg);
+		break;
+	case SUSPEND:
+		__guc_exec_queue_process_msg_suspend(msg);
+		break;
+	case RESUME:
+		__guc_exec_queue_process_msg_resume(msg);
+		break;
+	default:
+		/* The string literal is non-NULL, so this warns whenever reached. */
+		XE_WARN_ON("Unknown message type");
+	}
+}
+
+/* DRM scheduler callbacks used by every GuC exec queue scheduler. */
+static const struct drm_sched_backend_ops drm_sched_ops = {
+	.run_job = guc_exec_queue_run_job,
+	.free_job = guc_exec_queue_free_job,
+	.timedout_job = guc_exec_queue_timedout_job,
+};
+
+/* Xe scheduler callbacks: message processing for the opcodes defined above. */
+static const struct xe_sched_backend_ops xe_sched_ops = {
+	.process_msg = guc_exec_queue_process_msg,
+};
+
+/*
+ * Backend .init hook: allocate and wire up the GuC-side state of an exec
+ * queue.
+ *
+ * Creates the xe_guc_exec_queue, its scheduler and scheduler entity, then
+ * allocates a guc_id under guc->submission_state.lock. If submission is
+ * currently stopped, the new scheduler is stopped as well before the lock is
+ * dropped.
+ *
+ * Return: 0 on success, -ENOMEM if the backend state cannot be allocated, or
+ * the error returned by xe_sched_init(), xe_sched_entity_init() or
+ * alloc_guc_id(). On failure the entity, the scheduler and the backend state
+ * are released again and the submission-state lock is not left held.
+ */
+static int guc_exec_queue_init(struct xe_exec_queue *q)
+{
+	struct xe_gpu_scheduler *sched;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_guc_exec_queue *ge;
+	long timeout;
+	int err;
+
+	xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc)));
+
+	ge = kzalloc(sizeof(*ge), GFP_KERNEL);
+	if (!ge)
+		return -ENOMEM;
+
+	q->guc = ge;
+	ge->q = q;
+	init_waitqueue_head(&ge->suspend_wait);
+
+	/*
+	 * Long-running VMs get no job timeout.
+	 * NOTE(review): job_timeout_ms is passed through unconverted; confirm
+	 * the unit xe_sched_init() expects for its timeout argument.
+	 */
+	timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
+		  q->hwe->eclass->sched_props.job_timeout_ms;
+	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
+			    get_submit_wq(guc),
+			    q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, 64,
+			    timeout, guc_to_gt(guc)->ordered_wq, NULL,
+			    q->name, gt_to_xe(q->gt)->drm.dev);
+	if (err)
+		goto err_free;
+
+	sched = &ge->sched;
+	err = xe_sched_entity_init(&ge->entity, sched);
+	if (err)
+		goto err_sched;
+
+	if (xe_exec_queue_is_lr(q))
+		INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup);
+
+	mutex_lock(&guc->submission_state.lock);
+
+	err = alloc_guc_id(guc, q);
+	if (err)
+		goto err_unlock;
+
+	q->entity = &ge->entity;
+
+	if (guc_read_stopped(guc))
+		xe_sched_stop(sched);
+
+	mutex_unlock(&guc->submission_state.lock);
+
+	xe_exec_queue_assign_name(q, q->guc->id);
+
+	trace_xe_exec_queue_create(q);
+
+	return 0;
+
+err_unlock:
+	/* alloc_guc_id() failed with the submission-state lock held. */
+	mutex_unlock(&guc->submission_state.lock);
+	xe_sched_entity_fini(&ge->entity);
+err_sched:
+	xe_sched_fini(&ge->sched);
+err_free:
+	kfree(ge);
+
+	return err;
+}
+
+/* Backend .kill hook: mark the queue as killed and trigger its cleanup. */
+static void guc_exec_queue_kill(struct xe_exec_queue *q)
+{
+	trace_xe_exec_queue_kill(q);
+	set_exec_queue_killed(q);
+	xe_guc_exec_queue_trigger_cleanup(q);
+}
+
+/*
+ * Initialize @msg with @opcode and @q as private data, then queue it on the
+ * exec queue's scheduler. It is later dispatched by
+ * guc_exec_queue_process_msg().
+ */
+static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg,
+				   u32 opcode)
+{
+	INIT_LIST_HEAD(&msg->link);
+	msg->opcode = opcode;
+	msg->private_data = q;
+
+	trace_xe_sched_msg_add(msg);
+	xe_sched_add_msg(&q->guc->sched, msg);
+}
+
+/* Indices into the per-queue static_msgs array (q->guc->static_msgs). */
+#define STATIC_MSG_CLEANUP	0
+#define STATIC_MSG_SUSPEND	1
+#define STATIC_MSG_RESUME	2
+
+/*
+ * Backend .fini hook. Permanent queues are finalized right away; every other
+ * queue gets a CLEANUP message (using its preallocated static message) so the
+ * teardown runs through guc_exec_queue_process_msg().
+ */
+static void guc_exec_queue_fini(struct xe_exec_queue *q)
+{
+	struct xe_sched_msg *cleanup_msg;
+
+	if (q->flags & EXEC_QUEUE_FLAG_PERMANENT) {
+		__guc_exec_queue_fini(exec_queue_to_guc(q), q);
+		return;
+	}
+
+	cleanup_msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;
+	guc_exec_queue_add_msg(q, cleanup_msg, CLEANUP);
+}
+
+/*
+ * Backend .set_priority hook.
+ *
+ * Records the new priority and queues a SET_SCHED_PROPS message; the handler
+ * for that message re-applies the policies and frees the message. Nothing is
+ * done when the priority is unchanged or the queue is killed or banned.
+ *
+ * Return: 0 on success or no-op, -ENOMEM if the message cannot be allocated.
+ */
+static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
+				       enum xe_exec_queue_priority priority)
+{
+	struct xe_sched_msg *props_msg;
+
+	if (q->sched_props.priority == priority)
+		return 0;
+
+	if (exec_queue_killed_or_banned(q))
+		return 0;
+
+	props_msg = kmalloc(sizeof(*props_msg), GFP_KERNEL);
+	if (!props_msg)
+		return -ENOMEM;
+
+	q->sched_props.priority = priority;
+	guc_exec_queue_add_msg(q, props_msg, SET_SCHED_PROPS);
+
+	return 0;
+}
+
+/*
+ * Backend .set_timeslice hook.
+ *
+ * Records the new timeslice and queues a SET_SCHED_PROPS message; the handler
+ * for that message re-applies the policies and frees the message. Nothing is
+ * done when the value is unchanged or the queue is killed or banned.
+ *
+ * Return: 0 on success or no-op, -ENOMEM if the message cannot be allocated.
+ */
+static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
+{
+	struct xe_sched_msg *props_msg;
+
+	if (q->sched_props.timeslice_us == timeslice_us)
+		return 0;
+
+	if (exec_queue_killed_or_banned(q))
+		return 0;
+
+	props_msg = kmalloc(sizeof(*props_msg), GFP_KERNEL);
+	if (!props_msg)
+		return -ENOMEM;
+
+	q->sched_props.timeslice_us = timeslice_us;
+	guc_exec_queue_add_msg(q, props_msg, SET_SCHED_PROPS);
+
+	return 0;
+}
+
+/*
+ * Backend .set_preempt_timeout hook.
+ *
+ * Records the new preemption timeout and queues a SET_SCHED_PROPS message;
+ * the handler for that message re-applies the policies and frees the message.
+ * Nothing is done when the value is unchanged or the queue is killed or
+ * banned.
+ *
+ * Return: 0 on success or no-op, -ENOMEM if the message cannot be allocated.
+ */
+static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
+					      u32 preempt_timeout_us)
+{
+	struct xe_sched_msg *props_msg;
+
+	if (q->sched_props.preempt_timeout_us == preempt_timeout_us)
+		return 0;
+
+	if (exec_queue_killed_or_banned(q))
+		return 0;
+
+	props_msg = kmalloc(sizeof(*props_msg), GFP_KERNEL);
+	if (!props_msg)
+		return -ENOMEM;
+
+	q->sched_props.preempt_timeout_us = preempt_timeout_us;
+	guc_exec_queue_add_msg(q, props_msg, SET_SCHED_PROPS);
+
+	return 0;
+}
+
+/*
+ * Backend .set_job_timeout hook: store @job_timeout_ms directly as the
+ * scheduler timeout. The queue must not be registered, banned or killed
+ * (asserted).
+ *
+ * NOTE(review): the millisecond value is stored unconverted in
+ * sched->base.timeout; confirm the unit the scheduler expects there.
+ *
+ * Return: always 0.
+ */
+static int guc_exec_queue_set_job_timeout(struct xe_exec_queue *q, u32 job_timeout_ms)
+{
+	struct xe_gpu_scheduler *sched = &q->guc->sched;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+
+	xe_assert(xe, !exec_queue_registered(q));
+	xe_assert(xe, !exec_queue_banned(q));
+	xe_assert(xe, !exec_queue_killed(q));
+
+	sched->base.timeout = job_timeout_ms;
+
+	return 0;
+}
+
+/*
+ * Backend .suspend hook: flag a suspend as pending and queue the preallocated
+ * SUSPEND message for the queue.
+ *
+ * Return: 0 when the request was queued, -EINVAL if the queue is killed or
+ * banned or a suspend is already pending.
+ */
+static int guc_exec_queue_suspend(struct xe_exec_queue *q)
+{
+	struct xe_sched_msg *suspend_msg =
+		q->guc->static_msgs + STATIC_MSG_SUSPEND;
+
+	if (exec_queue_killed_or_banned(q))
+		return -EINVAL;
+
+	if (q->guc->suspend_pending)
+		return -EINVAL;
+
+	q->guc->suspend_pending = true;
+	guc_exec_queue_add_msg(q, suspend_msg, SUSPEND);
+
+	return 0;
+}
+
+/*
+ * Backend .suspend_wait hook: sleep until the pending suspend has been
+ * completed (suspend_pending cleared) or GuC submission is stopped.
+ */
+static void guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
+{
+	struct xe_guc *guc = exec_queue_to_guc(q);
+
+	wait_event(q->guc->suspend_wait, !q->guc->suspend_pending ||
+		   guc_read_stopped(guc));
+}
+
+/*
+ * Backend .resume hook: queue the preallocated RESUME message for the queue.
+ * No suspend may be pending at this point (asserted).
+ */
+static void guc_exec_queue_resume(struct xe_exec_queue *q)
+{
+	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+
+	xe_assert(xe, !q->guc->suspend_pending);
+
+	guc_exec_queue_add_msg(q, msg, RESUME);
+}
+
+/* Backend .reset_status hook: report the queue's reset state bit. */
+static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
+{
+	return exec_queue_reset(q);
+}
+
+/*
+ * All of these functions are an abstraction layer which other parts of Xe can
+ * use to trap into the GuC backend. All of these functions, aside from init,
+ * really shouldn't do much other than trap into the DRM scheduler which
+ * synchronizes these operations.
+ */
+static const struct xe_exec_queue_ops guc_exec_queue_ops = {
+	.init = guc_exec_queue_init,
+	.kill = guc_exec_queue_kill,
+	.fini = guc_exec_queue_fini,
+	.set_priority = guc_exec_queue_set_priority,
+	.set_timeslice = guc_exec_queue_set_timeslice,
+	.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
+	.set_job_timeout = guc_exec_queue_set_job_timeout,
+	.suspend = guc_exec_queue_suspend,
+	.suspend_wait = guc_exec_queue_suspend_wait,
+	.resume = guc_exec_queue_resume,
+	.reset_status = guc_exec_queue_reset_status,
+};
+
+/*
+ * Stop a single exec queue as part of xe_guc_submit_stop().
+ *
+ * Stops the queue's scheduler, resolves state that was waiting on GuC
+ * responses, completes a pending suspend, resets the state bits and finally
+ * decides whether the queue has to be banned.
+ */
+static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
+{
+	struct xe_gpu_scheduler *sched = &q->guc->sched;
+
+	/* Stop scheduling + flush any DRM scheduler operations */
+	xe_sched_submission_stop(sched);
+
+	/*
+	 * Clean up lost G2H + reset engine state. The put-vs-fini split below
+	 * uses the same banned / long-running distinction as
+	 * xe_guc_deregister_done_handler().
+	 */
+	if (exec_queue_registered(q)) {
+		if ((exec_queue_banned(q) && exec_queue_destroyed(q)) ||
+		    xe_exec_queue_is_lr(q))
+			xe_exec_queue_put(q);
+		else if (exec_queue_destroyed(q))
+			__guc_exec_queue_fini(guc, q);
+	}
+	if (q->guc->suspend_pending) {
+		set_exec_queue_suspended(q);
+		suspend_fence_signal(q);
+	}
+	/* Keep only the destroyed and suspended bits; clear everything else. */
+	atomic_and(EXEC_QUEUE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED,
+		   &q->guc->state);
+	q->guc->resume_time = 0;
+	trace_xe_exec_queue_stop(q);
+
+	/*
+	 * Ban any engine (aside from kernel and engines used for VM ops) with a
+	 * started but not complete job or if a job has gone through a GT reset
+	 * more than twice.
+	 */
+	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
+		struct xe_sched_job *job = xe_sched_first_pending_job(sched);
+
+		if (job) {
+			if ((xe_sched_job_started(job) &&
+			    !xe_sched_job_completed(job)) ||
+			    xe_sched_invalidate_job(job, 2)) {
+				trace_xe_sched_job_ban(job);
+				xe_sched_tdr_queue_imm(&q->guc->sched);
+				set_exec_queue_banned(q);
+			}
+		}
+	}
+}
+
+/*
+ * Mark GuC submission as stopped and wake everyone waiting on guc->ct.wq.
+ *
+ * Return: the previous value of submission_state.stopped, i.e. non-zero if
+ * submission had already been marked stopped.
+ */
+int xe_guc_submit_reset_prepare(struct xe_guc *guc)
+{
+	int ret;
+
+	/*
+	 * Using an atomic here rather than submission_state.lock as this
+	 * function can be called while holding the CT lock (engine reset
+	 * failure). submission_state.lock needs the CT lock to resubmit jobs.
+	 * Atomic is not ideal, but it works to prevent against concurrent reset
+	 * and releasing any TDRs waiting on guc->submission_state.stopped.
+	 */
+	ret = atomic_fetch_or(1, &guc->submission_state.stopped);
+	smp_wmb();
+	wake_up_all(&guc->ct.wq);
+
+	return ret;
+}
+
+/* Sleep on guc->ct.wq until GuC submission is no longer marked stopped. */
+void xe_guc_submit_reset_wait(struct xe_guc *guc)
+{
+	wait_event(guc->ct.wq, !guc_read_stopped(guc));
+}
+
+/*
+ * Stop every exec queue known to the GuC submission backend. Submission must
+ * already be marked stopped (asserted), see xe_guc_submit_reset_prepare().
+ *
+ * Return: always 0.
+ */
+int xe_guc_submit_stop(struct xe_guc *guc)
+{
+	struct xe_exec_queue *q;
+	unsigned long index;
+	struct xe_device *xe = guc_to_xe(guc);
+
+	xe_assert(xe, guc_read_stopped(guc) == 1);
+
+	mutex_lock(&guc->submission_state.lock);
+
+	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+		guc_exec_queue_stop(guc, q);
+
+	mutex_unlock(&guc->submission_state.lock);
+
+	/*
+	 * No one can enter the backend at this point, aside from new engine
+	 * creation which is protected by guc->submission_state.lock.
+	 */
+
+	return 0;
+}
+
+/*
+ * Restart a single exec queue as part of xe_guc_submit_start().
+ *
+ * Unless the queue is killed or banned, the ring head of every LRC is set to
+ * that LRC's current ring tail and the scheduler's jobs are resubmitted.
+ * Scheduler submission is restarted in every case.
+ */
+static void guc_exec_queue_start(struct xe_exec_queue *q)
+{
+	struct xe_gpu_scheduler *sched = &q->guc->sched;
+
+	if (!exec_queue_killed_or_banned(q)) {
+		int i;
+
+		trace_xe_exec_queue_resubmit(q);
+		for (i = 0; i < q->width; ++i)
+			xe_lrc_set_ring_head(q->lrc + i, q->lrc[i].ring.tail);
+		xe_sched_resubmit_jobs(sched);
+	}
+
+	xe_sched_submission_start(sched);
+}
+
+/*
+ * Restart GuC submission: clear the stopped marker, restart every exec queue
+ * under guc->submission_state.lock and wake the waiters on guc->ct.wq.
+ * Submission must be marked stopped on entry (asserted).
+ *
+ * Return: always 0.
+ */
+int xe_guc_submit_start(struct xe_guc *guc)
+{
+	struct xe_exec_queue *q;
+	unsigned long index;
+	struct xe_device *xe = guc_to_xe(guc);
+
+	xe_assert(xe, guc_read_stopped(guc) == 1);
+
+	mutex_lock(&guc->submission_state.lock);
+	atomic_dec(&guc->submission_state.stopped);
+	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+		guc_exec_queue_start(q);
+	mutex_unlock(&guc->submission_state.lock);
+
+	wake_up_all(&guc->ct.wq);
+
+	return 0;
+}
+
+/*
+ * Translate a guc_id received in a G2H message into its exec queue.
+ *
+ * Return: the exec queue, or NULL (after logging an error) if @guc_id is out
+ * of range or no queue is stored for it. The asserts check that @guc_id lies
+ * within the [id, id + width) range owned by the queue that was found.
+ */
+static struct xe_exec_queue *
+g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_exec_queue *q;
+
+	if (unlikely(guc_id >= GUC_ID_MAX)) {
+		drm_err(&xe->drm, "Invalid guc_id %u", guc_id);
+		return NULL;
+	}
+
+	q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
+	if (unlikely(!q)) {
+		drm_err(&xe->drm, "Not engine present for guc_id %u", guc_id);
+		return NULL;
+	}
+
+	xe_assert(xe, guc_id >= q->guc->id);
+	xe_assert(xe, guc_id < (q->guc->id + q->width));
+
+	return q;
+}
+
+/*
+ * Send XE_GUC_ACTION_DEREGISTER_CONTEXT for the queue's guc_id using
+ * xe_guc_ct_send_g2h_handler(); called from xe_guc_sched_done_handler().
+ */
+static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
+{
+	u32 action[] = {
+		XE_GUC_ACTION_DEREGISTER_CONTEXT,
+		q->guc->id,
+	};
+
+	trace_xe_exec_queue_deregister(q);
+
+	xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
+}
+
+/*
+ * G2H handler for a completed scheduling enable/disable request.
+ *
+ * @msg[0] carries the guc_id; at least two dwords are required. For a pending
+ * enable the resume timestamp is recorded and waiters on guc->ct.wq are
+ * woken. For a pending disable either the pending suspend is completed or the
+ * queue is deregistered.
+ *
+ * Return: 0 on success, -EPROTO on a short message, an unknown guc_id or a
+ * queue that has neither an enable nor a disable pending.
+ */
+int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_exec_queue *q;
+	u32 guc_id;
+
+	if (unlikely(len < 2)) {
+		drm_err(&xe->drm, "Invalid length %u", len);
+		return -EPROTO;
+	}
+
+	/* Only read the payload once its length has been validated. */
+	guc_id = msg[0];
+
+	q = g2h_exec_queue_lookup(guc, guc_id);
+	if (unlikely(!q))
+		return -EPROTO;
+
+	if (unlikely(!exec_queue_pending_enable(q) &&
+		     !exec_queue_pending_disable(q))) {
+		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
+			atomic_read(&q->guc->state));
+		return -EPROTO;
+	}
+
+	trace_xe_exec_queue_scheduling_done(q);
+
+	if (exec_queue_pending_enable(q)) {
+		/* Replaces RESUME_PENDING set by the resume message handler. */
+		q->guc->resume_time = ktime_get();
+		clear_exec_queue_pending_enable(q);
+		smp_wmb();
+		wake_up_all(&guc->ct.wq);
+	} else {
+		clear_exec_queue_pending_disable(q);
+		if (q->guc->suspend_pending) {
+			suspend_fence_signal(q);
+		} else {
+			if (exec_queue_banned(q)) {
+				smp_wmb();
+				wake_up_all(&guc->ct.wq);
+			}
+			deregister_exec_queue(guc, q);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * G2H handler for a completed context deregistration.
+ *
+ * @msg[0] carries the guc_id. The queue must be destroyed and must have no
+ * enable/disable pending nor be enabled. Its registered bit is cleared, then
+ * banned or long-running queues drop a reference while all others are
+ * finalized.
+ *
+ * Return: 0 on success, -EPROTO on an empty message, an unknown guc_id or an
+ * unexpected queue state.
+ */
+int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_exec_queue *q;
+	u32 guc_id;
+
+	if (unlikely(len < 1)) {
+		drm_err(&xe->drm, "Invalid length %u", len);
+		return -EPROTO;
+	}
+
+	/* Only read the payload once its length has been validated. */
+	guc_id = msg[0];
+
+	q = g2h_exec_queue_lookup(guc, guc_id);
+	if (unlikely(!q))
+		return -EPROTO;
+
+	if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
+	    exec_queue_pending_enable(q) || exec_queue_enabled(q)) {
+		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
+			atomic_read(&q->guc->state));
+		return -EPROTO;
+	}
+
+	trace_xe_exec_queue_deregister_done(q);
+
+	clear_exec_queue_registered(q);
+
+	if (exec_queue_banned(q) || xe_exec_queue_is_lr(q))
+		xe_exec_queue_put(q);
+	else
+		__guc_exec_queue_fini(guc, q);
+
+	return 0;
+}
+
+/*
+ * G2H handler for an engine reset notification.
+ *
+ * @msg[0] carries the guc_id. The queue is marked as reset and, unless it is
+ * already banned, its cleanup is triggered.
+ *
+ * Return: 0 on success, -EPROTO on an empty message or an unknown guc_id.
+ */
+int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_exec_queue *q;
+	u32 guc_id;
+
+	if (unlikely(len < 1)) {
+		drm_err(&xe->drm, "Invalid length %u", len);
+		return -EPROTO;
+	}
+
+	/* Only read the payload once its length has been validated. */
+	guc_id = msg[0];
+
+	q = g2h_exec_queue_lookup(guc, guc_id);
+	if (unlikely(!q))
+		return -EPROTO;
+
+	drm_info(&xe->drm, "Engine reset: guc_id=%d", guc_id);
+
+	/* FIXME: Do error capture, most likely async */
+
+	trace_xe_exec_queue_reset(q);
+
+	/*
+	 * A banned engine is a NOP at this point (came from
+	 * guc_exec_queue_timedout_job). Otherwise, kick drm scheduler to cancel
+	 * jobs by setting timeout of the job to the minimum value kicking
+	 * guc_exec_queue_timedout_job.
+	 */
+	set_exec_queue_reset(q);
+	if (!exec_queue_banned(q))
+		xe_guc_exec_queue_trigger_cleanup(q);
+
+	return 0;
+}
+
+/*
+ * G2H handler for a memory CAT error notification.
+ *
+ * @msg[0] carries the guc_id. Handled like an engine reset: the queue is
+ * marked as reset and, unless it is already banned, its cleanup is triggered.
+ *
+ * Return: 0 on success, -EPROTO on an empty message or an unknown guc_id.
+ */
+int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
+					       u32 len)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_exec_queue *q;
+	u32 guc_id;
+
+	if (unlikely(len < 1)) {
+		drm_err(&xe->drm, "Invalid length %u", len);
+		return -EPROTO;
+	}
+
+	/* Only read the payload once its length has been validated. */
+	guc_id = msg[0];
+
+	q = g2h_exec_queue_lookup(guc, guc_id);
+	if (unlikely(!q))
+		return -EPROTO;
+
+	drm_dbg(&xe->drm, "Engine memory cat error: guc_id=%d", guc_id);
+	trace_xe_exec_queue_memory_cat_error(q);
+
+	/* Treat the same as engine reset */
+	set_exec_queue_reset(q);
+	if (!exec_queue_banned(q))
+		xe_guc_exec_queue_trigger_cleanup(q);
+
+	return 0;
+}
+
+/*
+ * G2H handler for a failed engine reset request.
+ *
+ * Expects exactly three dwords: engine class, instance and failure reason.
+ * The failure is logged and an asynchronous GT reset is requested.
+ *
+ * Return: 0 on success, -EPROTO if @len is not 3.
+ */
+int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	u8 guc_class, instance;
+	u32 reason;
+
+	if (unlikely(len != 3)) {
+		drm_err(&xe->drm, "Invalid length %u", len);
+		return -EPROTO;
+	}
+
+	/* Class and instance are truncated to 8 bits by these assignments. */
+	guc_class = msg[0];
+	instance = msg[1];
+	reason = msg[2];
+
+	/* Unexpected failure of a hardware feature, log an actual error */
+	drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X",
+		guc_class, instance, reason);
+
+	xe_gt_reset_async(guc_to_gt(guc));
+
+	return 0;
+}
+
+/*
+ * Capture the work-queue state of a parallel exec queue into @snapshot: the
+ * driver's internal head/tail, the descriptor head/tail/status read from the
+ * parallel scratch map, and the work-queue dwords between head and tail.
+ */
+static void
+guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q,
+				   struct xe_guc_submit_exec_queue_snapshot *snapshot)
+{
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct iosys_map map = xe_lrc_parallel_map(q->lrc);
+	int i;
+
+	snapshot->guc.wqi_head = q->guc->wqi_head;
+	snapshot->guc.wqi_tail = q->guc->wqi_tail;
+	snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head);
+	snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail);
+	snapshot->parallel.wq_desc.status = parallel_read(xe, map,
+							  wq_desc.wq_status);
+
+	/*
+	 * Walk the ring from head to tail in dword steps, wrapping at WQ_SIZE.
+	 * NOTE(review): head/tail come straight from the map and are not
+	 * range- or alignment-checked here; a head >= WQ_SIZE would index
+	 * past snapshot->parallel.wq, and a tail that is never hit exactly
+	 * would keep the loop running. Confirm these values are trusted.
+	 */
+	if (snapshot->parallel.wq_desc.head !=
+	    snapshot->parallel.wq_desc.tail) {
+		for (i = snapshot->parallel.wq_desc.head;
+		     i != snapshot->parallel.wq_desc.tail;
+		     i = (i + sizeof(u32)) % WQ_SIZE)
+			snapshot->parallel.wq[i / sizeof(u32)] =
+				parallel_read(xe, map, wq[i / sizeof(u32)]);
+	}
+}
+
+/*
+ * Print the work-queue part of @snapshot to @p: internal and in-memory
+ * head/tail, the status, and every captured dword between head and tail.
+ */
+static void
+guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
+				 struct drm_printer *p)
+{
+	int i;
+
+	drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n",
+		   snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head);
+	drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n",
+		   snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail);
+	drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status);
+
+	/*
+	 * Same head-to-tail walk as in the capture function.
+	 * NOTE(review): the captured head/tail are used unchecked as indices
+	 * into snapshot->parallel.wq.
+	 */
+	if (snapshot->parallel.wq_desc.head !=
+	    snapshot->parallel.wq_desc.tail) {
+		for (i = snapshot->parallel.wq_desc.head;
+		     i != snapshot->parallel.wq_desc.tail;
+		     i = (i + sizeof(u32)) % WQ_SIZE)
+			drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32),
+				   snapshot->parallel.wq[i / sizeof(u32)]);
+	}
+}
+
+/**
+ * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine.
+ * @q: Xe exec queue.
+ *
+ * This can be printed out in a later stage like during dev_coredump
+ * analysis.
+ *
+ * All allocations use GFP_ATOMIC. If the LRC or pending-list arrays cannot be
+ * allocated, that part is skipped and the rest of the snapshot is still
+ * returned.
+ *
+ * Returns: a GuC Submit Engine snapshot object that must be freed by the
+ * caller, using `xe_guc_exec_queue_snapshot_free`, or NULL if the snapshot
+ * object itself cannot be allocated.
+ */
+struct xe_guc_submit_exec_queue_snapshot *
+xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
+{
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gpu_scheduler *sched = &q->guc->sched;
+	struct xe_sched_job *job;
+	struct xe_guc_submit_exec_queue_snapshot *snapshot;
+	int i;
+
+	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
+
+	if (!snapshot) {
+		drm_err(&xe->drm, "Skipping GuC Engine snapshot entirely.\n");
+		return NULL;
+	}
+
+	/* Basic identity and scheduling properties of the queue. */
+	snapshot->guc.id = q->guc->id;
+	memcpy(&snapshot->name, &q->name, sizeof(snapshot->name));
+	snapshot->class = q->class;
+	snapshot->logical_mask = q->logical_mask;
+	snapshot->width = q->width;
+	snapshot->refcount = kref_read(&q->refcount);
+	snapshot->sched_timeout = sched->base.timeout;
+	snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us;
+	snapshot->sched_props.preempt_timeout_us =
+		q->sched_props.preempt_timeout_us;
+
+	/* One LRC entry per unit of queue width. */
+	snapshot->lrc = kmalloc_array(q->width, sizeof(struct lrc_snapshot),
+				      GFP_ATOMIC);
+
+	if (!snapshot->lrc) {
+		drm_err(&xe->drm, "Skipping GuC Engine LRC snapshot.\n");
+	} else {
+		for (i = 0; i < q->width; ++i) {
+			struct xe_lrc *lrc = q->lrc + i;
+
+			snapshot->lrc[i].context_desc =
+				lower_32_bits(xe_lrc_ggtt_addr(lrc));
+			snapshot->lrc[i].head = xe_lrc_ring_head(lrc);
+			snapshot->lrc[i].tail.internal = lrc->ring.tail;
+			snapshot->lrc[i].tail.memory =
+				xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL);
+			snapshot->lrc[i].start_seqno = xe_lrc_start_seqno(lrc);
+			snapshot->lrc[i].seqno = xe_lrc_seqno(lrc);
+		}
+	}
+
+	snapshot->schedule_state = atomic_read(&q->guc->state);
+	snapshot->exec_queue_flags = q->flags;
+
+	snapshot->parallel_execution = xe_exec_queue_is_parallel(q);
+	if (snapshot->parallel_execution)
+		guc_exec_queue_wq_snapshot_capture(q, snapshot);
+
+	/*
+	 * The pending list is counted, allocated for and copied while holding
+	 * job_list_lock, so the count and the walk below see the same list.
+	 */
+	spin_lock(&sched->base.job_list_lock);
+	snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list);
+	snapshot->pending_list = kmalloc_array(snapshot->pending_list_size,
+					       sizeof(struct pending_list_snapshot),
+					       GFP_ATOMIC);
+
+	if (!snapshot->pending_list) {
+		drm_err(&xe->drm, "Skipping GuC Engine pending_list snapshot.\n");
+	} else {
+		i = 0;
+		list_for_each_entry(job, &sched->base.pending_list, drm.list) {
+			snapshot->pending_list[i].seqno =
+				xe_sched_job_seqno(job);
+			snapshot->pending_list[i].fence =
+				dma_fence_is_signaled(job->fence) ? 1 : 0;
+			snapshot->pending_list[i].finished =
+				dma_fence_is_signaled(&job->drm.s_fence->finished)
+				? 1 : 0;
+			i++;
+		}
+	}
+
+	spin_unlock(&sched->base.job_list_lock);
+
+	return snapshot;
+}
+
+/**
+ * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot.
+ * @snapshot: GuC Submit Engine snapshot object, may be NULL.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function prints out a given GuC Submit Engine snapshot object. A NULL
+ * @snapshot prints nothing; LRC and pending-list sections are skipped when
+ * their arrays were not captured.
+ */
+void
+xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
+				 struct drm_printer *p)
+{
+	int i;
+
+	if (!snapshot)
+		return;
+
+	drm_printf(p, "\nGuC ID: %d\n", snapshot->guc.id);
+	drm_printf(p, "\tName: %s\n", snapshot->name);
+	drm_printf(p, "\tClass: %d\n", snapshot->class);
+	drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask);
+	drm_printf(p, "\tWidth: %d\n", snapshot->width);
+	drm_printf(p, "\tRef: %d\n", snapshot->refcount);
+	drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout);
+	drm_printf(p, "\tTimeslice: %u (us)\n",
+		   snapshot->sched_props.timeslice_us);
+	drm_printf(p, "\tPreempt timeout: %u (us)\n",
+		   snapshot->sched_props.preempt_timeout_us);
+
+	/* snapshot->lrc is NULL when its allocation failed at capture time. */
+	for (i = 0; snapshot->lrc && i < snapshot->width; ++i) {
+		drm_printf(p, "\tHW Context Desc: 0x%08x\n",
+			   snapshot->lrc[i].context_desc);
+		drm_printf(p, "\tLRC Head: (memory) %u\n",
+			   snapshot->lrc[i].head);
+		drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
+			   snapshot->lrc[i].tail.internal,
+			   snapshot->lrc[i].tail.memory);
+		drm_printf(p, "\tStart seqno: (memory) %d\n",
+			   snapshot->lrc[i].start_seqno);
+		drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->lrc[i].seqno);
+	}
+	drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state);
+	drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags);
+
+	if (snapshot->parallel_execution)
+		guc_exec_queue_wq_snapshot_print(snapshot, p);
+
+	/* Likewise, pending_list is NULL when its allocation failed. */
+	for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size;
+	     i++)
+		drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
+			   snapshot->pending_list[i].seqno,
+			   snapshot->pending_list[i].fence,
+			   snapshot->pending_list[i].finished);
+}
+
+/**
+ * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given
+ * snapshot.
+ * @snapshot: GuC Submit Engine snapshot object, may be NULL.
+ *
+ * This function frees all the memory that was allocated at capture time: the
+ * LRC array, the pending-list array and the snapshot object itself.
+ */
+void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot)
+{
+	if (!snapshot)
+		return;
+
+	kfree(snapshot->lrc);
+	kfree(snapshot->pending_list);
+	kfree(snapshot);
+}
+
+/*
+ * Capture a snapshot of @q, print it to @p and free it again. A failed
+ * capture (NULL snapshot) is handled by the print and free helpers.
+ */
+static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
+{
+	struct xe_guc_submit_exec_queue_snapshot *snapshot;
+
+	snapshot = xe_guc_exec_queue_snapshot_capture(q);
+	xe_guc_exec_queue_snapshot_print(snapshot, p);
+	xe_guc_exec_queue_snapshot_free(snapshot);
+}
+
+/**
+ * xe_guc_submit_print - GuC Submit Print.
+ * @guc: GuC.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function captures and prints snapshots of **all** GuC Engines while
+ * holding guc->submission_state.lock. It does nothing when the uC is not
+ * enabled on the device.
+ */
+void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
+{
+	struct xe_exec_queue *q;
+	unsigned long index;
+
+	if (!xe_device_uc_enabled(guc_to_xe(guc)))
+		return;
+
+	mutex_lock(&guc->submission_state.lock);
+	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+		guc_exec_queue_print(q, p);
+	mutex_unlock(&guc->submission_state.lock);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
new file mode 100644
index 000000000000..fc97869c5b86
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_submit.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_SUBMIT_H_
+#define _XE_GUC_SUBMIT_H_
+
+#include <linux/types.h>
+
+struct drm_printer;
+struct xe_exec_queue;
+struct xe_guc;
+
+int xe_guc_submit_init(struct xe_guc *guc);
+
+int xe_guc_submit_reset_prepare(struct xe_guc *guc);
+void xe_guc_submit_reset_wait(struct xe_guc *guc);
+int xe_guc_submit_stop(struct xe_guc *guc);
+int xe_guc_submit_start(struct xe_guc *guc);
+
+int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
+					       u32 len);
+int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len);
+
+struct xe_guc_submit_exec_queue_snapshot *
+xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q);
+void
+xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
+				 struct drm_printer *p);
+void
+xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot);
+void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_submit_types.h b/drivers/gpu/drm/xe/xe_guc_submit_types.h
new file mode 100644
index 000000000000..649b0a852692
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_submit_types.h
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GUC_SUBMIT_TYPES_H_
+#define _XE_GUC_SUBMIT_TYPES_H_
+
+#include "xe_hw_engine_types.h"
+
+/* Work item for submitting workloads into work queue of GuC. */
+#define WQ_STATUS_ACTIVE		1
+#define WQ_STATUS_SUSPENDED		2
+#define WQ_STATUS_CMD_ERROR		3
+#define WQ_STATUS_ENGINE_ID_NOT_USED	4
+#define WQ_STATUS_SUSPENDED_FROM_RESET	5
+#define WQ_TYPE_NOOP			0x4
+#define WQ_TYPE_MULTI_LRC		0x5
+#define WQ_TYPE_MASK			GENMASK(7, 0)
+#define WQ_LEN_MASK			GENMASK(26, 16)
+
+#define WQ_GUC_ID_MASK			GENMASK(15, 0)
+#define WQ_RING_TAIL_MASK		GENMASK(28, 18)
+
+#define PARALLEL_SCRATCH_SIZE	2048
+#define WQ_SIZE			(PARALLEL_SCRATCH_SIZE / 2)
+#define WQ_OFFSET		(PARALLEL_SCRATCH_SIZE - WQ_SIZE)
+#define CACHELINE_BYTES		64
+
+/*
+ * Work queue descriptor placed at the start of the parallel scratch buffer
+ * (see struct guc_submit_parallel_scratch). 32 dwords in total, packed.
+ */
+struct guc_sched_wq_desc {
+	u32 head;
+	u32 tail;
+	u32 error_offset;
+	u32 wq_status;
+	u32 reserved[28];
+} __packed;
+
+/* A 32-bit semaphore padded out to CACHELINE_BYTES. */
+struct sync_semaphore {
+	u32 semaphore;
+	u8 unused[CACHELINE_BYTES - sizeof(u32)];
+};
+
+/**
+ * struct guc_submit_parallel_scratch - A scratch shared mapped buffer.
+ *
+ * The @unused padding is sized so that @wq starts at WQ_OFFSET.
+ */
+struct guc_submit_parallel_scratch {
+	/** @wq_desc: Guc scheduler workqueue descriptor */
+	struct guc_sched_wq_desc wq_desc;
+
+	/** @go: Go Semaphore */
+	struct sync_semaphore go;
+	/** @join: Joined semaphore for the relevant hw engine instances */
+	struct sync_semaphore join[XE_HW_ENGINE_MAX_INSTANCE];
+
+	/** @unused: Unused/Reserved memory space */
+	u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) -
+		  sizeof(struct sync_semaphore) *
+		  (XE_HW_ENGINE_MAX_INSTANCE + 1)];
+
+	/** @wq: Workqueue info */
+	u32 wq[WQ_SIZE / sizeof(u32)];
+};
+
+/*
+ * Per-LRC part of an exec queue snapshot; filled in by
+ * xe_guc_exec_queue_snapshot_capture().
+ */
+struct lrc_snapshot {
+	/* Lower 32 bits of the LRC's GGTT address. */
+	u32 context_desc;
+	u32 head;
+	struct {
+		/* Tail as tracked by the driver (lrc->ring.tail). */
+		u32 internal;
+		/* Tail as read back from the context's CTX_RING_TAIL register. */
+		u32 memory;
+	} tail;
+	u32 start_seqno;
+	u32 seqno;
+};
+
+/* One entry per job on the scheduler's pending list at capture time. */
+struct pending_list_snapshot {
+	u32 seqno;
+	/* Whether the job's fence was signaled. */
+	bool fence;
+	/* Whether the job's scheduler "finished" fence was signaled. */
+	bool finished;
+};
+
+/**
+ * struct xe_guc_submit_exec_queue_snapshot - Snapshot for devcoredump
+ */
+struct xe_guc_submit_exec_queue_snapshot {
+	/** @name: name of this exec queue */
+	char name[MAX_FENCE_NAME_LEN];
+	/** @class: class of this exec queue */
+	enum xe_engine_class class;
+	/**
+	 * @logical_mask: logical mask of where job submitted to exec queue can run
+	 */
+	u32 logical_mask;
+	/** @width: width (number of BBs submitted per exec) of this exec queue */
+	u16 width;
+	/** @refcount: ref count of this exec queue */
+	u32 refcount;
+	/**
+	 * @sched_timeout: the time after which a job is removed from the
+	 * scheduler.
+	 */
+	long sched_timeout;
+
+	/** @sched_props: scheduling properties */
+	struct {
+		/** @timeslice_us: timeslice period in micro-seconds */
+		u32 timeslice_us;
+		/** @preempt_timeout_us: preemption timeout in micro-seconds */
+		u32 preempt_timeout_us;
+	} sched_props;
+
+	/** @lrc: LRC Snapshot, an array of @width entries or NULL */
+	struct lrc_snapshot *lrc;
+
+	/** @schedule_state: Schedule State at the moment of Crash */
+	u32 schedule_state;
+	/** @exec_queue_flags: Flags of the faulty exec_queue */
+	unsigned long exec_queue_flags;
+
+	/** @guc: GuC Engine Snapshot */
+	struct {
+		/** @wqi_head: work queue item head */
+		u32 wqi_head;
+		/** @wqi_tail: work queue item tail */
+		u32 wqi_tail;
+		/** @id: GuC id for this exec_queue */
+		u16 id;
+	} guc;
+
+	/**
+	 * @parallel_execution: Indication if the failure was during parallel
+	 * execution
+	 */
+	bool parallel_execution;
+	/** @parallel: snapshot of the useful parallel scratch */
+	struct {
+		/** @wq_desc: Workqueue description */
+		struct {
+			/** @head: Workqueue Head */
+			u32 head;
+			/** @tail: Workqueue Tail */
+			u32 tail;
+			/** @status: Workqueue Status */
+			u32 status;
+		} wq_desc;
+		/** @wq: Workqueue Items */
+		u32 wq[WQ_SIZE / sizeof(u32)];
+	} parallel;
+
+	/** @pending_list_size: Size of the pending list snapshot array */
+	int pending_list_size;
+	/** @pending_list: snapshot of the pending list info */
+	struct pending_list_snapshot *pending_list;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
new file mode 100644
index 000000000000..cd80802e8918
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_TYPES_H_
+#define _XE_GUC_TYPES_H_
+
+#include <linux/idr.h>
+#include <linux/xarray.h>
+
+#include "regs/xe_reg_defs.h"
+#include "xe_guc_ads_types.h"
+#include "xe_guc_ct_types.h"
+#include "xe_guc_fwif.h"
+#include "xe_guc_log_types.h"
+#include "xe_guc_pc_types.h"
+#include "xe_uc_fw_types.h"
+
+/**
+ * struct xe_guc - Graphic micro controller
+ */
+struct xe_guc {
+	/** @fw: Generic uC firmware management */
+	struct xe_uc_fw fw;
+	/** @log: GuC log */
+	struct xe_guc_log log;
+	/** @ads: GuC ads */
+	struct xe_guc_ads ads;
+	/** @ct: GuC ct */
+	struct xe_guc_ct ct;
+	/** @pc: GuC Power Conservation */
+	struct xe_guc_pc pc;
+	/** @submission_state: GuC submission state */
+	struct {
+		/** @exec_queue_lookup: Lookup an xe_exec_queue from guc_id */
+		struct xarray exec_queue_lookup;
+		/** @guc_ids: used to allocate new guc_ids, single-lrc */
+		struct ida guc_ids;
+		/** @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc */
+		unsigned long *guc_ids_bitmap;
+		/** @stopped: submissions are stopped */
+		atomic_t stopped;
+		/** @lock: protects submission state */
+		struct mutex lock;
+		/** @suspend: suspend fence state */
+		struct {
+			/** @lock: suspend fences lock */
+			spinlock_t lock;
+			/** @context: suspend fences context */
+			u64 context;
+			/** @seqno: suspend fences seqno */
+			u32 seqno;
+		} suspend;
+#ifdef CONFIG_PROVE_LOCKING
+#define NUM_SUBMIT_WQ	256
+		/** @submit_wq_pool: submission ordered workqueues pool */
+		struct workqueue_struct *submit_wq_pool[NUM_SUBMIT_WQ];
+		/** @submit_wq_idx: submission ordered workqueue index */
+		int submit_wq_idx;
+#endif
+		/** @enabled: submission is enabled */
+		bool enabled;
+	} submission_state;
+	/** @hwconfig: Hardware config state */
+	struct {
+		/** @bo: buffer object of the hardware config */
+		struct xe_bo *bo;
+		/** @size: size of the hardware config */
+		u32 size;
+	} hwconfig;
+
+	/**
+	 * @notify_reg: Register which is written to notify GuC of H2G messages
+	 */
+	struct xe_reg notify_reg;
+	/** @params: Control params for fw initialization */
+	u32 params[GUC_CTL_MAX_DWORDS];
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c
new file mode 100644
index 000000000000..bfdd33b9b23b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_heci_gsc.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright(c) 2023, Intel Corporation. All rights reserved.
+ */
+
+#include <linux/irq.h>
+#include <linux/mei_aux.h>
+#include <linux/pci.h>
+#include <linux/sizes.h>
+
+#include "xe_device_types.h"
+#include "xe_drv.h"
+#include "xe_heci_gsc.h"
+#include "xe_platform_types.h"
+
+#define GSC_BAR_LENGTH  0x00000FFC
+
+#define DG1_GSC_HECI2_BASE			0x259000
+#define PVC_GSC_HECI2_BASE			0x285000
+#define DG2_GSC_HECI2_BASE			0x374000
+
+static void heci_gsc_irq_mask(struct irq_data *d)
+{
+	/* no-op: nothing is masked here (generic irq handling) */
+}
+
+static void heci_gsc_irq_unmask(struct irq_data *d)
+{
+	/* no-op: nothing is unmasked here (generic irq handling) */
+}
+
+static struct irq_chip heci_gsc_irq_chip = {
+	.name = "gsc_irq_chip",
+	.irq_mask = heci_gsc_irq_mask, /* empty callback */
+	.irq_unmask = heci_gsc_irq_unmask, /* empty callback */
+};
+
+static int heci_gsc_irq_init(int irq)
+{
+	/* attach our stub chip with the simple flow handler; no chip data */
+	irq_set_chip_and_handler_name(irq, &heci_gsc_irq_chip, handle_simple_irq,
+				      "heci_gsc_irq_handler");
+	return irq_set_chip_data(irq, NULL);
+}
+
+/**
+ * struct heci_gsc_def - graphics security controller heci interface definitions
+ *
+ * @name: name of the heci device
+ * @bar: offset of the heci mmio range from the start of PCI resource 0
+ * @bar_size: size of the mmio bar
+ * @use_polling: indication of using polling mode for the device
+ * @slow_firmware: indication of whether the device is slow (needs longer timeouts)
+ */
+struct heci_gsc_def {
+	const char *name;
+	unsigned long bar;
+	size_t bar_size;
+	bool use_polling;
+	bool slow_firmware;
+};
+
+/* per-platform gsc resources and definitions (none of them sets use_polling) */
+static const struct heci_gsc_def heci_gsc_def_dg1 = {
+	.name = "mei-gscfi",
+	.bar = DG1_GSC_HECI2_BASE,
+	.bar_size = GSC_BAR_LENGTH,
+};
+
+static const struct heci_gsc_def heci_gsc_def_dg2 = {
+	.name = "mei-gscfi",
+	.bar = DG2_GSC_HECI2_BASE,
+	.bar_size = GSC_BAR_LENGTH,
+};
+
+static const struct heci_gsc_def heci_gsc_def_pvc = {
+	.name = "mei-gscfi",
+	.bar = PVC_GSC_HECI2_BASE,
+	.bar_size = GSC_BAR_LENGTH,
+	.slow_firmware = true,
+};
+
+static void heci_gsc_release_dev(struct device *dev)
+{
+	struct auxiliary_device *aux_dev = to_auxiliary_dev(dev);
+
+	/* device .release callback: free the mei_aux_device wrapping aux_dev */
+	kfree(auxiliary_dev_to_mei_aux_dev(aux_dev));
+}
+
+void xe_heci_gsc_fini(struct xe_device *xe)
+{
+	struct xe_heci_gsc *gsc = &xe->heci_gsc;
+	struct mei_aux_device *adev = gsc->adev;
+
+	if (!HAS_HECI_GSCFI(xe))
+		return;
+
+	/* remove the aux device first; its memory goes away via .release */
+	if (adev) {
+		auxiliary_device_delete(&adev->aux_dev);
+		auxiliary_device_uninit(&adev->aux_dev);
+		gsc->adev = NULL;
+	}
+
+	if (gsc->irq >= 0)
+		irq_free_desc(gsc->irq);
+	gsc->irq = -1;
+}
+
+static int heci_gsc_irq_setup(struct xe_device *xe)
+{
+	struct xe_heci_gsc *gsc = &xe->heci_gsc;
+	int err;
+
+	gsc->irq = irq_alloc_desc(0);
+	if (gsc->irq < 0) {
+		drm_err(&xe->drm, "gsc irq error %d\n", gsc->irq);
+		return gsc->irq;
+	}
+
+	/* on failure the descriptor stays in gsc->irq; the caller releases it */
+	err = heci_gsc_irq_init(gsc->irq);
+	if (err < 0)
+		drm_err(&xe->drm, "gsc irq init failed %d\n", err);
+	return err;
+}
+
+static int heci_gsc_add_device(struct xe_device *xe, const struct heci_gsc_def *def)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	struct xe_heci_gsc *gsc = &xe->heci_gsc;
+	struct auxiliary_device *aux;
+	struct mei_aux_device *mei;
+	int err;
+
+	mei = kzalloc(sizeof(*mei), GFP_KERNEL);
+	if (!mei)
+		return -ENOMEM;
+	/* the heci window is a sub-range of PCI resource 0, at offset def->bar */
+	mei->bar.parent = &pdev->resource[0];
+	mei->bar.start = pdev->resource[0].start + def->bar;
+	mei->bar.end = mei->bar.start + def->bar_size - 1;
+	mei->bar.flags = IORESOURCE_MEM;
+	mei->bar.desc = IORES_DESC_NONE;
+	mei->irq = gsc->irq;
+	mei->slow_firmware = def->slow_firmware;
+
+	aux = &mei->aux_dev;
+	aux->name = def->name;
+	aux->id = (pci_domain_nr(pdev->bus) << 16) |
+		  PCI_DEVID(pdev->bus->number, pdev->devfn);
+	aux->dev.parent = &pdev->dev;
+	aux->dev.release = heci_gsc_release_dev;
+
+	err = auxiliary_device_init(aux);
+	if (err < 0) {
+		drm_err(&xe->drm, "gsc aux init failed %d\n", err);
+		kfree(mei);
+		return err;
+	}
+
+	gsc->adev = mei; /* needed by the notifier */
+	err = auxiliary_device_add(aux);
+	if (err < 0) {
+		drm_err(&xe->drm, "gsc aux add failed %d\n", err);
+		gsc->adev = NULL;
+		/* mei is freed by the put_device() and .release sequence */
+		auxiliary_device_uninit(aux);
+	}
+	return err;
+}
+
+void xe_heci_gsc_init(struct xe_device *xe)
+{
+	struct xe_heci_gsc *gsc = &xe->heci_gsc;
+	const struct heci_gsc_def *def;
+
+	if (!HAS_HECI_GSCFI(xe))
+		return;
+
+	gsc->irq = -1;
+
+	switch (xe->info.platform) {
+	case XE_PVC:
+		def = &heci_gsc_def_pvc;
+		break;
+	case XE_DG2:
+		def = &heci_gsc_def_dg2;
+		break;
+	case XE_DG1:
+		def = &heci_gsc_def_dg1;
+		break;
+	default:
+		drm_warn_once(&xe->drm, "Unknown platform\n");
+		return;
+	}
+
+	if (!def->name) {
+		drm_warn_once(&xe->drm, "HECI is not implemented!\n");
+		return;
+	}
+
+	/* any failure below is unwound through xe_heci_gsc_fini() */
+	if (!def->use_polling && heci_gsc_irq_setup(xe))
+		goto fail;
+
+	if (heci_gsc_add_device(xe, def))
+		goto fail;
+
+	return;
+fail:
+	xe_heci_gsc_fini(xe);
+}
+
+void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir)
+{
+	int ret;
+
+	/* only the GSC_IRQ_INTF(1) bit of @iir is handled here */
+	if ((iir & GSC_IRQ_INTF(1)) == 0)
+		return;
+	if (!HAS_HECI_GSCFI(xe)) {
+		drm_warn_once(&xe->drm, "GSC irq: not supported\n");
+		return;
+	}
+
+	/* a negative irq means there is no descriptor to dispatch to */
+	if (xe->heci_gsc.irq < 0)
+		return;
+	ret = generic_handle_irq(xe->heci_gsc.irq);
+	if (ret)
+		drm_err_ratelimited(&xe->drm, "error handling GSC irq: %d\n", ret);
+}
diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.h b/drivers/gpu/drm/xe/xe_heci_gsc.h
new file mode 100644
index 000000000000..9db454478fae
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_heci_gsc.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright(c) 2023, Intel Corporation. All rights reserved.
+ */
+#ifndef __XE_HECI_GSC_DEV_H__
+#define __XE_HECI_GSC_DEV_H__
+
+#include <linux/types.h>
+
+struct xe_device;
+struct mei_aux_device;
+
+/*
+ * The HECI1 bit corresponds to bit15 and HECI2 to bit14.
+ * The reason for this is to allow growth for more interfaces in the future.
+ */
+#define GSC_IRQ_INTF(_x)  BIT(15 - (_x))
+
+/**
+ * struct xe_heci_gsc - graphics security controller for xe, HECI interface
+ *
+ * @adev: pointer to mei auxiliary device structure
+ * @irq: irq number; xe_heci_gsc_init()/xe_heci_gsc_fini() set it to -1 when none is held
+ *
+ */
+struct xe_heci_gsc {
+	struct mei_aux_device *adev;
+	int irq;
+};
+
+void xe_heci_gsc_init(struct xe_device *xe);
+void xe_heci_gsc_fini(struct xe_device *xe);
+void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir);
+
+#endif /* __XE_HECI_GSC_DEV_H__ */
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
new file mode 100644
index 000000000000..eca109791c6a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_huc.h"
+
+#include <drm/drm_managed.h>
+
+#include "abi/gsc_pxp_commands_abi.h"
+#include "regs/xe_gsc_regs.h"
+#include "regs/xe_guc_regs.h"
+#include "xe_assert.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_force_wake.h"
+#include "xe_gsc_submit.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_uc_fw.h"
+
+/* Map a HuC back to the GT that embeds it as gt->uc.huc. */
+static struct xe_gt *huc_to_gt(struct xe_huc *huc)
+{
+	return container_of(huc, struct xe_gt, uc.huc);
+}
+
+/* Map a HuC to its device, going through the GT that embeds it. */
+static struct xe_device *huc_to_xe(struct xe_huc *huc)
+{
+	return gt_to_xe(container_of(huc, struct xe_gt, uc.huc));
+}
+
+/* The GuC lives next to the HuC inside the same struct xe_uc. */
+static struct xe_guc *huc_to_guc(struct xe_huc *huc)
+{
+	return &container_of(huc, struct xe_uc, huc)->guc;
+}
+
+static void free_gsc_pkt(struct drm_device *drm, void *arg)
+{
+	struct xe_huc *huc = arg; /* registered as the drmm action argument */
+
+	xe_bo_unpin_map_no_vm(huc->gsc_pkt);
+	huc->gsc_pkt = NULL; /* huc_auth_via_gsccs() returns -ENODEV on NULL */
+}
+
+#define PXP43_HUC_AUTH_INOUT_SIZE SZ_4K
+/* Create the pinned and mapped bo that holds the GSC auth request and reply. */
+static int huc_alloc_gsc_pkt(struct xe_huc *huc)
+{
+	struct xe_gt *gt = huc_to_gt(huc);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_bo *bo;
+	int err;
+
+	/* we use a single object for both input and output */
+	bo = xe_bo_create_pin_map(xe, gt_to_tile(gt), NULL,
+				  PXP43_HUC_AUTH_INOUT_SIZE * 2,
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_SYSTEM_BIT |
+				  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	huc->gsc_pkt = bo;
+	err = drmm_add_action_or_reset(&xe->drm, free_gsc_pkt, huc);
+	if (err) {
+		/* the _or_reset variant has already run free_gsc_pkt() for us */
+		return err;
+	}
+
+	return 0;
+}
+
+/* Returns 0 on success or when the HuC is unused here, else the failing step's error. */
+int xe_huc_init(struct xe_huc *huc)
+{
+	struct xe_gt *gt = huc_to_gt(huc);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_device *xe = gt_to_xe(gt);
+	int ret;
+
+	huc->fw.type = XE_UC_FW_TYPE_HUC;
+
+	/* On platforms with a media GT the HuC is only available there */
+	if (tile->media_gt && (gt != tile->media_gt)) {
+		xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_NOT_SUPPORTED);
+		return 0;
+	}
+
+	ret = xe_uc_fw_init(&huc->fw);
+	if (ret)
+		goto out;
+
+	if (!xe_uc_fw_is_enabled(&huc->fw))
+		return 0;
+
+	if (huc->fw.has_gsc_headers) {
+		ret = huc_alloc_gsc_pkt(huc);
+		if (ret)
+			goto out;
+	}
+
+	xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE);
+	return 0;
+
+out:
+	drm_err(&xe->drm, "HuC init failed with %d\n", ret);
+	return ret;
+}
+
+int xe_huc_upload(struct xe_huc *huc)
+{
+	/* a firmware that is not loadable is skipped and reported as success */
+	return xe_uc_fw_is_loadable(&huc->fw) ?
+		xe_uc_fw_upload(&huc->fw, 0, HUC_UKERNEL) : 0;
+}
+
+#define huc_auth_msg_wr(xe_, map_, offset_, field_, val_) \
+	xe_map_wr_field(xe_, map_, offset_, struct pxp43_new_huc_auth_in, field_, val_)
+#define huc_auth_msg_rd(xe_, map_, offset_, field_) \
+	xe_map_rd_field(xe_, map_, offset_, struct pxp43_huc_auth_out, field_)
+
+static u32 huc_emit_pxp_auth_msg(struct xe_device *xe, struct iosys_map *map,
+				 u32 wr_offset, u32 huc_offset, u32 huc_size)
+{
+	const size_t msg_len = sizeof(struct pxp43_new_huc_auth_in);
+
+	xe_map_memset(xe, map, wr_offset, 0, msg_len);
+	huc_auth_msg_wr(xe, map, wr_offset, header.api_version, PXP_APIVER(4, 3));
+	huc_auth_msg_wr(xe, map, wr_offset, header.command_id, PXP43_CMDID_NEW_HUC_AUTH);
+	huc_auth_msg_wr(xe, map, wr_offset, header.status, 0);
+	huc_auth_msg_wr(xe, map, wr_offset, header.buffer_len,
+			msg_len - sizeof(struct pxp_cmd_header));
+	huc_auth_msg_wr(xe, map, wr_offset, huc_base_address, huc_offset);
+	huc_auth_msg_wr(xe, map, wr_offset, huc_size, huc_size);
+	return wr_offset + msg_len;
+}
+
+static int huc_auth_via_gsccs(struct xe_huc *huc)
+{
+	struct xe_gt *gt = huc_to_gt(huc);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_bo *pkt = huc->gsc_pkt;
+	u32 wr_offset;
+	u32 rd_offset;
+	u64 ggtt_offset;
+	u32 out_status;
+	int retry = 5; /* upper bound on submissions while the reply is pending */
+	int err = 0;
+
+	if (!pkt)
+		return -ENODEV; /* no auth packet bo is available */
+
+	ggtt_offset = xe_bo_ggtt_addr(pkt);
+
+	wr_offset = xe_gsc_emit_header(xe, &pkt->vmap, 0, HECI_MEADDRESS_PXP, 0,
+				       sizeof(struct pxp43_new_huc_auth_in));
+	wr_offset = huc_emit_pxp_auth_msg(xe, &pkt->vmap, wr_offset,
+					  xe_bo_ggtt_addr(huc->fw.bo),
+					  huc->fw.bo->size);
+	do {
+		err = xe_gsc_pkt_submit_kernel(&gt->uc.gsc, ggtt_offset, wr_offset,
+					       ggtt_offset + PXP43_HUC_AUTH_INOUT_SIZE,
+					       PXP43_HUC_AUTH_INOUT_SIZE);
+		if (err)
+			break;
+
+		if (xe_gsc_check_and_update_pending(xe, &pkt->vmap, 0, &pkt->vmap,
+						    PXP43_HUC_AUTH_INOUT_SIZE)) {
+			err = -EBUSY;
+			msleep(50); /* pause before resubmitting */
+		}
+	} while (--retry && err == -EBUSY);
+
+	if (err) {
+		drm_err(&xe->drm, "failed to submit GSC request to auth: %d\n", err);
+		return err;
+	}
+
+	err = xe_gsc_read_out_header(xe, &pkt->vmap, PXP43_HUC_AUTH_INOUT_SIZE,
+				     sizeof(struct pxp43_huc_auth_out), &rd_offset);
+	if (err) {
+		drm_err(&xe->drm, "HuC: invalid GSC reply for auth (err=%d)\n", err);
+		return err;
+	}
+
+	/*
+	 * The GSC will return PXP_STATUS_OP_NOT_PERMITTED if the HuC is already
+	 * authenticated. If the same error is ever returned with HuC not loaded
+	 * we'll still catch it when we check the authentication bit later.
+	 */
+	out_status = huc_auth_msg_rd(xe, &pkt->vmap, rd_offset, header.status);
+	if (out_status != PXP_STATUS_SUCCESS && out_status != PXP_STATUS_OP_NOT_PERMITTED) {
+		drm_err(&xe->drm, "auth failed with GSC error = 0x%x\n", out_status);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static const struct {
+	const char *name; /* used in log messages */
+	struct xe_reg reg; /* status register read by xe_huc_is_authenticated() */
+	u32 val; /* bit(s) of @reg tested for "authenticated" */
+} huc_auth_modes[XE_HUC_AUTH_TYPES_COUNT] = {
+	[XE_HUC_AUTH_VIA_GUC] = { "GuC",
+				  HUC_KERNEL_LOAD_INFO,
+				  HUC_LOAD_SUCCESSFUL },
+	[XE_HUC_AUTH_VIA_GSC] = { "GSC",
+				  HECI_FWSTS5(MTL_GSC_HECI1_BASE),
+				  HECI1_FWSTS5_HUC_AUTH_DONE },
+};
+
+bool xe_huc_is_authenticated(struct xe_huc *huc, enum xe_huc_auth_types type)
+{
+	/* true when the mode's status register has any of its val bit(s) set */
+	return xe_mmio_read32(huc_to_gt(huc), huc_auth_modes[type].reg) &
+	       huc_auth_modes[type].val;
+}
+
+int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type)
+{
+	struct xe_device *xe = huc_to_xe(huc);
+	struct xe_gt *gt = huc_to_gt(huc);
+	struct xe_guc *guc = huc_to_guc(huc);
+	int ret;
+
+	if (!xe_uc_fw_is_loadable(&huc->fw))
+		return 0;
+
+	/* On newer platforms the HuC survives reset, so no need to re-auth */
+	if (xe_huc_is_authenticated(huc, type)) {
+		xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_RUNNING);
+		return 0;
+	}
+
+	if (!xe_uc_fw_is_loaded(&huc->fw))
+		return -ENOEXEC; /* not authenticated and not reported as loaded */
+
+	switch (type) {
+	case XE_HUC_AUTH_VIA_GUC:
+		ret = xe_guc_auth_huc(guc, xe_bo_ggtt_addr(huc->fw.bo) +
+				      xe_uc_fw_rsa_offset(&huc->fw));
+		break;
+	case XE_HUC_AUTH_VIA_GSC:
+		ret = huc_auth_via_gsccs(huc);
+		break;
+	default:
+		XE_WARN_ON(type); /* NOTE(review): type already indexed huc_auth_modes[] above */
+		return -EINVAL;
+	}
+	if (ret) {
+		drm_err(&xe->drm, "Failed to trigger HuC auth via %s: %d\n",
+			huc_auth_modes[type].name, ret);
+		goto fail;
+	}
+
+	ret = xe_mmio_wait32(gt, huc_auth_modes[type].reg, huc_auth_modes[type].val,
+			     huc_auth_modes[type].val, 100000, NULL, false);
+	if (ret) {
+		drm_err(&xe->drm, "HuC: Firmware not verified %d\n", ret);
+		goto fail;
+	}
+
+	xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_RUNNING);
+	drm_dbg(&xe->drm, "HuC authenticated via %s\n", huc_auth_modes[type].name);
+
+	return 0;
+
+fail:
+	drm_err(&xe->drm, "HuC: Auth via %s failed: %d\n",
+		huc_auth_modes[type].name, ret);
+	xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
+
+	return ret;
+}
+
+void xe_huc_sanitize(struct xe_huc *huc)
+{
+	/* a loadable firmware goes back to the LOADABLE status; else no-op */
+	if (xe_uc_fw_is_loadable(&huc->fw))
+		xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE);
+}
+
+void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p)
+{
+	struct xe_gt *gt = huc_to_gt(huc);
+	u32 status;
+
+	xe_uc_fw_print(&huc->fw, p);
+
+	if (!xe_uc_fw_is_enabled(&huc->fw))
+		return;
+
+	/* the status register is read with the XE_FW_GT force-wake held */
+	if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT))
+		return;
+
+	status = xe_mmio_read32(gt, HUC_KERNEL_LOAD_INFO);
+	drm_printf(p, "\nHuC status: 0x%08x\n", status);
+
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
diff --git a/drivers/gpu/drm/xe/xe_huc.h b/drivers/gpu/drm/xe/xe_huc.h
new file mode 100644
index 000000000000..532017230287
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_huc.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_HUC_H_
+#define _XE_HUC_H_
+
+#include "xe_huc_types.h"
+
+struct drm_printer;
+
+enum xe_huc_auth_types {
+	XE_HUC_AUTH_VIA_GUC = 0,
+	XE_HUC_AUTH_VIA_GSC,
+	XE_HUC_AUTH_TYPES_COUNT
+};
+
+int xe_huc_init(struct xe_huc *huc);
+int xe_huc_upload(struct xe_huc *huc);
+int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type);
+bool xe_huc_is_authenticated(struct xe_huc *huc, enum xe_huc_auth_types type);
+void xe_huc_sanitize(struct xe_huc *huc);
+void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.c b/drivers/gpu/drm/xe/xe_huc_debugfs.c
new file mode 100644
index 000000000000..18585a7eeb9d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_huc_debugfs.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_huc_debugfs.h"
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_huc.h"
+#include "xe_macros.h"
+
+/* Map a HuC back to the GT that embeds it as gt->uc.huc. */
+static struct xe_gt *huc_to_gt(struct xe_huc *huc)
+{
+	return container_of(huc, struct xe_gt, uc.huc);
+}
+
+/* Map a HuC to its device, going through the GT that embeds it. */
+static struct xe_device *huc_to_xe(struct xe_huc *huc)
+{
+	return gt_to_xe(container_of(huc, struct xe_gt, uc.huc));
+}
+
+static struct xe_huc *node_to_huc(struct drm_info_node *node)
+{
+	return node->info_ent->data; /* set to the HuC by xe_huc_debugfs_register() */
+}
+
+static int huc_info(struct seq_file *m, void *data)
+{
+	struct drm_printer printer = drm_seq_file_printer(m);
+	struct xe_huc *huc = node_to_huc(m->private);
+	struct xe_device *xe = huc_to_xe(huc);
+
+	/* bracket the dump with a device memory-access reference */
+	xe_device_mem_access_get(xe);
+	xe_huc_print_info(huc, &printer);
+	xe_device_mem_access_put(xe);
+	return 0;
+}
+
+static const struct drm_info_list debugfs_list[] = {
+	{"huc_info", huc_info, 0},
+};
+
+void xe_huc_debugfs_register(struct xe_huc *huc, struct dentry *parent)
+{
+	struct xe_device *xe = huc_to_xe(huc);
+	struct drm_minor *minor = xe->drm.primary;
+	const int count = ARRAY_SIZE(debugfs_list);
+	struct drm_info_list *local;
+	int i;
+
+	/*
+	 * Work on a drm-managed private copy of the template so that every
+	 * entry's data pointer can refer to this particular HuC.
+	 */
+	local = drmm_kmalloc(&xe->drm, sizeof(debugfs_list), GFP_KERNEL);
+	if (!local)
+		return;
+
+	memcpy(local, debugfs_list, sizeof(debugfs_list));
+	for (i = 0; i < count; ++i)
+		local[i].data = huc;
+	drm_debugfs_create_files(local, count, parent, minor);
+}
diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.h b/drivers/gpu/drm/xe/xe_huc_debugfs.h
new file mode 100644
index 000000000000..ec58f1818804
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_huc_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_HUC_DEBUGFS_H_
+#define _XE_HUC_DEBUGFS_H_
+
+struct dentry;
+struct xe_huc;
+
+void xe_huc_debugfs_register(struct xe_huc *huc, struct dentry *parent);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_huc_types.h b/drivers/gpu/drm/xe/xe_huc_types.h
new file mode 100644
index 000000000000..cfbaa5e0dfca
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_huc_types.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_HUC_TYPES_H_
+#define _XE_HUC_TYPES_H_
+
+#include "xe_uc_fw_types.h"
+
+struct xe_bo;
+
+/**
+ * struct xe_huc - HuC
+ */
+struct xe_huc {
+	/** @fw: Generic uC firmware management */
+	struct xe_uc_fw fw;
+
+	/** @gsc_pkt: bo to store the packet for auth via GSC */
+	struct xe_bo *gsc_pkt;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
new file mode 100644
index 000000000000..1fa5cf5eea97
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -0,0 +1,883 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_hw_engine.h"
+
+#include <drm/drm_managed.h>
+
+#include "regs/xe_engine_regs.h"
+#include "regs/xe_gt_regs.h"
+#include "xe_assert.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_execlist.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_gt_ccs_mode.h"
+#include "xe_gt_topology.h"
+#include "xe_hw_fence.h"
+#include "xe_irq.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_mmio.h"
+#include "xe_reg_sr.h"
+#include "xe_rtp.h"
+#include "xe_sched_job.h"
+#include "xe_tuning.h"
+#include "xe_uc_fw.h"
+#include "xe_wa.h"
+
+#define MAX_MMIO_BASES 3
+struct engine_info {
+	const char *name; /* e.g. "rcs0" */
+	unsigned int class : 8; /* an XE_ENGINE_CLASS_* value */
+	unsigned int instance : 8; /* instance number within the class */
+	enum xe_force_wake_domains domain; /* force-wake domain of the engine */
+	u32 mmio_base; /* base added to engine-relative register offsets */
+};
+
+static const struct engine_info engine_infos[] = { /* indexed by enum xe_hw_engine_id */
+	[XE_HW_ENGINE_RCS0] = {
+		.name = "rcs0",
+		.class = XE_ENGINE_CLASS_RENDER,
+		.instance = 0,
+		.domain = XE_FW_RENDER,
+		.mmio_base = RENDER_RING_BASE,
+	},
+	[XE_HW_ENGINE_BCS0] = {
+		.name = "bcs0",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 0,
+		.domain = XE_FW_RENDER,
+		.mmio_base = BLT_RING_BASE,
+	},
+	[XE_HW_ENGINE_BCS1] = {
+		.name = "bcs1",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 1,
+		.domain = XE_FW_RENDER,
+		.mmio_base = XEHPC_BCS1_RING_BASE,
+	},
+	[XE_HW_ENGINE_BCS2] = {
+		.name = "bcs2",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 2,
+		.domain = XE_FW_RENDER,
+		.mmio_base = XEHPC_BCS2_RING_BASE,
+	},
+	[XE_HW_ENGINE_BCS3] = {
+		.name = "bcs3",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 3,
+		.domain = XE_FW_RENDER,
+		.mmio_base = XEHPC_BCS3_RING_BASE,
+	},
+	[XE_HW_ENGINE_BCS4] = {
+		.name = "bcs4",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 4,
+		.domain = XE_FW_RENDER,
+		.mmio_base = XEHPC_BCS4_RING_BASE,
+	},
+	[XE_HW_ENGINE_BCS5] = {
+		.name = "bcs5",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 5,
+		.domain = XE_FW_RENDER,
+		.mmio_base = XEHPC_BCS5_RING_BASE,
+	},
+	[XE_HW_ENGINE_BCS6] = {
+		.name = "bcs6",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 6,
+		.domain = XE_FW_RENDER,
+		.mmio_base = XEHPC_BCS6_RING_BASE,
+	},
+	[XE_HW_ENGINE_BCS7] = {
+		.name = "bcs7",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 7,
+		.domain = XE_FW_RENDER,
+		.mmio_base = XEHPC_BCS7_RING_BASE,
+	},
+	[XE_HW_ENGINE_BCS8] = {
+		.name = "bcs8",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 8,
+		.domain = XE_FW_RENDER,
+		.mmio_base = XEHPC_BCS8_RING_BASE,
+	},
+
+	[XE_HW_ENGINE_VCS0] = {
+		.name = "vcs0",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 0,
+		.domain = XE_FW_MEDIA_VDBOX0,
+		.mmio_base = BSD_RING_BASE,
+	},
+	[XE_HW_ENGINE_VCS1] = {
+		.name = "vcs1",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 1,
+		.domain = XE_FW_MEDIA_VDBOX1,
+		.mmio_base = BSD2_RING_BASE,
+	},
+	[XE_HW_ENGINE_VCS2] = {
+		.name = "vcs2",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 2,
+		.domain = XE_FW_MEDIA_VDBOX2,
+		.mmio_base = BSD3_RING_BASE,
+	},
+	[XE_HW_ENGINE_VCS3] = {
+		.name = "vcs3",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 3,
+		.domain = XE_FW_MEDIA_VDBOX3,
+		.mmio_base = BSD4_RING_BASE,
+	},
+	[XE_HW_ENGINE_VCS4] = {
+		.name = "vcs4",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 4,
+		.domain = XE_FW_MEDIA_VDBOX4,
+		.mmio_base = XEHP_BSD5_RING_BASE,
+	},
+	[XE_HW_ENGINE_VCS5] = {
+		.name = "vcs5",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 5,
+		.domain = XE_FW_MEDIA_VDBOX5,
+		.mmio_base = XEHP_BSD6_RING_BASE,
+	},
+	[XE_HW_ENGINE_VCS6] = {
+		.name = "vcs6",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 6,
+		.domain = XE_FW_MEDIA_VDBOX6,
+		.mmio_base = XEHP_BSD7_RING_BASE,
+	},
+	[XE_HW_ENGINE_VCS7] = {
+		.name = "vcs7",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 7,
+		.domain = XE_FW_MEDIA_VDBOX7,
+		.mmio_base = XEHP_BSD8_RING_BASE,
+	},
+	[XE_HW_ENGINE_VECS0] = {
+		.name = "vecs0",
+		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+		.instance = 0,
+		.domain = XE_FW_MEDIA_VEBOX0,
+		.mmio_base = VEBOX_RING_BASE,
+	},
+	[XE_HW_ENGINE_VECS1] = {
+		.name = "vecs1",
+		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+		.instance = 1,
+		.domain = XE_FW_MEDIA_VEBOX1,
+		.mmio_base = VEBOX2_RING_BASE,
+	},
+	[XE_HW_ENGINE_VECS2] = {
+		.name = "vecs2",
+		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+		.instance = 2,
+		.domain = XE_FW_MEDIA_VEBOX2,
+		.mmio_base = XEHP_VEBOX3_RING_BASE,
+	},
+	[XE_HW_ENGINE_VECS3] = {
+		.name = "vecs3",
+		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+		.instance = 3,
+		.domain = XE_FW_MEDIA_VEBOX3,
+		.mmio_base = XEHP_VEBOX4_RING_BASE,
+	},
+	[XE_HW_ENGINE_CCS0] = {
+		.name = "ccs0",
+		.class = XE_ENGINE_CLASS_COMPUTE,
+		.instance = 0,
+		.domain = XE_FW_RENDER,
+		.mmio_base = COMPUTE0_RING_BASE,
+	},
+	[XE_HW_ENGINE_CCS1] = {
+		.name = "ccs1",
+		.class = XE_ENGINE_CLASS_COMPUTE,
+		.instance = 1,
+		.domain = XE_FW_RENDER,
+		.mmio_base = COMPUTE1_RING_BASE,
+	},
+	[XE_HW_ENGINE_CCS2] = {
+		.name = "ccs2",
+		.class = XE_ENGINE_CLASS_COMPUTE,
+		.instance = 2,
+		.domain = XE_FW_RENDER,
+		.mmio_base = COMPUTE2_RING_BASE,
+	},
+	[XE_HW_ENGINE_CCS3] = {
+		.name = "ccs3",
+		.class = XE_ENGINE_CLASS_COMPUTE,
+		.instance = 3,
+		.domain = XE_FW_RENDER,
+		.mmio_base = COMPUTE3_RING_BASE,
+	},
+	[XE_HW_ENGINE_GSCCS0] = {
+		.name = "gsccs0",
+		.class = XE_ENGINE_CLASS_OTHER,
+		.instance = OTHER_GSC_INSTANCE,
+		.domain = XE_FW_GSC,
+		.mmio_base = GSCCS_RING_BASE,
+	},
+};
+
+static void hw_engine_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_hw_engine *engine = arg;
+
+	/* exl_port is only set when hw_engine_init() created an execlist port */
+	if (engine->exl_port)
+		xe_execlist_port_destroy(engine->exl_port);
+	xe_lrc_finish(&engine->kernel_lrc);
+	engine->gt = NULL;
+}
+
+static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg,
+				   u32 val)
+{
+	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
+	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
+
+	/* reg is engine-relative: rebase it onto this engine's mmio_base */
+	reg.addr = hwe->mmio_base + reg.addr;
+	xe_mmio_write32(hwe->gt, reg, val);
+}
+
+static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
+{
+	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
+	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
+
+	/* reg is engine-relative: rebase it onto this engine's mmio_base */
+	reg.addr = hwe->mmio_base + reg.addr;
+	return xe_mmio_read32(hwe->gt, reg);
+}
+
+void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
+{
+	struct xe_gt *gt = hwe->gt;
+	u32 compute_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE);
+
+	if (compute_mask && hwe->class == XE_ENGINE_CLASS_COMPUTE)
+		xe_mmio_write32(gt, RCU_MODE, _MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));
+
+	hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
+	hw_engine_mmio_write32(hwe, RING_HWS_PGA(0),
+			       xe_bo_ggtt_addr(hwe->hwsp));
+	hw_engine_mmio_write32(hwe, RING_MODE(0),
+			       _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
+	hw_engine_mmio_write32(hwe, RING_MI_MODE(0),
+			       _MASKED_BIT_DISABLE(STOP_RING));
+	/* read RING_MI_MODE back; the value itself is discarded */
+	hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
+}
+
+static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt,
+						 const struct xe_hw_engine *hwe)
+{
+	/* both checks must pass; the second is only evaluated if the first does */
+	return xe_gt_ccs_mode_enabled(gt) && xe_rtp_match_first_render_or_compute(gt, hwe);
+}
+
+void
+xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
+{
+	struct xe_gt *gt = hwe->gt;
+	const u8 mocs_write_idx = gt->mocs.uc_index;
+	const u8 mocs_read_idx = gt->mocs.uc_index;
+	u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) |
+			    REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx);
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
+	const struct xe_rtp_entry_sr lrc_was[] = {
+		/*
+		 * Some blitter commands do not have a field for MOCS; those
+		 * commands will use the MOCS index pointed to by BLIT_CCTL.
+		 * BLIT_CCTL registers need to be programmed to un-cached.
+		 */
+		{ XE_RTP_NAME("BLIT_CCTL_default_MOCS"),
+		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED),
+			       ENGINE_CLASS(COPY)),
+		  XE_RTP_ACTIONS(FIELD_SET(BLIT_CCTL(0),
+				 BLIT_CCTL_DST_MOCS_MASK |
+				 BLIT_CCTL_SRC_MOCS_MASK,
+				 blit_cctl_val,
+				 XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+		},
+		/* Use Fixed slice CCS mode */
+		{ XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"),
+		  XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)),
+		  XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE,
+					   RCU_MODE_FIXED_SLICE_CCS_MODE))
+		},
+		{}
+	};
+
+	xe_rtp_process_to_sr(&ctx, lrc_was, &hwe->reg_lrc);
+}
+
+static void
+hw_engine_setup_default_state(struct xe_hw_engine *hwe)
+{
+	struct xe_gt *gt = hwe->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	/*
+	 * RING_CMD_CCTL specifies the default MOCS entry that will be
+	 * used by the command streamer when executing commands that
+	 * don't have a way to explicitly specify a MOCS setting.
+	 * The default should usually reference whichever MOCS entry
+	 * corresponds to uncached behavior, although use of a WB cached
+	 * entry is recommended by the spec in certain circumstances on
+	 * specific platforms.
+	 * Bspec: 72161
+	 */
+	const u8 mocs_write_idx = gt->mocs.uc_index;
+	const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE &&
+				 (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ?
+				 gt->mocs.wb_index : gt->mocs.uc_index;
+	u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) |
+				REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx);
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
+	const struct xe_rtp_entry_sr engine_entries[] = {
+		{ XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"),
+		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED)),
+		  XE_RTP_ACTIONS(FIELD_SET(RING_CMD_CCTL(0),
+					   CMD_CCTL_WRITE_OVERRIDE_MASK |
+					   CMD_CCTL_READ_OVERRIDE_MASK,
+					   ring_cmd_cctl_val,
+					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+		},
+		/*
+		 * To allow the GSC engine to go idle on MTL we need to enable
+		 * idle messaging and set the hysteresis value (we use 0xA=5us
+		 * as recommended in the spec). On platforms after MTL this is
+		 * enabled by default.
+		 */
+		{ XE_RTP_NAME("MTL GSCCS IDLE MSG enable"),
+		  XE_RTP_RULES(MEDIA_VERSION(1300), ENGINE_CLASS(OTHER)),
+		  XE_RTP_ACTIONS(CLR(RING_PSMI_CTL(0),
+				     IDLE_MSG_DISABLE,
+				     XE_RTP_ACTION_FLAG(ENGINE_BASE)),
+				 FIELD_SET(RING_PWRCTX_MAXCNT(0),
+					   IDLE_WAIT_TIME,
+					   0xA,
+					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+		},
+		{}
+	};
+
+	xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr);
+}
+
+static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
+				 enum xe_hw_engine_id id)
+{
+	const struct engine_info *info;
+
+	if (WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name))
+		return;
+
+	if (!(gt->info.engine_mask & BIT(id)))
+		return;
+
+	info = &engine_infos[id];
+
+	xe_gt_assert(gt, !hwe->gt);
+
+	hwe->gt = gt;
+	hwe->class = info->class;
+	hwe->instance = info->instance;
+	hwe->mmio_base = info->mmio_base;
+	hwe->domain = info->domain;
+	hwe->name = info->name;
+	hwe->fence_irq = &gt->fence_irq[info->class];
+	hwe->engine_id = id;
+
+	hwe->eclass = &gt->eclass[hwe->class];
+	if (!hwe->eclass->sched_props.job_timeout_ms) {
+		hwe->eclass->sched_props.job_timeout_ms = 5 * 1000; /* 5 seconds */
+		hwe->eclass->sched_props.job_timeout_min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
+		hwe->eclass->sched_props.job_timeout_max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
+		hwe->eclass->sched_props.timeslice_us = 1 * 1000; /* 1 millisecond */
+		hwe->eclass->sched_props.timeslice_min = XE_HW_ENGINE_TIMESLICE_MIN;
+		hwe->eclass->sched_props.timeslice_max = XE_HW_ENGINE_TIMESLICE_MAX;
+		hwe->eclass->sched_props.preempt_timeout_us = XE_HW_ENGINE_PREEMPT_TIMEOUT;
+		hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
+		hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
+		/* Record the values just set as this class's defaults */
+		hwe->eclass->defaults = hwe->eclass->sched_props;
+	}
+
+	xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt));
+	xe_tuning_process_engine(hwe);
+	xe_wa_process_engine(hwe);
+	hw_engine_setup_default_state(hwe);
+
+	xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, gt_to_xe(gt));
+	xe_reg_whitelist_process_engine(hwe);
+}
+
+static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
+			  enum xe_hw_engine_id id)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_tile *tile = gt_to_tile(gt);
+	int err;
+
+	xe_gt_assert(gt, id < ARRAY_SIZE(engine_infos) && engine_infos[id].name);
+	xe_gt_assert(gt, gt->info.engine_mask & BIT(id));
+
+	xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
+	xe_reg_sr_apply_whitelist(hwe);
+
+	hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K,
+						 XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+						 XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(hwe->hwsp)) {
+		err = PTR_ERR(hwe->hwsp);
+		goto err_name;
+	}
+
+	err = xe_lrc_init(&hwe->kernel_lrc, hwe, NULL, NULL, SZ_16K);
+	if (err)
+		goto err_hwsp;
+
+	if (!xe_device_uc_enabled(xe)) {
+		hwe->exl_port = xe_execlist_port_create(xe, hwe);
+		if (IS_ERR(hwe->exl_port)) {
+			err = PTR_ERR(hwe->exl_port);
+			goto err_kernel_lrc;
+		}
+	}
+
+	if (xe_device_uc_enabled(xe))
+		xe_hw_engine_enable_ring(hwe);
+
+	/* We reserve the highest BCS instance for USM */
+	if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)
+		gt->usm.reserved_bcs_instance = hwe->instance;
+
+	err = drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe);
+	if (err)
+		return err; /* the _or_reset variant has already run hw_engine_fini() */
+
+	return 0;
+
+err_kernel_lrc:
+	xe_lrc_finish(&hwe->kernel_lrc);
+err_hwsp:
+	xe_bo_unpin_map_no_vm(hwe->hwsp); /* NOTE(review): hwsp is from xe_managed_bo_*; confirm no double unpin */
+err_name:
+	hwe->name = NULL;
+
+	return err;
+}
+
+/*
+ * Give every engine a logical instance number that counts up from zero
+ * within its class, in for_each_hw_engine() iteration order.
+ */
+static void hw_engine_setup_logical_mapping(struct xe_gt *gt)
+{
+	int class;
+
+	/* FIXME: Doing a simple logical mapping that works for most hardware */
+	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
+		struct xe_hw_engine *hwe;
+		enum xe_hw_engine_id id;
+		int next_logical = 0;
+
+		for_each_hw_engine(hwe, gt, id) {
+			if (hwe->class != class)
+				continue;
+
+			hwe->logical_instance = next_logical;
+			next_logical++;
+		}
+	}
+}
+
+/*
+ * Drop video decode (VCS) and video enhance (VECS) engines from the GT's
+ * engine mask when the media fuse register reports them as not present.
+ * Requires the GT forcewake domain to be held.
+ */
+static void read_media_fuses(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 fuse_val;
+	u16 vdbox_present;
+	u16 vebox_present;
+	int id, inst;
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+	fuse_val = xe_mmio_read32(gt, GT_VEBOX_VDBOX_DISABLE);
+
+	/*
+	 * Pre-Xe_HP platforms had register bits representing absent engines,
+	 * whereas Xe_HP and beyond have bits representing present engines.
+	 * Invert the polarity on old platforms so that we can use common
+	 * handling below.
+	 */
+	if (GRAPHICS_VERx100(xe) < 1250)
+		fuse_val = ~fuse_val;
+
+	vdbox_present = REG_FIELD_GET(GT_VDBOX_DISABLE_MASK, fuse_val);
+	vebox_present = REG_FIELD_GET(GT_VEBOX_DISABLE_MASK, fuse_val);
+
+	for (id = XE_HW_ENGINE_VCS0, inst = 0; id <= XE_HW_ENGINE_VCS7; ++id, ++inst) {
+		/* Skip engines the platform never advertised or that are present. */
+		if (!(gt->info.engine_mask & BIT(id)) || (vdbox_present & BIT(inst)))
+			continue;
+
+		gt->info.engine_mask &= ~BIT(id);
+		drm_info(&xe->drm, "vcs%u fused off\n", inst);
+	}
+
+	for (id = XE_HW_ENGINE_VECS0, inst = 0; id <= XE_HW_ENGINE_VECS3; ++id, ++inst) {
+		if (!(gt->info.engine_mask & BIT(id)) || (vebox_present & BIT(inst)))
+			continue;
+
+		gt->info.engine_mask &= ~BIT(id);
+		drm_info(&xe->drm, "vecs%u fused off\n", inst);
+	}
+}
+
+/*
+ * Drop copy engines (BCS1-BCS8) from the GT's engine mask when the fuse
+ * register reports them as disabled. Only applies to graphics versions in
+ * the range [12.60, 12.70); other versions return without touching the mask.
+ * Requires the GT forcewake domain to be held.
+ */
+static void read_copy_fuses(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 bcs_mask;
+
+	if (GRAPHICS_VERx100(xe) < 1260 || GRAPHICS_VERx100(xe) >= 1270)
+		return;
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+	bcs_mask = xe_mmio_read32(gt, MIRROR_FUSE3);
+	bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask);
+
+	/* BCS0 is always present; only BCS1-BCS8 may be fused off */
+	for (int i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) {
+		if (!(gt->info.engine_mask & BIT(i)))
+			continue;
+
+		/* Each fuse bit covers a pair of consecutive engines. */
+		if (!(BIT(j / 2) & bcs_mask)) {
+			gt->info.engine_mask &= ~BIT(i);
+			/*
+			 * j counts from BCS1, so the engine instance being
+			 * reported is j + 1 (printing j would mislabel BCS1
+			 * as "bcs0").
+			 */
+			drm_info(&xe->drm, "bcs%u fused off\n", j + 1);
+		}
+	}
+}
+
+/*
+ * Drop compute engines (CCS0-CCS3) from the GT's engine mask when the
+ * topology reports no DSS in the quadrant with the same index.
+ */
+static void read_compute_fuses_from_dss(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u64 ccs_engines = gt->info.engine_mask &
+			  GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0);
+
+	/*
+	 * CCS fusing based on DSS masks only applies to platforms that can
+	 * have more than one CCS.
+	 */
+	if (hweight64(ccs_engines) <= 1)
+		return;
+
+	/*
+	 * CCS availability on Xe_HP is inferred from the presence of DSS in
+	 * each quadrant.
+	 */
+	for (int id = XE_HW_ENGINE_CCS0, quad = 0; id <= XE_HW_ENGINE_CCS3; ++id, ++quad) {
+		if (!(gt->info.engine_mask & BIT(id)))
+			continue;
+
+		if (xe_gt_topology_has_dss_in_quadrant(gt, quad))
+			continue;
+
+		gt->info.engine_mask &= ~BIT(id);
+		drm_info(&xe->drm, "ccs%u fused off\n", quad);
+	}
+}
+
+/*
+ * Drop compute engines (CCS0-CCS3) from the GT's engine mask when their
+ * enable bit is clear in the CCS_EN field of XEHP_FUSE4.
+ */
+static void read_compute_fuses_from_reg(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 ccs_enabled;
+
+	ccs_enabled = REG_FIELD_GET(CCS_EN_MASK, xe_mmio_read32(gt, XEHP_FUSE4));
+
+	for (int id = XE_HW_ENGINE_CCS0, inst = 0; id <= XE_HW_ENGINE_CCS3; ++id, ++inst) {
+		if (!(gt->info.engine_mask & BIT(id)))
+			continue;
+
+		if (ccs_enabled & BIT(inst))
+			continue;
+
+		gt->info.engine_mask &= ~BIT(id);
+		drm_info(&xe->drm, "ccs%u fused off\n", inst);
+	}
+}
+
+/*
+ * Pick the compute-engine fuse source by graphics version: the fuse register
+ * from version 20 onwards, the DSS topology before that.
+ */
+static void read_compute_fuses(struct xe_gt *gt)
+{
+	if (GRAPHICS_VER(gt_to_xe(gt)) >= 20) {
+		read_compute_fuses_from_reg(gt);
+		return;
+	}
+
+	read_compute_fuses_from_dss(gt);
+}
+
+/*
+ * Remove GSCCS0 from the GT's engine mask when the GSC firmware is not
+ * available.
+ */
+static void check_gsc_availability(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0)))
+		return;
+
+	/*
+	 * The GSCCS is only used to communicate with the GSC FW, so if we don't
+	 * have the FW there is nothing we need the engine for and can therefore
+	 * skip its initialization.
+	 */
+	if (xe_uc_fw_is_available(&gt->uc.gsc.fw))
+		return;
+
+	gt->info.engine_mask &= ~BIT(XE_HW_ENGINE_GSCCS0);
+	drm_info(&xe->drm, "gsccs disabled due to lack of FW\n");
+}
+
+/*
+ * Early engine setup for a GT: prune the engine mask according to the media,
+ * copy and compute fuses and GSC firmware availability, then run
+ * hw_engine_init_early() on every slot of gt->hw_engines. Always returns 0.
+ */
+int xe_hw_engines_init_early(struct xe_gt *gt)
+{
+	int id;
+
+	/* The default preempt timeout must lie inside its own allowed range. */
+	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN);
+	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX);
+
+	read_media_fuses(gt);
+	read_copy_fuses(gt);
+	read_compute_fuses(gt);
+	check_gsc_availability(gt);
+
+	for (id = 0; id < ARRAY_SIZE(gt->hw_engines); id++)
+		hw_engine_init_early(gt, &gt->hw_engines[id], id);
+
+	return 0;
+}
+
+/*
+ * Run hw_engine_init() on every engine visited by for_each_hw_engine() and
+ * then assign logical instance numbers. Stops at, and returns, the first
+ * error; returns 0 on success.
+ */
+int xe_hw_engines_init(struct xe_gt *gt)
+{
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+
+	for_each_hw_engine(hwe, gt, id) {
+		int ret = hw_engine_init(gt, hwe, id);
+
+		if (ret)
+			return ret;
+	}
+
+	hw_engine_setup_logical_mapping(gt);
+
+	return 0;
+}
+
+/*
+ * Per-engine interrupt entry point: wake every waiter on the device's
+ * ufence_wq, call the engine's optional irq_handler, and run the hardware
+ * fence IRQ processing when the user-interrupt bit is set in @intr_vec.
+ */
+void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
+{
+	struct xe_device *xe = gt_to_xe(hwe->gt);
+
+	wake_up_all(&xe->ufence_wq);
+
+	if (hwe->irq_handler)
+		hwe->irq_handler(hwe, intr_vec);
+
+	if (!(intr_vec & GT_RENDER_USER_INTERRUPT))
+		return;
+
+	xe_hw_fence_irq_run(hwe->fence_irq);
+}
+
+/**
+ * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
+ * @hwe: Xe HW Engine.
+ *
+ * Copies the engine's name and identifying fields, records the forcewake
+ * reference count of its domain and reads the ring registers, so that the
+ * state can be printed out at a later stage, e.g. during dev_coredump
+ * analysis. Allocations use GFP_ATOMIC; if the name copy cannot be allocated
+ * the snapshot is still returned with a NULL name.
+ *
+ * Returns: a Xe HW Engine snapshot object that must be freed by the
+ * caller, using `xe_hw_engine_snapshot_free`, or NULL if @hwe is not a valid
+ * engine or the snapshot could not be allocated.
+ */
+struct xe_hw_engine_snapshot *
+xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
+{
+	struct xe_hw_engine_snapshot *snap;
+	int name_sz;
+
+	if (!xe_hw_engine_is_valid(hwe))
+		return NULL;
+
+	snap = kzalloc(sizeof(*snap), GFP_ATOMIC);
+	if (!snap)
+		return NULL;
+
+	/* Private copy of the name, including the terminating NUL. */
+	name_sz = strlen(hwe->name) + 1;
+	snap->name = kzalloc(name_sz, GFP_ATOMIC);
+	if (snap->name)
+		strscpy(snap->name, hwe->name, name_sz);
+
+	/* Identity and forcewake state. */
+	snap->class = hwe->class;
+	snap->logical_instance = hwe->logical_instance;
+	snap->forcewake.domain = hwe->domain;
+	snap->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt), hwe->domain);
+	snap->mmio_base = hwe->mmio_base;
+
+	/* Ring registers, read through the engine-relative MMIO helper. */
+	snap->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0));
+	snap->reg.ring_hws_pga = hw_engine_mmio_read32(hwe, RING_HWS_PGA(0));
+	snap->reg.ring_execlist_status_lo =
+		hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0));
+	snap->reg.ring_execlist_status_hi =
+		hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0));
+	snap->reg.ring_execlist_sq_contents_lo =
+		hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0));
+	snap->reg.ring_execlist_sq_contents_hi =
+		hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0));
+	snap->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0));
+	/* Only the address bits of head and tail are kept. */
+	snap->reg.ring_head = hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR;
+	snap->reg.ring_tail = hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR;
+	snap->reg.ring_ctl = hw_engine_mmio_read32(hwe, RING_CTL(0));
+	snap->reg.ring_mi_mode = hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
+	snap->reg.ring_mode = hw_engine_mmio_read32(hwe, RING_MODE(0));
+	snap->reg.ring_imr = hw_engine_mmio_read32(hwe, RING_IMR(0));
+	snap->reg.ring_esr = hw_engine_mmio_read32(hwe, RING_ESR(0));
+	snap->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0));
+	snap->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0));
+	snap->reg.ring_acthd_udw = hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0));
+	snap->reg.ring_acthd = hw_engine_mmio_read32(hwe, RING_ACTHD(0));
+	snap->reg.ring_bbaddr_udw = hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0));
+	snap->reg.ring_bbaddr = hw_engine_mmio_read32(hwe, RING_BBADDR(0));
+	snap->reg.ring_dma_fadd_udw =
+		hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0));
+	snap->reg.ring_dma_fadd = hw_engine_mmio_read32(hwe, RING_DMA_FADD(0));
+	snap->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0));
+
+	/* RCU_MODE is read from the GT and only captured for compute engines. */
+	if (snap->class == XE_ENGINE_CLASS_COMPUTE)
+		snap->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE);
+
+	return snap;
+}
+
+/**
+ * xe_hw_engine_snapshot_print - Print out a given Xe HW Engine snapshot.
+ * @snapshot: Xe HW Engine snapshot object; NULL is accepted and prints nothing.
+ * @p: drm_printer where it will be printed out.
+ *
+ * Prints the engine name (an empty string if no name was captured), the
+ * logical instance, the forcewake state and every captured register. The
+ * RCU_MODE line is only printed for compute-class snapshots.
+ */
+void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
+				 struct drm_printer *p)
+{
+	const char *name;
+
+	if (!snapshot)
+		return;
+
+	/* The name copy may have failed at capture time. */
+	name = snapshot->name ? snapshot->name : "";
+
+	drm_printf(p, "%s (physical), logical instance=%d\n",
+		   name, snapshot->logical_instance);
+	drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
+		   snapshot->forcewake.domain, snapshot->forcewake.ref);
+
+	drm_printf(p, "\tHWSTAM: 0x%08x\n", snapshot->reg.ring_hwstam);
+	drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", snapshot->reg.ring_hws_pga);
+
+	drm_printf(p, "\tRING_EXECLIST_STATUS_LO: 0x%08x\n",
+		   snapshot->reg.ring_execlist_status_lo);
+	drm_printf(p, "\tRING_EXECLIST_STATUS_HI: 0x%08x\n",
+		   snapshot->reg.ring_execlist_status_hi);
+	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_LO: 0x%08x\n",
+		   snapshot->reg.ring_execlist_sq_contents_lo);
+	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_HI: 0x%08x\n",
+		   snapshot->reg.ring_execlist_sq_contents_hi);
+
+	drm_printf(p, "\tRING_START: 0x%08x\n", snapshot->reg.ring_start);
+	drm_printf(p, "\tRING_HEAD:  0x%08x\n", snapshot->reg.ring_head);
+	drm_printf(p, "\tRING_TAIL:  0x%08x\n", snapshot->reg.ring_tail);
+	drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl);
+	drm_printf(p, "\tRING_MI_MODE: 0x%08x\n", snapshot->reg.ring_mi_mode);
+	drm_printf(p, "\tRING_MODE: 0x%08x\n", snapshot->reg.ring_mode);
+
+	drm_printf(p, "\tRING_IMR:   0x%08x\n", snapshot->reg.ring_imr);
+	drm_printf(p, "\tRING_ESR:   0x%08x\n", snapshot->reg.ring_esr);
+	drm_printf(p, "\tRING_EMR:   0x%08x\n", snapshot->reg.ring_emr);
+	drm_printf(p, "\tRING_EIR:   0x%08x\n", snapshot->reg.ring_eir);
+
+	/* 64-bit values are shown as upper_lower dword pairs. */
+	drm_printf(p, "\tACTHD:  0x%08x_%08x\n",
+		   snapshot->reg.ring_acthd_udw, snapshot->reg.ring_acthd);
+	drm_printf(p, "\tBBADDR: 0x%08x_%08x\n",
+		   snapshot->reg.ring_bbaddr_udw, snapshot->reg.ring_bbaddr);
+	drm_printf(p, "\tDMA_FADDR: 0x%08x_%08x\n",
+		   snapshot->reg.ring_dma_fadd_udw, snapshot->reg.ring_dma_fadd);
+	drm_printf(p, "\tIPEHR: 0x%08x\n\n", snapshot->reg.ipehr);
+
+	if (snapshot->class != XE_ENGINE_CLASS_COMPUTE)
+		return;
+
+	drm_printf(p, "\tRCU_MODE: 0x%08x\n", snapshot->reg.rcu_mode);
+}
+
+/**
+ * xe_hw_engine_snapshot_free - Free all allocated objects for a given snapshot.
+ * @snapshot: Xe HW Engine snapshot object; NULL is accepted and ignored.
+ *
+ * This function frees all the memory that was allocated at capture time:
+ * the copied engine name and the snapshot object itself.
+ */
+void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot)
+{
+	if (snapshot) {
+		kfree(snapshot->name);
+		kfree(snapshot);
+	}
+}
+
+/**
+ * xe_hw_engine_print - Xe HW Engine Print.
+ * @hwe: Hardware Engine.
+ * @p: drm_printer.
+ *
+ * This function captures a snapshot of @hwe, prints it to @p and frees it
+ * again. Nothing is printed when the capture returns NULL.
+ */
+void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p)
+{
+	struct xe_hw_engine_snapshot *snap = xe_hw_engine_snapshot_capture(hwe);
+
+	/* Both helpers below tolerate a NULL snapshot. */
+	xe_hw_engine_snapshot_print(snap, p);
+	xe_hw_engine_snapshot_free(snap);
+}
+
+/*
+ * Build a bitmask of the instance numbers of all engines of @engine_class
+ * that are set in @gt's engine mask (bit N set means instance N is present).
+ */
+u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
+				enum xe_engine_class engine_class)
+{
+	enum xe_hw_engine_id id;
+	u32 instances = 0;
+
+	for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
+		if (engine_infos[id].class != engine_class)
+			continue;
+
+		if (!(gt->info.engine_mask & BIT(id)))
+			continue;
+
+		instances |= BIT(engine_infos[id].instance);
+	}
+
+	return instances;
+}
+
+/*
+ * Report whether @hwe is reserved: true for any engine of class OTHER, for a
+ * compute engine whose logical instance is at or beyond the GT's ccs_mode
+ * while ccs_mode is enabled, and for the copy engine instance recorded as
+ * reserved for USM on devices that have USM.
+ */
+bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe)
+{
+	struct xe_gt *gt = hwe->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (hwe->class == XE_ENGINE_CLASS_OTHER)
+		return true;
+
+	/* Check for engines disabled by ccs_mode setting */
+	if (xe_gt_ccs_mode_enabled(gt) &&
+	    hwe->class == XE_ENGINE_CLASS_COMPUTE &&
+	    hwe->logical_instance >= gt->ccs_mode)
+		return true;
+
+	if (!xe->info.has_usm || hwe->class != XE_ENGINE_CLASS_COPY)
+		return false;
+
+	return hwe->instance == gt->usm.reserved_bcs_instance;
+}
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h
new file mode 100644
index 000000000000..71968ee2f600
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_engine.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_HW_ENGINE_H_
+#define _XE_HW_ENGINE_H_
+
+#include "xe_hw_engine_types.h"
+
+struct drm_printer;
+
+/*
+ * Limits and defaults for the per-engine-class scheduling properties. Each
+ * value is taken from the matching CONFIG_DRM_XE_* symbol when that symbol is
+ * defined, and falls back to the literal below otherwise.
+ */
+
+/* Bounds for the job timeout (presumably milliseconds, like job_timeout_ms — confirm). */
+#ifdef CONFIG_DRM_XE_JOB_TIMEOUT_MIN
+#define XE_HW_ENGINE_JOB_TIMEOUT_MIN CONFIG_DRM_XE_JOB_TIMEOUT_MIN
+#else
+#define XE_HW_ENGINE_JOB_TIMEOUT_MIN 1
+#endif
+#ifdef CONFIG_DRM_XE_JOB_TIMEOUT_MAX
+#define XE_HW_ENGINE_JOB_TIMEOUT_MAX CONFIG_DRM_XE_JOB_TIMEOUT_MAX
+#else
+#define XE_HW_ENGINE_JOB_TIMEOUT_MAX (10 * 1000)
+#endif
+/* Bounds for the timeslice duration (presumably microseconds, like timeslice_us — confirm). */
+#ifdef CONFIG_DRM_XE_TIMESLICE_MIN
+#define XE_HW_ENGINE_TIMESLICE_MIN CONFIG_DRM_XE_TIMESLICE_MIN
+#else
+#define XE_HW_ENGINE_TIMESLICE_MIN 1
+#endif
+#ifdef CONFIG_DRM_XE_TIMESLICE_MAX
+#define XE_HW_ENGINE_TIMESLICE_MAX CONFIG_DRM_XE_TIMESLICE_MAX
+#else
+#define XE_HW_ENGINE_TIMESLICE_MAX (10 * 1000 * 1000)
+#endif
+/* Default preempt timeout, stored into preempt_timeout_us at early engine init. */
+#ifdef CONFIG_DRM_XE_PREEMPT_TIMEOUT
+#define XE_HW_ENGINE_PREEMPT_TIMEOUT CONFIG_DRM_XE_PREEMPT_TIMEOUT
+#else
+#define XE_HW_ENGINE_PREEMPT_TIMEOUT (640 * 1000)
+#endif
+/* Bounds for the preempt timeout; the default above is build-time checked against them. */
+#ifdef CONFIG_DRM_XE_PREEMPT_TIMEOUT_MIN
+#define XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN CONFIG_DRM_XE_PREEMPT_TIMEOUT_MIN
+#else
+#define XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN 1
+#endif
+#ifdef CONFIG_DRM_XE_PREEMPT_TIMEOUT_MAX
+#define XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX CONFIG_DRM_XE_PREEMPT_TIMEOUT_MAX
+#else
+#define XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX (10 * 1000 * 1000)
+#endif
+
+/* Two-stage engine bring-up for a GT: fuse handling and early setup, then full init. */
+int xe_hw_engines_init_early(struct xe_gt *gt);
+int xe_hw_engines_init(struct xe_gt *gt);
+/* Interrupt entry point for one engine; @intr_vec carries the interrupt bits. */
+void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec);
+void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe);
+/* Bitmask of instance numbers of the engines of @engine_class present on @gt. */
+u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
+				enum xe_engine_class engine_class);
+
+/* Snapshot API: capture may return NULL; print and free both accept NULL. */
+struct xe_hw_engine_snapshot *
+xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe);
+void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot);
+void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
+				 struct drm_printer *p);
+/* Capture, print and free a snapshot in one call. */
+void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p);
+void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe);
+
+/* True for class OTHER, ccs_mode-disabled compute engines and the USM copy engine. */
+bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe);
+/* An engine counts as valid while it has a name; the name is cleared on init failure. */
+static inline bool xe_hw_engine_is_valid(struct xe_hw_engine *hwe)
+{
+	return hwe->name != NULL;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
new file mode 100644
index 000000000000..e49bc14f0ecf
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
@@ -0,0 +1,675 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+#include "xe_gt.h"
+#include "xe_hw_engine_class_sysfs.h"
+
+#define MAX_ENGINE_CLASS_NAME_LEN    16
+static int xe_add_hw_engine_class_defaults(struct xe_device *xe,
+					   struct kobject *parent);
+
+/**
+ * xe_hw_engine_timeout_in_range - Helper to check if timeout is in range
+ * @timeout: timeout to validate
+ * @min: min value of valid range
+ * @max: max value of valid range
+ *
+ * Checks that @timeout lies within the inclusive range [@min, @max] of the
+ * HW engine scheduler.
+ *
+ * Returns: true if @timeout is in range, false otherwise.
+ */
+bool xe_hw_engine_timeout_in_range(u64 timeout, u64 min, u64 max)
+{
+	if (timeout < min)
+		return false;
+
+	return timeout <= max;
+}
+
+/*
+ * kobject release callback: frees the allocation that starts at @kobj. Used
+ * both for the bare ".defaults" kobjects and for struct kobj_eclass, whose
+ * kobject is its first member.
+ */
+static void kobj_xe_hw_engine_release(struct kobject *kobj)
+{
+	kfree(kobj);
+}
+
+/* kobj_type shared by the per-class directories and their ".defaults" children. */
+static const struct kobj_type kobj_xe_hw_engine_type = {
+	.release = kobj_xe_hw_engine_release,
+	.sysfs_ops = &kobj_sysfs_ops
+};
+
+/*
+ * Store callback for "job_timeout_max". The new maximum must not be below
+ * the class's current job_timeout_min and must lie within the build-time
+ * [XE_HW_ENGINE_JOB_TIMEOUT_MIN, XE_HW_ENGINE_JOB_TIMEOUT_MAX] range.
+ * Returns @count on success, the kstrtou32() error or -EINVAL otherwise.
+ */
+static ssize_t job_timeout_max_store(struct kobject *kobj,
+				     struct kobj_attribute *attr,
+				     const char *buf, size_t count)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 new_max;
+	int ret;
+
+	ret = kstrtou32(buf, 0, &new_max);
+	if (ret)
+		return ret;
+
+	if (new_max < eclass->sched_props.job_timeout_min ||
+	    !xe_hw_engine_timeout_in_range(new_max,
+					   XE_HW_ENGINE_JOB_TIMEOUT_MIN,
+					   XE_HW_ENGINE_JOB_TIMEOUT_MAX))
+		return -EINVAL;
+
+	WRITE_ONCE(eclass->sched_props.job_timeout_max, new_max);
+
+	return count;
+}
+
+/* Show callback for "job_timeout_max": the current upper bound for job_timeout_ms. */
+static ssize_t job_timeout_max_show(struct kobject *kobj,
+				    struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+
+	/* sysfs_emit() is the helper meant for show() and bounds the output. */
+	return sysfs_emit(buf, "%u\n", eclass->sched_props.job_timeout_max);
+}
+
+static struct kobj_attribute job_timeout_max_attr =
+__ATTR(job_timeout_max, 0644, job_timeout_max_show, job_timeout_max_store);
+
+/*
+ * Store callback for "job_timeout_min". The new minimum must not exceed the
+ * class's current job_timeout_max and must lie within the build-time
+ * [XE_HW_ENGINE_JOB_TIMEOUT_MIN, XE_HW_ENGINE_JOB_TIMEOUT_MAX] range.
+ * Returns @count on success, the kstrtou32() error or -EINVAL otherwise.
+ */
+static ssize_t job_timeout_min_store(struct kobject *kobj,
+				     struct kobj_attribute *attr,
+				     const char *buf, size_t count)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 new_min;
+	int ret;
+
+	ret = kstrtou32(buf, 0, &new_min);
+	if (ret)
+		return ret;
+
+	if (new_min > eclass->sched_props.job_timeout_max ||
+	    !xe_hw_engine_timeout_in_range(new_min,
+					   XE_HW_ENGINE_JOB_TIMEOUT_MIN,
+					   XE_HW_ENGINE_JOB_TIMEOUT_MAX))
+		return -EINVAL;
+
+	WRITE_ONCE(eclass->sched_props.job_timeout_min, new_min);
+
+	return count;
+}
+
+/* Show callback for "job_timeout_min": the current lower bound for job_timeout_ms. */
+static ssize_t job_timeout_min_show(struct kobject *kobj,
+				    struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+
+	/* sysfs_emit() is the helper meant for show() and bounds the output. */
+	return sysfs_emit(buf, "%u\n", eclass->sched_props.job_timeout_min);
+}
+
+static struct kobj_attribute job_timeout_min_attr =
+__ATTR(job_timeout_min, 0644, job_timeout_min_show, job_timeout_min_store);
+
+/*
+ * Store callback for "job_timeout_ms". The value must lie within the class's
+ * current [job_timeout_min, job_timeout_max] range. Returns @count on
+ * success, the kstrtou32() error or -EINVAL otherwise.
+ */
+static ssize_t job_timeout_store(struct kobject *kobj,
+				 struct kobj_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 lo = eclass->sched_props.job_timeout_min;
+	u32 hi = eclass->sched_props.job_timeout_max;
+	u32 val;
+	int ret;
+
+	ret = kstrtou32(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	if (xe_hw_engine_timeout_in_range(val, lo, hi)) {
+		WRITE_ONCE(eclass->sched_props.job_timeout_ms, val);
+		return count;
+	}
+
+	return -EINVAL;
+}
+
+/* Show callback for "job_timeout_ms": the class's current job timeout. */
+static ssize_t job_timeout_show(struct kobject *kobj,
+				struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+
+	/* sysfs_emit() is the helper meant for show() and bounds the output. */
+	return sysfs_emit(buf, "%u\n", eclass->sched_props.job_timeout_ms);
+}
+
+static struct kobj_attribute job_timeout_attr =
+__ATTR(job_timeout_ms, 0644, job_timeout_show, job_timeout_store);
+
+/*
+ * Show callback for ".defaults/job_timeout_ms". The ".defaults" kobject is a
+ * child of the class kobject, hence the lookup through kobj->parent.
+ */
+static ssize_t job_timeout_default(struct kobject *kobj,
+				   struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
+
+	/* sysfs_emit() is the helper meant for show() and bounds the output. */
+	return sysfs_emit(buf, "%u\n", eclass->defaults.job_timeout_ms);
+}
+
+static struct kobj_attribute job_timeout_def =
+__ATTR(job_timeout_ms, 0444, job_timeout_default, NULL);
+
+/*
+ * Show callback for ".defaults/job_timeout_min". The ".defaults" kobject is
+ * a child of the class kobject, hence the lookup through kobj->parent.
+ */
+static ssize_t job_timeout_min_default(struct kobject *kobj,
+				       struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
+
+	/* sysfs_emit() is the helper meant for show() and bounds the output. */
+	return sysfs_emit(buf, "%u\n", eclass->defaults.job_timeout_min);
+}
+
+static struct kobj_attribute job_timeout_min_def =
+__ATTR(job_timeout_min, 0444, job_timeout_min_default, NULL);
+
+/*
+ * Show callback for ".defaults/job_timeout_max". The ".defaults" kobject is
+ * a child of the class kobject, hence the lookup through kobj->parent.
+ */
+static ssize_t job_timeout_max_default(struct kobject *kobj,
+				       struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
+
+	/* sysfs_emit() is the helper meant for show() and bounds the output. */
+	return sysfs_emit(buf, "%u\n", eclass->defaults.job_timeout_max);
+}
+
+static struct kobj_attribute job_timeout_max_def =
+__ATTR(job_timeout_max, 0444, job_timeout_max_default, NULL);
+
+/*
+ * Store callback for "timeslice_duration_us". The value must lie within the
+ * class's current [timeslice_min, timeslice_max] range. Returns @count on
+ * success, the kstrtou32() error or -EINVAL otherwise.
+ */
+static ssize_t timeslice_duration_store(struct kobject *kobj,
+					struct kobj_attribute *attr,
+					const char *buf, size_t count)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 lo = eclass->sched_props.timeslice_min;
+	u32 hi = eclass->sched_props.timeslice_max;
+	u32 val;
+	int ret;
+
+	ret = kstrtou32(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	if (xe_hw_engine_timeout_in_range(val, lo, hi)) {
+		WRITE_ONCE(eclass->sched_props.timeslice_us, val);
+		return count;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Store callback for "timeslice_duration_max". The new maximum must not be
+ * below the class's current timeslice_min and must lie within the build-time
+ * [XE_HW_ENGINE_TIMESLICE_MIN, XE_HW_ENGINE_TIMESLICE_MAX] range.
+ * Returns @count on success, the kstrtou32() error or -EINVAL otherwise.
+ */
+static ssize_t timeslice_duration_max_store(struct kobject *kobj,
+					    struct kobj_attribute *attr,
+					    const char *buf, size_t count)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 new_max;
+	int ret;
+
+	ret = kstrtou32(buf, 0, &new_max);
+	if (ret)
+		return ret;
+
+	if (new_max < eclass->sched_props.timeslice_min ||
+	    !xe_hw_engine_timeout_in_range(new_max,
+					   XE_HW_ENGINE_TIMESLICE_MIN,
+					   XE_HW_ENGINE_TIMESLICE_MAX))
+		return -EINVAL;
+
+	WRITE_ONCE(eclass->sched_props.timeslice_max, new_max);
+
+	return count;
+}
+
+/* Show callback for "timeslice_duration_max": current upper bound for timeslice_duration_us. */
+static ssize_t timeslice_duration_max_show(struct kobject *kobj,
+					   struct kobj_attribute *attr,
+					   char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+
+	/* sysfs_emit() is the helper meant for show() and bounds the output. */
+	return sysfs_emit(buf, "%u\n", eclass->sched_props.timeslice_max);
+}
+
+static struct kobj_attribute timeslice_duration_max_attr =
+	__ATTR(timeslice_duration_max, 0644, timeslice_duration_max_show,
+	       timeslice_duration_max_store);
+
+/*
+ * Store callback for "timeslice_duration_min". The new minimum must not
+ * exceed the class's current timeslice_max and must lie within the build-time
+ * [XE_HW_ENGINE_TIMESLICE_MIN, XE_HW_ENGINE_TIMESLICE_MAX] range.
+ * Returns @count on success, the kstrtou32() error or -EINVAL otherwise.
+ */
+static ssize_t timeslice_duration_min_store(struct kobject *kobj,
+					    struct kobj_attribute *attr,
+					    const char *buf, size_t count)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 new_min;
+	int ret;
+
+	ret = kstrtou32(buf, 0, &new_min);
+	if (ret)
+		return ret;
+
+	if (new_min > eclass->sched_props.timeslice_max ||
+	    !xe_hw_engine_timeout_in_range(new_min,
+					   XE_HW_ENGINE_TIMESLICE_MIN,
+					   XE_HW_ENGINE_TIMESLICE_MAX))
+		return -EINVAL;
+
+	WRITE_ONCE(eclass->sched_props.timeslice_min, new_min);
+
+	return count;
+}
+
+/* Show callback for "timeslice_duration_min": current lower bound for timeslice_duration_us. */
+static ssize_t timeslice_duration_min_show(struct kobject *kobj,
+					   struct kobj_attribute *attr,
+					   char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+
+	/* sysfs_emit() is the helper meant for show() and bounds the output. */
+	return sysfs_emit(buf, "%u\n", eclass->sched_props.timeslice_min);
+}
+
+static struct kobj_attribute timeslice_duration_min_attr =
+	__ATTR(timeslice_duration_min, 0644, timeslice_duration_min_show,
+	       timeslice_duration_min_store);
+
+/* Show callback for "timeslice_duration_us": the class's current timeslice. */
+static ssize_t timeslice_duration_show(struct kobject *kobj,
+				       struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+
+	/* sysfs_emit() is the helper meant for show() and bounds the output. */
+	return sysfs_emit(buf, "%u\n", eclass->sched_props.timeslice_us);
+}
+
+static struct kobj_attribute timeslice_duration_attr =
+	__ATTR(timeslice_duration_us, 0644, timeslice_duration_show,
+	       timeslice_duration_store);
+
+/*
+ * Show callback for ".defaults/timeslice_duration_us". The ".defaults"
+ * kobject is a child of the class kobject, hence the kobj->parent lookup.
+ */
+static ssize_t timeslice_default(struct kobject *kobj,
+				 struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
+
+	/* sysfs_emit() is the helper meant for show() and bounds the output. */
+	return sysfs_emit(buf, "%u\n", eclass->defaults.timeslice_us);
+}
+
+static struct kobj_attribute timeslice_duration_def =
+__ATTR(timeslice_duration_us, 0444, timeslice_default, NULL);
+
+/*
+ * Show callback for ".defaults/timeslice_duration_min". The ".defaults"
+ * kobject is a child of the class kobject, hence the kobj->parent lookup.
+ */
+static ssize_t timeslice_min_default(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
+
+	/* sysfs_emit() is the helper meant for show() and bounds the output. */
+	return sysfs_emit(buf, "%u\n", eclass->defaults.timeslice_min);
+}
+
+static struct kobj_attribute timeslice_duration_min_def =
+__ATTR(timeslice_duration_min, 0444, timeslice_min_default, NULL);
+
+/*
+ * Show callback for ".defaults/timeslice_duration_max". The ".defaults"
+ * kobject is a child of the class kobject, hence the kobj->parent lookup.
+ */
+static ssize_t timeslice_max_default(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
+
+	/* sysfs_emit() is the helper meant for show() and bounds the output. */
+	return sysfs_emit(buf, "%u\n", eclass->defaults.timeslice_max);
+}
+
+static struct kobj_attribute timeslice_duration_max_def =
+__ATTR(timeslice_duration_max, 0444, timeslice_max_default, NULL);
+
+/*
+ * Store callback for "preempt_timeout_us". The value must lie within the
+ * class's current [preempt_timeout_min, preempt_timeout_max] range. Returns
+ * @count on success, the kstrtou32() error or -EINVAL otherwise.
+ */
+static ssize_t preempt_timeout_store(struct kobject *kobj,
+				     struct kobj_attribute *attr,
+				     const char *buf, size_t count)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 lo = eclass->sched_props.preempt_timeout_min;
+	u32 hi = eclass->sched_props.preempt_timeout_max;
+	u32 val;
+	int ret;
+
+	ret = kstrtou32(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	if (xe_hw_engine_timeout_in_range(val, lo, hi)) {
+		WRITE_ONCE(eclass->sched_props.preempt_timeout_us, val);
+		return count;
+	}
+
+	return -EINVAL;
+}
+
+/* Show callback for "preempt_timeout_us": the class's current preempt timeout. */
+static ssize_t preempt_timeout_show(struct kobject *kobj,
+				    struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+
+	/* sysfs_emit() is the helper meant for show() and bounds the output. */
+	return sysfs_emit(buf, "%u\n", eclass->sched_props.preempt_timeout_us);
+}
+
+static struct kobj_attribute preempt_timeout_attr =
+__ATTR(preempt_timeout_us, 0644, preempt_timeout_show, preempt_timeout_store);
+
+/*
+ * Show callback for ".defaults/preempt_timeout_us". The ".defaults" kobject
+ * is a child of the class kobject, hence the lookup through kobj->parent.
+ */
+static ssize_t preempt_timeout_default(struct kobject *kobj,
+				       struct kobj_attribute *attr,
+				       char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
+
+	/* sysfs_emit() is the helper meant for show() and bounds the output. */
+	return sysfs_emit(buf, "%u\n", eclass->defaults.preempt_timeout_us);
+}
+
+static struct kobj_attribute preempt_timeout_def =
+__ATTR(preempt_timeout_us, 0444, preempt_timeout_default, NULL);
+
+/*
+ * Show callback for ".defaults/preempt_timeout_min". The ".defaults" kobject
+ * is a child of the class kobject, hence the lookup through kobj->parent.
+ */
+static ssize_t preempt_timeout_min_default(struct kobject *kobj,
+					   struct kobj_attribute *attr,
+					   char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
+
+	/* sysfs_emit() is the helper meant for show() and bounds the output. */
+	return sysfs_emit(buf, "%u\n", eclass->defaults.preempt_timeout_min);
+}
+
+static struct kobj_attribute preempt_timeout_min_def =
+__ATTR(preempt_timeout_min, 0444, preempt_timeout_min_default, NULL);
+
+/*
+ * Show callback for ".defaults/preempt_timeout_max". The ".defaults" kobject
+ * is a child of the class kobject, hence the lookup through kobj->parent.
+ */
+static ssize_t preempt_timeout_max_default(struct kobject *kobj,
+					   struct kobj_attribute *attr,
+					   char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
+
+	/* sysfs_emit() is the helper meant for show() and bounds the output. */
+	return sysfs_emit(buf, "%u\n", eclass->defaults.preempt_timeout_max);
+}
+
+static struct kobj_attribute preempt_timeout_max_def =
+__ATTR(preempt_timeout_max, 0444, preempt_timeout_max_default, NULL);
+
+/*
+ * Store callback for "preempt_timeout_max". The new maximum must not be
+ * below the class's current preempt_timeout_min and must lie within the
+ * build-time [XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN,
+ * XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX] range. Returns @count on success, the
+ * kstrtou32() error or -EINVAL otherwise.
+ */
+static ssize_t preempt_timeout_max_store(struct kobject *kobj,
+					 struct kobj_attribute *attr,
+					 const char *buf, size_t count)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 new_max;
+	int ret;
+
+	ret = kstrtou32(buf, 0, &new_max);
+	if (ret)
+		return ret;
+
+	if (new_max < eclass->sched_props.preempt_timeout_min ||
+	    !xe_hw_engine_timeout_in_range(new_max,
+					   XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN,
+					   XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX))
+		return -EINVAL;
+
+	WRITE_ONCE(eclass->sched_props.preempt_timeout_max, new_max);
+
+	return count;
+}
+
+/* Show callback for "preempt_timeout_max": current upper bound for preempt_timeout_us. */
+static ssize_t preempt_timeout_max_show(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+
+	/* sysfs_emit() is the helper meant for show() and bounds the output. */
+	return sysfs_emit(buf, "%u\n", eclass->sched_props.preempt_timeout_max);
+}
+
+static struct kobj_attribute preempt_timeout_max_attr =
+	__ATTR(preempt_timeout_max, 0644, preempt_timeout_max_show,
+	       preempt_timeout_max_store);
+
+/*
+ * Store callback for "preempt_timeout_min". The new minimum must not exceed
+ * the class's current preempt_timeout_max and must lie within the build-time
+ * [XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN, XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX]
+ * range. Returns @count on success, the kstrtou32() error or -EINVAL
+ * otherwise.
+ */
+static ssize_t preempt_timeout_min_store(struct kobject *kobj,
+					 struct kobj_attribute *attr,
+					 const char *buf, size_t count)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 new_min;
+	int ret;
+
+	ret = kstrtou32(buf, 0, &new_min);
+	if (ret)
+		return ret;
+
+	if (new_min > eclass->sched_props.preempt_timeout_max ||
+	    !xe_hw_engine_timeout_in_range(new_min,
+					   XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN,
+					   XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX))
+		return -EINVAL;
+
+	WRITE_ONCE(eclass->sched_props.preempt_timeout_min, new_min);
+
+	return count;
+}
+
+/* Show callback for "preempt_timeout_min": current lower bound for preempt_timeout_us. */
+static ssize_t preempt_timeout_min_show(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+
+	/* sysfs_emit() is the helper meant for show() and bounds the output. */
+	return sysfs_emit(buf, "%u\n", eclass->sched_props.preempt_timeout_min);
+}
+
+static struct kobj_attribute preempt_timeout_min_attr =
+	__ATTR(preempt_timeout_min, 0644, preempt_timeout_min_show,
+	       preempt_timeout_min_store);
+
+/*
+ * Read-only (0444) attributes created in each class's ".defaults" directory;
+ * they report the values recorded in eclass->defaults.
+ */
+static const struct attribute *defaults[] = {
+	&job_timeout_def.attr,
+	&job_timeout_min_def.attr,
+	&job_timeout_max_def.attr,
+	&timeslice_duration_def.attr,
+	&timeslice_duration_min_def.attr,
+	&timeslice_duration_max_def.attr,
+	&preempt_timeout_def.attr,
+	&preempt_timeout_min_def.attr,
+	&preempt_timeout_max_def.attr,
+	NULL
+};
+
+/*
+ * Read-write (0644) attributes created directly in each engine-class
+ * directory; they expose the live values in eclass->sched_props.
+ */
+static const struct attribute *files[] = {
+	&job_timeout_attr.attr,
+	&job_timeout_min_attr.attr,
+	&job_timeout_max_attr.attr,
+	&timeslice_duration_attr.attr,
+	&timeslice_duration_min_attr.attr,
+	&timeslice_duration_max_attr.attr,
+	&preempt_timeout_attr.attr,
+	&preempt_timeout_min_attr.attr,
+	&preempt_timeout_max_attr.attr,
+	NULL
+};
+
+/*
+ * DRM-managed release action for an engine-class directory: remove the
+ * tunable attribute files and drop the reference on the class kobject.
+ */
+static void kobj_xe_hw_engine_class_fini(struct drm_device *drm, void *arg)
+{
+	struct kobject *class_kobj = arg;
+
+	sysfs_remove_files(class_kobj, files);
+	kobject_put(class_kobj);
+}
+
+/*
+ * Allocate a struct kobj_eclass, add its kobject under @parent as @name and
+ * register kobj_xe_hw_engine_class_fini() as its DRM-managed release action.
+ *
+ * Returns the new object, or NULL if the allocation, kobject_add() or the
+ * action registration fails. In every failure case the object has already
+ * been released and must not be touched by the caller.
+ */
+static struct kobj_eclass *
+kobj_xe_hw_engine_class(struct xe_device *xe, struct kobject *parent, char *name)
+{
+	struct kobj_eclass *keclass;
+	int err;
+
+	keclass = kzalloc(sizeof(*keclass), GFP_KERNEL);
+	if (!keclass)
+		return NULL;
+
+	kobject_init(&keclass->base, &kobj_xe_hw_engine_type);
+	if (kobject_add(&keclass->base, parent, "%s", name)) {
+		/* Dropping the only reference frees keclass via ->release(). */
+		kobject_put(&keclass->base);
+		return NULL;
+	}
+
+	err = drmm_add_action_or_reset(&xe->drm, kobj_xe_hw_engine_class_fini,
+				       &keclass->base);
+	if (err) {
+		drm_warn(&xe->drm,
+			 "%s: drmm_add_action_or_reset failed, err: %d\n",
+			 __func__, err);
+		/*
+		 * The "_or_reset" variant has already run the release action,
+		 * which put the last reference and freed keclass. Returning
+		 * the pointer here would hand the caller freed memory.
+		 */
+		return NULL;
+	}
+
+	return keclass;
+}
+
+/*
+ * DRM-managed release action for a ".defaults" directory: remove the
+ * read-only attribute files and drop the reference on its kobject.
+ */
+static void hw_engine_class_defaults_fini(struct drm_device *drm, void *arg)
+{
+	struct kobject *defaults_kobj = arg;
+
+	sysfs_remove_files(defaults_kobj, defaults);
+	kobject_put(defaults_kobj);
+}
+
+/*
+ * Create the ".defaults" directory under @parent, populate it with the
+ * read-only default attributes and register its DRM-managed release action.
+ *
+ * Returns 0 on success or a negative errno. The kobject reference is dropped
+ * on every failure path (by the "_or_reset" action in the last case).
+ */
+static int xe_add_hw_engine_class_defaults(struct xe_device *xe,
+					   struct kobject *parent)
+{
+	struct kobject *kobj;
+	int ret;
+
+	kobj = kzalloc(sizeof(*kobj), GFP_KERNEL);
+	if (!kobj)
+		return -ENOMEM;
+
+	kobject_init(kobj, &kobj_xe_hw_engine_type);
+
+	ret = kobject_add(kobj, parent, "%s", ".defaults");
+	if (!ret)
+		ret = sysfs_create_files(kobj, defaults);
+	if (ret) {
+		kobject_put(kobj);
+		return ret;
+	}
+
+	ret = drmm_add_action_or_reset(&xe->drm, hw_engine_class_defaults_fini,
+				       kobj);
+	if (ret)
+		drm_warn(&xe->drm,
+			 "%s: drmm_add_action_or_reset failed, err: %d\n",
+			 __func__, ret);
+
+	return ret;
+}
+
+/* kobject release callback for the "engines" directory: frees the kobject. */
+static void xe_hw_engine_sysfs_kobj_release(struct kobject *kobj)
+{
+	kfree(kobj);
+}
+
+/* kobj_type of the per-GT "engines" directory that holds the class directories. */
+static const struct kobj_type xe_hw_engine_sysfs_kobj_type = {
+	.release = xe_hw_engine_sysfs_kobj_release,
+	.sysfs_ops = &kobj_sysfs_ops,
+};
+
+/* DRM-managed release action for the "engines" directory: drop its reference. */
+static void hw_engine_class_sysfs_fini(struct drm_device *drm, void *arg)
+{
+	struct kobject *engines_kobj = arg;
+
+	kobject_put(engines_kobj);
+}
+
+/**
+ * xe_hw_engine_class_sysfs_init - Init HW engine classes on GT.
+ * @gt: Xe GT.
+ *
+ * This routine creates an "engines" directory under the GT's sysfs node with
+ * one sub-directory per engine class present on the GT ("rcs", "vcs", "vecs",
+ * "bcs", "ccs"). Each class directory gets the scheduling property
+ * attributes plus a ".defaults" directory with their default values.
+ *
+ * Returns: Returns error value for failure and 0 for success.
+ */
+int xe_hw_engine_class_sysfs_init(struct xe_gt *gt)
+{
+	/* Directory name per class; classes without an entry are rejected. */
+	static const char * const class_dir_names[] = {
+		[XE_ENGINE_CLASS_RENDER] = "rcs",
+		[XE_ENGINE_CLASS_VIDEO_DECODE] = "vcs",
+		[XE_ENGINE_CLASS_VIDEO_ENHANCE] = "vecs",
+		[XE_ENGINE_CLASS_COPY] = "bcs",
+		[XE_ENGINE_CLASS_COMPUTE] = "ccs",
+	};
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	struct kobject *kobj;
+	u16 class_mask = 0;
+	int err = 0;
+
+	kobj = kzalloc(sizeof(*kobj), GFP_KERNEL);
+	if (!kobj)
+		return -ENOMEM;
+
+	kobject_init(kobj, &xe_hw_engine_sysfs_kobj_type);
+
+	err = kobject_add(kobj, gt->sysfs, "engines");
+	if (err)
+		goto err_object;
+
+	for_each_hw_engine(hwe, gt, id) {
+		char name[MAX_ENGINE_CLASS_NAME_LEN];
+		struct kobj_eclass *keclass;
+
+		if (hwe->class == XE_ENGINE_CLASS_OTHER ||
+		    hwe->class == XE_ENGINE_CLASS_MAX)
+			continue;
+
+		/* Only the first engine of each class creates the directory. */
+		if (class_mask & (1 << hwe->class))
+			continue;
+
+		class_mask |= 1 << hwe->class;
+
+		if (hwe->class >= ARRAY_SIZE(class_dir_names) ||
+		    !class_dir_names[hwe->class]) {
+			err = -EINVAL;
+			goto err_object;
+		}
+		strcpy(name, class_dir_names[hwe->class]);
+
+		keclass = kobj_xe_hw_engine_class(xe, kobj, name);
+		if (!keclass) {
+			err = -EINVAL;
+			goto err_object;
+		}
+
+		keclass->eclass = hwe->eclass;
+		err = xe_add_hw_engine_class_defaults(xe, &keclass->base);
+		if (err) {
+			drm_warn(&xe->drm,
+				 "Add .defaults to engines failed!, err: %d\n",
+				 err);
+			goto err_object;
+		}
+
+		err = sysfs_create_files(&keclass->base, files);
+		if (err)
+			goto err_object;
+	}
+
+	/* On failure the "_or_reset" variant has already dropped the reference. */
+	err = drmm_add_action_or_reset(&xe->drm, hw_engine_class_sysfs_fini,
+				       kobj);
+	if (err)
+		drm_warn(&xe->drm,
+			 "%s: drmm_add_action_or_reset failed, err: %d\n",
+			 __func__, err);
+
+	return err;
+
+err_object:
+	kobject_put(kobj);
+	return err;
+}
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h
new file mode 100644
index 000000000000..ec5ba673b314
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_ENGINE_CLASS_SYSFS_H_
+#define _XE_ENGINE_CLASS_SYSFS_H_
+
+#include <linux/kobject.h>
+
+struct xe_gt;
+struct xe_hw_engine_class_intf;
+
+int xe_hw_engine_class_sysfs_init(struct xe_gt *gt);
+bool xe_hw_engine_timeout_in_range(u64 timeout, u64 min, u64 max);
+
+/**
+ * struct kobj_eclass - An eclass's kobject wrapper that ties the kobject to its
+ * hw engine class interface.
+ *
+ * When several eclasses are exposed, this struct identifies which eclass a given
+ * sysfs call must address (see kobj_to_eclass()).
+ */
+struct kobj_eclass {
+	/** @base: The embedded kobject; kobj_to_eclass() recovers the wrapper from it */
+	struct kobject base;
+	/** @eclass: A pointer to the hw engine class interface */
+	struct xe_hw_engine_class_intf *eclass;
+};
+
+static inline struct xe_hw_engine_class_intf *kobj_to_eclass(struct kobject *kobj)
+{
+	return container_of(kobj, struct kobj_eclass, base)->eclass; /* kobj must be a kobj_eclass.base */
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
new file mode 100644
index 000000000000..39908dec042a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
@@ -0,0 +1,225 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_HW_ENGINE_TYPES_H_
+#define _XE_HW_ENGINE_TYPES_H_
+
+#include "xe_force_wake_types.h"
+#include "xe_lrc_types.h"
+#include "xe_reg_sr_types.h"
+
+/* See "Engine ID Definition" struct in the Icelake PRM */
+enum xe_engine_class {
+	XE_ENGINE_CLASS_RENDER = 0,
+	XE_ENGINE_CLASS_VIDEO_DECODE = 1,
+	XE_ENGINE_CLASS_VIDEO_ENHANCE = 2,
+	XE_ENGINE_CLASS_COPY = 3,
+	XE_ENGINE_CLASS_OTHER = 4,
+	XE_ENGINE_CLASS_COMPUTE = 5,
+	XE_ENGINE_CLASS_MAX = 6, /* one past the highest class value above */
+};
+
+enum xe_hw_engine_id {
+	XE_HW_ENGINE_RCS0,
+#define XE_HW_ENGINE_RCS_MASK	GENMASK_ULL(XE_HW_ENGINE_RCS0, XE_HW_ENGINE_RCS0)
+	XE_HW_ENGINE_BCS0,
+	XE_HW_ENGINE_BCS1,
+	XE_HW_ENGINE_BCS2,
+	XE_HW_ENGINE_BCS3,
+	XE_HW_ENGINE_BCS4,
+	XE_HW_ENGINE_BCS5,
+	XE_HW_ENGINE_BCS6,
+	XE_HW_ENGINE_BCS7,
+	XE_HW_ENGINE_BCS8,
+#define XE_HW_ENGINE_BCS_MASK	GENMASK_ULL(XE_HW_ENGINE_BCS8, XE_HW_ENGINE_BCS0)
+	XE_HW_ENGINE_VCS0,
+	XE_HW_ENGINE_VCS1,
+	XE_HW_ENGINE_VCS2,
+	XE_HW_ENGINE_VCS3,
+	XE_HW_ENGINE_VCS4,
+	XE_HW_ENGINE_VCS5,
+	XE_HW_ENGINE_VCS6,
+	XE_HW_ENGINE_VCS7,
+#define XE_HW_ENGINE_VCS_MASK	GENMASK_ULL(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0)
+	XE_HW_ENGINE_VECS0,
+	XE_HW_ENGINE_VECS1,
+	XE_HW_ENGINE_VECS2,
+	XE_HW_ENGINE_VECS3,
+#define XE_HW_ENGINE_VECS_MASK	GENMASK_ULL(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0)
+	XE_HW_ENGINE_CCS0,
+	XE_HW_ENGINE_CCS1,
+	XE_HW_ENGINE_CCS2,
+	XE_HW_ENGINE_CCS3,
+#define XE_HW_ENGINE_CCS_MASK	GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)
+	XE_HW_ENGINE_GSCCS0,
+#define XE_HW_ENGINE_GSCCS_MASK	GENMASK_ULL(XE_HW_ENGINE_GSCCS0, XE_HW_ENGINE_GSCCS0)
+	XE_NUM_HW_ENGINES, /* equals the number of engine ids listed above */
+};
+
+/* FIXME: s/XE_HW_ENGINE_MAX_INSTANCE/XE_HW_ENGINE_MAX_COUNT */
+#define XE_HW_ENGINE_MAX_INSTANCE	9
+
+struct xe_bo;
+struct xe_execlist_port;
+struct xe_gt;
+
+/**
+ * struct xe_hw_engine_class_intf - per hw engine class struct interface
+ *
+ * Contains all the hw engine properties per engine class.
+ *
+ * @sched_props: scheduling properties
+ * @defaults: default scheduling properties
+ */
+struct xe_hw_engine_class_intf {
+	/**
+	 * @sched_props: scheduling properties
+	 * @defaults: default scheduling properties
+	 */
+	struct {
+		/** @job_timeout_ms: Job timeout in ms for engine */
+		u32 job_timeout_ms;
+		/** @job_timeout_min: Min job timeout in ms for engine */
+		u32 job_timeout_min;
+		/** @job_timeout_max: Max job timeout in ms for engine */
+		u32 job_timeout_max;
+		/** @timeslice_us: timeslice period in micro-seconds */
+		u32 timeslice_us;
+		/** @timeslice_min: min timeslice period in micro-seconds */
+		u32 timeslice_min;
+		/** @timeslice_max: max timeslice period in micro-seconds */
+		u32 timeslice_max;
+		/** @preempt_timeout_us: preemption timeout in micro-seconds */
+		u32 preempt_timeout_us;
+		/** @preempt_timeout_min: min preemption timeout in micro-seconds */
+		u32 preempt_timeout_min;
+		/** @preempt_timeout_max: max preemption timeout in micro-seconds */
+		u32 preempt_timeout_max;
+	} sched_props, defaults;
+};
+
+/**
+ * struct xe_hw_engine - Hardware engine
+ *
+ * Contains all the hardware engine state for physical instances.
+ */
+struct xe_hw_engine {
+	/** @gt: graphics tile this hw engine belongs to */
+	struct xe_gt *gt;
+	/** @name: name of this hw engine */
+	const char *name;
+	/** @class: class of this hw engine */
+	enum xe_engine_class class;
+	/** @instance: physical instance of this hw engine */
+	u16 instance;
+	/** @logical_instance: logical instance of this hw engine */
+	u16 logical_instance;
+	/** @mmio_base: MMIO base address of this hw engine */
+	u32 mmio_base;
+	/**
+	 * @reg_sr: table with registers to be restored on GT init/resume/reset
+	 */
+	struct xe_reg_sr reg_sr;
+	/**
+	 * @reg_whitelist: table with registers to be whitelisted
+	 */
+	struct xe_reg_sr reg_whitelist;
+	/**
+	 * @reg_lrc: LRC workaround registers
+	 */
+	struct xe_reg_sr reg_lrc;
+	/** @domain: force wake domain of this hw engine */
+	enum xe_force_wake_domains domain;
+	/** @hwsp: hardware status page buffer object */
+	struct xe_bo *hwsp;
+	/** @kernel_lrc: Kernel LRC (should be replaced w/ an xe_engine) */
+	struct xe_lrc kernel_lrc;
+	/** @exl_port: execlists port */
+	struct xe_execlist_port *exl_port;
+	/** @fence_irq: fence IRQ to run when a hw engine IRQ is received */
+	struct xe_hw_fence_irq *fence_irq;
+	/** @irq_handler: IRQ handler to run when hw engine IRQ is received */
+	void (*irq_handler)(struct xe_hw_engine *hwe, u16 intr_vec);
+	/** @engine_id: id for this hw engine */
+	enum xe_hw_engine_id engine_id;
+	/** @eclass: pointer to per hw engine class interface */
+	struct xe_hw_engine_class_intf *eclass;
+};
+
+/**
+ * struct xe_hw_engine_snapshot - Hardware engine snapshot
+ *
+ * Contains the snapshot of useful hardware engine info and registers.
+ */
+struct xe_hw_engine_snapshot {
+	/** @name: name of the hw engine */
+	char *name;
+	/** @class: class of this hw engine */
+	enum xe_engine_class class;
+	/** @logical_instance: logical instance of this hw engine */
+	u16 logical_instance;
+	/** @forcewake: Force Wake information snapshot */
+	struct {
+		/** @domain: force wake domain of this hw engine */
+		enum xe_force_wake_domains domain;
+		/** @ref: Forcewake ref for the above domain */
+		int ref;
+	} forcewake;
+	/** @mmio_base: MMIO base address of this hw engine */
+	u32 mmio_base;
+	/** @reg: Useful MMIO register snapshot */
+	struct {
+		/** @ring_hwstam: RING_HWSTAM */
+		u32 ring_hwstam;
+		/** @ring_hws_pga: RING_HWS_PGA */
+		u32 ring_hws_pga;
+		/** @ring_execlist_status_lo: RING_EXECLIST_STATUS_LO */
+		u32 ring_execlist_status_lo;
+		/** @ring_execlist_status_hi: RING_EXECLIST_STATUS_HI */
+		u32 ring_execlist_status_hi;
+		/** @ring_execlist_sq_contents_lo: RING_EXECLIST_SQ_CONTENTS */
+		u32 ring_execlist_sq_contents_lo;
+		/** @ring_execlist_sq_contents_hi: RING_EXECLIST_SQ_CONTENTS + 4 */
+		u32 ring_execlist_sq_contents_hi;
+		/** @ring_start: RING_START */
+		u32 ring_start;
+		/** @ring_head: RING_HEAD */
+		u32 ring_head;
+		/** @ring_tail: RING_TAIL */
+		u32 ring_tail;
+		/** @ring_ctl: RING_CTL */
+		u32 ring_ctl;
+		/** @ring_mi_mode: RING_MI_MODE */
+		u32 ring_mi_mode;
+		/** @ring_mode: RING_MODE */
+		u32 ring_mode;
+		/** @ring_imr: RING_IMR */
+		u32 ring_imr;
+		/** @ring_esr: RING_ESR */
+		u32 ring_esr;
+		/** @ring_emr: RING_EMR */
+		u32 ring_emr;
+		/** @ring_eir: RING_EIR */
+		u32 ring_eir;
+		/** @ring_acthd_udw: RING_ACTHD_UDW */
+		u32 ring_acthd_udw;
+		/** @ring_acthd: RING_ACTHD */
+		u32 ring_acthd;
+		/** @ring_bbaddr_udw: RING_BBADDR_UDW */
+		u32 ring_bbaddr_udw;
+		/** @ring_bbaddr: RING_BBADDR */
+		u32 ring_bbaddr;
+		/** @ring_dma_fadd_udw: RING_DMA_FADD_UDW */
+		u32 ring_dma_fadd_udw;
+		/** @ring_dma_fadd: RING_DMA_FADD */
+		u32 ring_dma_fadd;
+		/** @ipehr: IPEHR */
+		u32 ipehr;
+		/** @rcu_mode: RCU_MODE */
+		u32 rcu_mode;
+	} reg;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c
new file mode 100644
index 000000000000..a5de3e7b0bd6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_fence.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_hw_fence.h"
+
+#include <linux/device.h>
+#include <linux/slab.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_hw_engine.h"
+#include "xe_macros.h"
+#include "xe_map.h"
+#include "xe_trace.h"
+
+static struct kmem_cache *xe_hw_fence_slab;
+
+int __init xe_hw_fence_module_init(void)
+{
+	struct kmem_cache *slab;
+
+	/* One module-wide slab cache backs every struct xe_hw_fence. */
+	slab = kmem_cache_create("xe_hw_fence", sizeof(struct xe_hw_fence), 0,
+				 SLAB_HWCACHE_ALIGN, NULL);
+	xe_hw_fence_slab = slab;
+	return slab ? 0 : -ENOMEM;
+}
+
+void xe_hw_fence_module_exit(void)
+{
+	rcu_barrier(); /* let queued call_rcu(fence_free) callbacks finish using the slab */
+	kmem_cache_destroy(xe_hw_fence_slab);
+}
+
+static struct xe_hw_fence *fence_alloc(void)
+{
+	return kmem_cache_zalloc(xe_hw_fence_slab, GFP_KERNEL); /* zero-filled; NULL on failure */
+}
+
+static void fence_free(struct rcu_head *rcu)
+{
+	struct xe_hw_fence *hw_fence;
+
+	hw_fence = container_of(rcu, struct xe_hw_fence, dma.rcu);
+	if (!WARN_ON_ONCE(!hw_fence))
+		kmem_cache_free(xe_hw_fence_slab, hw_fence);
+}
+
+static void hw_fence_irq_run_cb(struct irq_work *work)
+{
+	struct xe_hw_fence_irq *irq = container_of(work, typeof(*irq), work);
+	struct xe_hw_fence *fence, *next;
+	bool tmp;
+
+	tmp = dma_fence_begin_signalling();
+	spin_lock(&irq->lock);
+	if (irq->enabled) { /* false between xe_hw_fence_irq_stop() and _start() */
+		list_for_each_entry_safe(fence, next, &irq->pending, irq_link) {
+			struct dma_fence *dma_fence = &fence->dma;
+
+			trace_xe_hw_fence_try_signal(fence);
+			if (dma_fence_is_signaled_locked(dma_fence)) {
+				trace_xe_hw_fence_signal(fence);
+				list_del_init(&fence->irq_link);
+				dma_fence_put(dma_fence); /* ref taken in xe_hw_fence_enable_signaling() */
+			}
+		}
+	}
+	spin_unlock(&irq->lock);
+	dma_fence_end_signalling(tmp);
+}
+
+void xe_hw_fence_irq_init(struct xe_hw_fence_irq *irq)
+{
+	INIT_LIST_HEAD(&irq->pending);
+	spin_lock_init(&irq->lock);
+	irq->enabled = true;
+	init_irq_work(&irq->work, hw_fence_irq_run_cb); /* worker that signals pending fences */
+}
+
+void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq)
+{
+	struct xe_hw_fence *fence, *next;
+	unsigned long flags;
+	int err;
+	bool tmp;
+
+	if (XE_WARN_ON(!list_empty(&irq->pending))) { /* leftovers are unexpected: warn, then flush */
+		tmp = dma_fence_begin_signalling();
+		spin_lock_irqsave(&irq->lock, flags);
+		list_for_each_entry_safe(fence, next, &irq->pending, irq_link) {
+			list_del_init(&fence->irq_link);
+			err = dma_fence_signal_locked(&fence->dma);
+			dma_fence_put(&fence->dma); /* ref taken in xe_hw_fence_enable_signaling() */
+			XE_WARN_ON(err);
+		}
+		spin_unlock_irqrestore(&irq->lock, flags);
+		dma_fence_end_signalling(tmp);
+	}
+}
+
+void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq)
+{
+	irq_work_queue(&irq->work); /* schedules hw_fence_irq_run_cb() */
+}
+
+void xe_hw_fence_irq_stop(struct xe_hw_fence_irq *irq)
+{
+	spin_lock_irq(&irq->lock);
+	irq->enabled = false; /* hw_fence_irq_run_cb() now leaves the pending list untouched */
+	spin_unlock_irq(&irq->lock);
+}
+
+void xe_hw_fence_irq_start(struct xe_hw_fence_irq *irq)
+{
+	spin_lock_irq(&irq->lock);
+	irq->enabled = true;
+	spin_unlock_irq(&irq->lock);
+
+	irq_work_queue(&irq->work); /* re-scan fences that were skipped while disabled */
+}
+
+void xe_hw_fence_ctx_init(struct xe_hw_fence_ctx *ctx, struct xe_gt *gt,
+			  struct xe_hw_fence_irq *irq, const char *name)
+{
+	ctx->gt = gt;
+	ctx->irq = irq;
+	ctx->dma_fence_ctx = dma_fence_context_alloc(1);
+	ctx->next_seqno = XE_FENCE_INITIAL_SEQNO;
+	snprintf(ctx->name, sizeof(ctx->name), "%s", name); /* bounded: truncate, never overrun */
+}
+
+void xe_hw_fence_ctx_finish(struct xe_hw_fence_ctx *ctx)
+{ /* intentionally empty: no teardown work is performed for a context */
+}
+
+static struct xe_hw_fence *to_xe_hw_fence(struct dma_fence *fence);
+
+static struct xe_hw_fence_irq *xe_hw_fence_irq(struct xe_hw_fence *fence)
+{
+	return container_of(fence->dma.lock, struct xe_hw_fence_irq, lock); /* dma.lock is &irq->lock, see xe_hw_fence_create() */
+}
+
+static const char *xe_hw_fence_get_driver_name(struct dma_fence *dma_fence)
+{
+	struct xe_gt *gt = to_xe_hw_fence(dma_fence)->ctx->gt;
+
+	return dev_name(gt_to_xe(gt)->drm.dev);
+}
+
+static const char *xe_hw_fence_get_timeline_name(struct dma_fence *dma_fence)
+{
+	struct xe_hw_fence_ctx *ctx = to_xe_hw_fence(dma_fence)->ctx;
+
+	return ctx->name;
+}
+
+static bool xe_hw_fence_signaled(struct dma_fence *dma_fence)
+{
+	struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
+	struct xe_device *xe = gt_to_xe(fence->ctx->gt);
+	u32 seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32); /* value currently stored at the seqno location */
+
+	return dma_fence->error || /* an errored fence reports as signaled */
+		!__dma_fence_is_later(dma_fence->seqno, seqno, dma_fence->ops);
+}
+
+static bool xe_hw_fence_enable_signaling(struct dma_fence *dma_fence)
+{
+	struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
+	struct xe_hw_fence_irq *irq = xe_hw_fence_irq(fence);
+
+	dma_fence_get(dma_fence); /* the pending list holds a reference */
+	list_add_tail(&fence->irq_link, &irq->pending);
+
+	/* SW completed (no HW IRQ) so kick handler to signal fence */
+	if (xe_hw_fence_signaled(dma_fence))
+		xe_hw_fence_irq_run(irq);
+
+	return true;
+}
+
+static void xe_hw_fence_release(struct dma_fence *dma_fence)
+{
+	struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
+
+	trace_xe_hw_fence_free(fence);
+	XE_WARN_ON(!list_empty(&fence->irq_link)); /* expected to be off irq->pending by now */
+	call_rcu(&dma_fence->rcu, fence_free); /* fence_free() returns the object to the slab */
+}
+
+static const struct dma_fence_ops xe_hw_fence_ops = {
+	.get_driver_name = xe_hw_fence_get_driver_name,
+	.get_timeline_name = xe_hw_fence_get_timeline_name,
+	.enable_signaling = xe_hw_fence_enable_signaling,
+	.signaled = xe_hw_fence_signaled,
+	.release = xe_hw_fence_release,
+};
+
+static struct xe_hw_fence *to_xe_hw_fence(struct dma_fence *fence)
+{
+	bool foreign = XE_WARN_ON(fence->ops != &xe_hw_fence_ops);
+
+	/* Only fences using xe_hw_fence_ops are embedded in a struct xe_hw_fence. */
+	return foreign ? NULL : container_of(fence, struct xe_hw_fence, dma);
+}
+
+struct xe_hw_fence *xe_hw_fence_create(struct xe_hw_fence_ctx *ctx,
+				       struct iosys_map seqno_map)
+{
+	struct xe_hw_fence *fence;
+
+	fence = fence_alloc();
+	if (!fence)
+		return ERR_PTR(-ENOMEM); /* callers get an ERR_PTR, never NULL */
+
+	fence->ctx = ctx;
+	fence->seqno_map = seqno_map; /* location read by xe_hw_fence_signaled() */
+	INIT_LIST_HEAD(&fence->irq_link);
+
+	dma_fence_init(&fence->dma, &xe_hw_fence_ops, &ctx->irq->lock,
+		       ctx->dma_fence_ctx, ctx->next_seqno++); /* irq->lock doubles as the fence lock */
+
+	trace_xe_hw_fence_create(fence);
+
+	return fence;
+}
diff --git a/drivers/gpu/drm/xe/xe_hw_fence.h b/drivers/gpu/drm/xe/xe_hw_fence.h
new file mode 100644
index 000000000000..cfe5fd603787
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_fence.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_HW_FENCE_H_
+#define _XE_HW_FENCE_H_
+
+#include "xe_hw_fence_types.h"
+
+/* Cause an early wrap to catch wrapping errors */
+#define XE_FENCE_INITIAL_SEQNO (-127)
+
+int xe_hw_fence_module_init(void);
+void xe_hw_fence_module_exit(void);
+
+void xe_hw_fence_irq_init(struct xe_hw_fence_irq *irq);
+void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq);
+void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq);
+void xe_hw_fence_irq_stop(struct xe_hw_fence_irq *irq);
+void xe_hw_fence_irq_start(struct xe_hw_fence_irq *irq);
+
+void xe_hw_fence_ctx_init(struct xe_hw_fence_ctx *ctx, struct xe_gt *gt,
+			  struct xe_hw_fence_irq *irq, const char *name);
+void xe_hw_fence_ctx_finish(struct xe_hw_fence_ctx *ctx);
+
+struct xe_hw_fence *xe_hw_fence_create(struct xe_hw_fence_ctx *ctx,
+				       struct iosys_map seqno_map);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_fence_types.h b/drivers/gpu/drm/xe/xe_hw_fence_types.h
new file mode 100644
index 000000000000..b33c4956e8ea
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_fence_types.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_HW_FENCE_TYPES_H_
+#define _XE_HW_FENCE_TYPES_H_
+
+#include <linux/dma-fence.h>
+#include <linux/iosys-map.h>
+#include <linux/irq_work.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+struct xe_gt;
+
+/**
+ * struct xe_hw_fence_irq - hardware fence IRQ handler
+ *
+ * One per engine class, signals completed xe_hw_fences, triggered via hw engine
+ * interrupt. On each trigger, search list of pending fences and signal.
+ */
+struct xe_hw_fence_irq {
+	/** @lock: protects all xe_hw_fences + pending list; also each fence's dma_fence lock */
+	spinlock_t lock;
+	/** @work: IRQ worker run to signal the fences */
+	struct irq_work work;
+	/** @pending: list of pending xe_hw_fences, linked through xe_hw_fence.irq_link */
+	struct list_head pending;
+	/** @enabled: fence signaling enabled; while false the IRQ worker skips @pending */
+	bool enabled;
+};
+
+#define MAX_FENCE_NAME_LEN	16
+
+/**
+ * struct xe_hw_fence_ctx - hardware fence context
+ *
+ * The context for a hardware fence. 1 to 1 relationship with xe_engine. Points
+ * to a xe_hw_fence_irq, maintains serial seqno.
+ */
+struct xe_hw_fence_ctx {
+	/** @gt: graphics tile of hardware fence context */
+	struct xe_gt *gt;
+	/** @irq: fence irq handler */
+	struct xe_hw_fence_irq *irq;
+	/** @dma_fence_ctx: dma fence context for hardware fence */
+	u64 dma_fence_ctx;
+	/** @next_seqno: next seqno for hardware fence, incremented per created fence */
+	u32 next_seqno;
+	/** @name: name of hardware fence context; buffer of MAX_FENCE_NAME_LEN bytes incl. NUL */
+	char name[MAX_FENCE_NAME_LEN];
+};
+
+/**
+ * struct xe_hw_fence - hardware fence
+ *
+ * Used to indicate a xe_sched_job is complete via a seqno written to memory.
+ * Signals on error or seqno past.
+ */
+struct xe_hw_fence {
+	/** @dma: base dma fence embedded in this hardware fence */
+	struct dma_fence dma;
+	/** @ctx: hardware fence context */
+	struct xe_hw_fence_ctx *ctx;
+	/** @seqno_map: I/O map for seqno */
+	struct iosys_map seqno_map;
+	/** @irq_link: Link in struct xe_hw_fence_irq.pending */
+	struct list_head irq_link;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
new file mode 100644
index 000000000000..174ed2185481
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -0,0 +1,776 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/hwmon-sysfs.h>
+#include <linux/hwmon.h>
+#include <linux/types.h>
+
+#include <drm/drm_managed.h>
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_mchbar_regs.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_hwmon.h"
+#include "xe_mmio.h"
+#include "xe_pcode.h"
+#include "xe_pcode_api.h"
+
+enum xe_hwmon_reg {
+	REG_PKG_RAPL_LIMIT, /* PL1 enable, limit and time window fields */
+	REG_PKG_POWER_SKU, /* TDP and min/max power fields */
+	REG_PKG_POWER_SKU_UNIT, /* presumably the unit shifts (scl_shift_*) - set outside this view */
+	REG_GT_PERF_STATUS, /* voltage field */
+	REG_PKG_ENERGY_STATUS, /* 32-bit energy counter */
+};
+
+enum xe_hwmon_reg_operation {
+	REG_READ32, /* xe_mmio_read32() */
+	REG_RMW32, /* xe_mmio_rmw32() with the clr/set masks */
+	REG_READ64, /* xe_mmio_read64_2x32() */
+};
+
+/*
+ * SF_* - scale factors for particular quantities according to hwmon spec.
+ */
+#define SF_POWER	1000000		/* microwatts */
+#define SF_CURR		1000		/* milliamperes */
+#define SF_VOLTAGE	1000		/* millivolts */
+#define SF_ENERGY	1000000		/* microjoules */
+#define SF_TIME		1000		/* milliseconds */
+
+/**
+ * struct xe_hwmon_energy_info - to accumulate energy
+ */
+struct xe_hwmon_energy_info {
+	/** @reg_val_prev: previous energy reg val */
+	u32 reg_val_prev;
+	/** @accum_energy: accumulated energy in raw register units (scaled when reported) */
+	long accum_energy;
+};
+
+/**
+ * struct xe_hwmon - xe hwmon data structure
+ */
+struct xe_hwmon {
+	/** @hwmon_dev: hwmon device for xe */
+	struct device *hwmon_dev;
+	/** @gt: primary gt */
+	struct xe_gt *gt;
+	/** @hwmon_lock: lock for rw attributes */
+	struct mutex hwmon_lock;
+	/** @scl_shift_power: pkg power unit */
+	int scl_shift_power;
+	/** @scl_shift_energy: pkg energy unit */
+	int scl_shift_energy;
+	/** @scl_shift_time: pkg time unit */
+	int scl_shift_time;
+	/** @ei: Energy info for energy1_input */
+	struct xe_hwmon_energy_info ei;
+};
+
+static u32 xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg)
+{
+	struct xe_device *xe = gt_to_xe(hwmon->gt);
+	struct xe_reg reg = XE_REG(0); /* raw value 0 means "not available on this platform" */
+
+	switch (hwmon_reg) {
+	case REG_PKG_RAPL_LIMIT:
+		if (xe->info.platform == XE_DG2)
+			reg = PCU_CR_PACKAGE_RAPL_LIMIT;
+		else if (xe->info.platform == XE_PVC)
+			reg = PVC_GT0_PACKAGE_RAPL_LIMIT;
+		break;
+	case REG_PKG_POWER_SKU:
+		if (xe->info.platform == XE_DG2)
+			reg = PCU_CR_PACKAGE_POWER_SKU;
+		else if (xe->info.platform == XE_PVC)
+			reg = PVC_GT0_PACKAGE_POWER_SKU;
+		break;
+	case REG_PKG_POWER_SKU_UNIT:
+		if (xe->info.platform == XE_DG2)
+			reg = PCU_CR_PACKAGE_POWER_SKU_UNIT;
+		else if (xe->info.platform == XE_PVC)
+			reg = PVC_GT0_PACKAGE_POWER_SKU_UNIT;
+		break;
+	case REG_GT_PERF_STATUS:
+		if (xe->info.platform == XE_DG2) /* no PVC mapping for this register */
+			reg = GT_PERF_STATUS;
+		break;
+	case REG_PKG_ENERGY_STATUS:
+		if (xe->info.platform == XE_DG2)
+			reg = PCU_CR_PACKAGE_ENERGY_STATUS;
+		else if (xe->info.platform == XE_PVC)
+			reg = PVC_GT0_PLATFORM_ENERGY_STATUS;
+		break;
+	default:
+		drm_warn(&xe->drm, "Unknown xe hwmon reg id: %d\n", hwmon_reg);
+		break;
+	}
+
+	return reg.raw; /* callers treat 0 as "attribute unsupported" */
+}
+
+static void xe_hwmon_process_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg,
+				 enum xe_hwmon_reg_operation operation, u64 *value,
+				 u32 clr, u32 set)
+{
+	struct xe_reg reg;
+
+	*value = 0; /* callers pass uninitialised storage; never leave it indeterminate */
+	reg.raw = xe_hwmon_get_reg(hwmon, hwmon_reg);
+	if (!reg.raw) /* register not available on this platform */
+		return;
+
+	switch (operation) {
+	case REG_READ32:
+		*value = xe_mmio_read32(hwmon->gt, reg);
+		break;
+	case REG_RMW32:
+		*value = xe_mmio_rmw32(hwmon->gt, reg, clr, set);
+		break;
+	case REG_READ64:
+		*value = xe_mmio_read64_2x32(hwmon->gt, reg);
+		break;
+	default:
+		drm_warn(&gt_to_xe(hwmon->gt)->drm, "Invalid xe hwmon reg operation: %d\n",
+			 operation);
+		break;
+	}
+}
+
+#define PL1_DISABLE 0
+
+/*
+ * HW allows arbitrary PL1 limits to be set but silently clamps these values to
+ * "typical but not guaranteed" min/max values in REG_PKG_POWER_SKU. Follow the
+ * same pattern for sysfs, allow arbitrary PL1 limits to be set but display
+ * clamped values when read.
+ */
+static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, long *value)
+{
+	u64 reg_val, min, max;
+
+	mutex_lock(&hwmon->hwmon_lock);
+
+	xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ32, &reg_val, 0, 0);
+	/* Check if PL1 limit is disabled */
+	if (!(reg_val & PKG_PWR_LIM_1_EN)) {
+		*value = PL1_DISABLE;
+		goto unlock;
+	}
+
+	reg_val = REG_FIELD_GET(PKG_PWR_LIM_1, reg_val);
+	*value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); /* HW units -> microwatts */
+
+	xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ64, &reg_val, 0, 0);
+	min = REG_FIELD_GET(PKG_MIN_PWR, reg_val);
+	min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power);
+	max = REG_FIELD_GET(PKG_MAX_PWR, reg_val);
+	max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power);
+
+	if (min && max) /* clamp only when both SKU bounds are non-zero */
+		*value = clamp_t(u64, *value, min, max);
+unlock:
+	mutex_unlock(&hwmon->hwmon_lock);
+}
+
+static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, long value)
+{
+	int ret = 0;
+	u64 reg_val;
+
+	mutex_lock(&hwmon->hwmon_lock);
+
+	/* Disable PL1 limit and verify, as limit cannot be disabled on all platforms */
+	if (value == PL1_DISABLE) {
+		xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, &reg_val,
+				     PKG_PWR_LIM_1_EN, 0);
+		xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ32, &reg_val,
+				     PKG_PWR_LIM_1_EN, 0);
+
+		if (reg_val & PKG_PWR_LIM_1_EN)
+			ret = -EOPNOTSUPP;
+		/* Done either way: falling through would re-enable PL1 with a zero limit */
+		goto unlock;
+	}
+
+	/* Computation in 64-bits to avoid overflow. Round to nearest. */
+	reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER);
+	reg_val = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, reg_val);
+
+	xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, &reg_val,
+			     PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val);
+unlock:
+	mutex_unlock(&hwmon->hwmon_lock);
+	return ret;
+}
+
+static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, long *value)
+{
+	u64 sku;
+
+	/* TDP field of the SKU register, scaled from HW power units to microwatts. */
+	xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ32, &sku, 0, 0);
+	*value = mul_u64_u32_shr(REG_FIELD_GET(PKG_TDP, sku), SF_POWER, hwmon->scl_shift_power);
+}
+
+/*
+ * xe_hwmon_energy_get - Obtain energy value
+ *
+ * The underlying energy hardware register is 32-bits and is subject to
+ * overflow. How long before overflow? For example, with an example
+ * scaling bit shift of 14 bits (see register *PACKAGE_POWER_SKU_UNIT) and
+ * a power draw of 1000 watts, the 32-bit counter will overflow in
+ * approximately 4.36 minutes.
+ *
+ * Examples:
+ *    1 watt:  (2^32 >> 14) /    1 W / (60 * 60 * 24) secs/day -> 3 days
+ * 1000 watts: (2^32 >> 14) / 1000 W / 60             secs/min -> 4.36 minutes
+ *
+ * The function significantly increases overflow duration (from 4.36
+ * minutes) by accumulating the energy register into a 'long' as allowed by
+ * the hwmon API. Using x86_64 128 bit arithmetic (see mul_u64_u32_shr()),
+ * a 'long' of 63 bits, SF_ENERGY of 1e6 (~20 bits) and
+ * hwmon->scl_shift_energy of 14 bits we have 57 (63 - 20 + 14) bits before
+ * energy1_input overflows. This at 1000 W is an overflow duration of 278 years.
+ */
+static void
+xe_hwmon_energy_get(struct xe_hwmon *hwmon, long *energy)
+{
+	struct xe_hwmon_energy_info *ei = &hwmon->ei;
+	u64 reg_val;
+
+	xe_hwmon_process_reg(hwmon, REG_PKG_ENERGY_STATUS, REG_READ32,
+			     &reg_val, 0, 0);
+
+	/*
+	 * Modular u32 delta: exact across one counter wrap (2^32 - prev + cur).
+	 */
+	ei->accum_energy += (u32)(reg_val - ei->reg_val_prev);
+
+	ei->reg_val_prev = reg_val;
+
+	*energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY,
+				  hwmon->scl_shift_energy);
+}
+
+static ssize_t
+xe_hwmon_power1_max_interval_show(struct device *dev, struct device_attribute *attr,
+				  char *buf)
+{
+	struct xe_hwmon *hwmon = dev_get_drvdata(dev);
+	u32 x, y, x_w = 2; /* 2 bits */
+	u64 r, tau4, out;
+
+	xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+
+	mutex_lock(&hwmon->hwmon_lock);
+
+	xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT,
+			     REG_READ32, &r, 0, 0);
+
+	mutex_unlock(&hwmon->hwmon_lock);
+
+	xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+
+	x = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_X, r);
+	y = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_Y, r);
+
+	/*
+	 * tau = 1.x * power(2,y), x = bits(23:22), y = bits(21:17)
+	 *     = (4 | x) << (y - 2)
+	 *
+	 * Here (y - 2) ensures a 1.x fixed point representation of 1.x
+	 * As x is 2 bits so 1.x can be 1.0, 1.25, 1.50, 1.75
+	 *
+	 * As y can be < 2, we compute tau4 = (4 | x) << y
+	 * and then add 2 when doing the final right shift to account for units
+	 */
+	tau4 = (u64)((1 << x_w) | x) << y; /* widen first: y is 5 bits, a u32 shift could drop bits */
+
+	/* val in hwmon interface units (millisec) */
+	out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
+
+	return sysfs_emit(buf, "%llu\n", out);
+}
+
+static ssize_t
+xe_hwmon_power1_max_interval_store(struct device *dev, struct device_attribute *attr,
+				   const char *buf, size_t count)
+{
+	struct xe_hwmon *hwmon = dev_get_drvdata(dev);
+	u32 x, y, rxy, x_w = 2; /* 2 bits */
+	u64 tau4, r, max_win;
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	/*
+	 * Max HW supported tau in '1.x * power(2,y)' format, x = 0, y = 0x12.
+	 * The hwmon->scl_shift_time default of 0xa results in a max tau of 256 seconds.
+	 *
+	 * The ideal scenario is for PKG_MAX_WIN to be read from the PKG_PWR_SKU register.
+	 * However, it is observed that existing discrete GPUs do not provide a correct
+	 * PKG_MAX_WIN value, therefore a default constant value is used. For future discrete
+	 * GPUs this may get resolved, in which case PKG_MAX_WIN should be obtained from PKG_PWR_SKU.
+	 */
+#define PKG_MAX_WIN_DEFAULT 0x12ull
+
+	/*
+	 * val must be <= max in hwmon interface units. The steps below are
+	 * explained in xe_hwmon_power1_max_interval_show()
+	 */
+	r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT);
+	x = REG_FIELD_GET(PKG_MAX_WIN_X, r);
+	y = REG_FIELD_GET(PKG_MAX_WIN_Y, r);
+	tau4 = (u64)((1 << x_w) | x) << y; /* widen before shifting, same as the show path */
+	max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
+
+	if (val > max_win)
+		return -EINVAL;
+
+	/* val in hw units */
+	val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME);
+
+	/*
+	 * Convert val to 1.x * power(2,y)
+	 * y = ilog2(val)
+	 * x = (val - (1 << y)) >> (y - 2)
+	 */
+	if (!val) {
+		y = 0;
+		x = 0;
+	} else {
+		y = ilog2(val);
+		x = (val - (1ul << y)) << x_w >> y;
+	}
+
+	rxy = REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_X, x) | REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_Y, y);
+
+	xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+
+	mutex_lock(&hwmon->hwmon_lock);
+
+	xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, &r,
+			     PKG_PWR_LIM_1_TIME, rxy);
+
+	mutex_unlock(&hwmon->hwmon_lock);
+
+	xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+
+	return count;
+}
+
+static SENSOR_DEVICE_ATTR(power1_max_interval, 0664,
+			  xe_hwmon_power1_max_interval_show,
+			  xe_hwmon_power1_max_interval_store, 0);
+
+static struct attribute *hwmon_attributes[] = {
+	&sensor_dev_attr_power1_max_interval.dev_attr.attr,
+	NULL
+};
+
+static umode_t xe_hwmon_attributes_visible(struct kobject *kobj,
+					   struct attribute *attr, int index)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct xe_hwmon *hwmon = dev_get_drvdata(dev);
+	int ret = 0; /* mode 0: attribute stays hidden unless matched below */
+
+	xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+
+	if (attr == &sensor_dev_attr_power1_max_interval.dev_attr.attr)
+		ret = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT) ? attr->mode : 0; /* needs the RAPL limit register */
+
+	xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+
+	return ret;
+}
+
+static const struct attribute_group hwmon_attrgroup = {
+	.attrs = hwmon_attributes,
+	.is_visible = xe_hwmon_attributes_visible,
+};
+
+static const struct attribute_group *hwmon_groups[] = {
+	&hwmon_attrgroup,
+	NULL
+};
+
+static const struct hwmon_channel_info *hwmon_info[] = {
+	HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT),
+	HWMON_CHANNEL_INFO(curr, HWMON_C_CRIT),
+	HWMON_CHANNEL_INFO(in, HWMON_I_INPUT),
+	HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT),
+	NULL
+};
+
+/* I1 is exposed as power_crit or as curr_crit depending on bit 31 */
+static int xe_hwmon_pcode_read_i1(struct xe_gt *gt, u32 *uval)
+{
+	/* Avoid Illegal Subcommand error: on DG2 pcode is not queried and -ENXIO is returned */
+	if (gt_to_xe(gt)->info.platform == XE_DG2)
+		return -ENXIO;
+
+	return xe_pcode_read(gt, PCODE_MBOX(PCODE_POWER_SETUP,
+			     POWER_SETUP_SUBCOMMAND_READ_I1, 0),
+			     uval, NULL);
+}
+
+static int xe_hwmon_pcode_write_i1(struct xe_gt *gt, u32 uval)
+{
+	return xe_pcode_write(gt, PCODE_MBOX(PCODE_POWER_SETUP,
+			      POWER_SETUP_SUBCOMMAND_WRITE_I1, 0),
+			      uval); /* no DG2 guard here, unlike xe_hwmon_pcode_read_i1() */
+}
+
+static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, long *value, u32 scale_factor)
+{
+	int ret;
+	u32 uval;
+
+	mutex_lock(&hwmon->hwmon_lock);
+
+	ret = xe_hwmon_pcode_read_i1(hwmon->gt, &uval); /* -ENXIO on DG2 */
+	if (ret)
+		goto unlock; /* *value is left untouched on failure */
+
+	*value = mul_u64_u32_shr(REG_FIELD_GET(POWER_SETUP_I1_DATA_MASK, uval),
+				 scale_factor, POWER_SETUP_I1_SHIFT);
+unlock:
+	mutex_unlock(&hwmon->hwmon_lock);
+	return ret;
+}
+
+static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, long value, u32 scale_factor)
+{
+	int ret;
+	u32 uval;
+
+	mutex_lock(&hwmon->hwmon_lock);
+
+	uval = DIV_ROUND_CLOSEST_ULL(value << POWER_SETUP_I1_SHIFT, scale_factor); /* inverse of the read-side scaling */
+	ret = xe_hwmon_pcode_write_i1(hwmon->gt, uval);
+
+	mutex_unlock(&hwmon->hwmon_lock);
+	return ret;
+}
+
+static void xe_hwmon_get_voltage(struct xe_hwmon *hwmon, long *value)
+{
+	u64 reg_val;
+
+	xe_hwmon_process_reg(hwmon, REG_GT_PERF_STATUS,
+			     REG_READ32, &reg_val, 0, 0);
+	/* HW register value in units of 2.5 millivolt; the result is in millivolts */
+	*value = DIV_ROUND_CLOSEST(REG_FIELD_GET(VOLTAGE_MASK, reg_val) * 2500, SF_VOLTAGE);
+}
+
+static umode_t
+xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int chan)
+{
+	u32 uval;
+
+	switch (attr) {
+	case hwmon_power_max: /* read-write when the RAPL limit register exists */
+		return xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT) ? 0664 : 0;
+	case hwmon_power_rated_max: /* read-only when the SKU register exists */
+		return xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU) ? 0444 : 0;
+	case hwmon_power_crit: /* only when I1 is readable and reported in watts */
+		return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) ||
+			!(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644;
+	default:
+		return 0;
+	}
+}
+
+static int
+xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int chan, long *val)
+{
+	if (attr == hwmon_power_crit)
+		return xe_hwmon_power_curr_crit_read(hwmon, val, SF_POWER);
+
+	/* The register-backed readers below return nothing, so report success. */
+	if (attr == hwmon_power_max)
+		xe_hwmon_power_max_read(hwmon, val);
+	else if (attr == hwmon_power_rated_max)
+		xe_hwmon_power_rated_max_read(hwmon, val);
+	else
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+/*
+ * Dispatch a power attribute write to its helper; attributes other than
+ * power_max and power_crit yield -EOPNOTSUPP. @chan is not used.
+ */
+static int
+xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int chan, long val)
+{
+	int ret;
+
+	switch (attr) {
+	case hwmon_power_max:
+		ret = xe_hwmon_power_max_write(hwmon, val);
+		break;
+	case hwmon_power_crit:
+		ret = xe_hwmon_power_curr_crit_write(hwmon, val, SF_POWER);
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Report the sysfs mode of a current attribute, or 0 to hide it.
+ *
+ * curr_crit is shown only when the pcode I1 read succeeds and does not
+ * report POWER_SETUP_I1_WATTS (the opposite of the power_crit condition).
+ */
+static umode_t
+xe_hwmon_curr_is_visible(const struct xe_hwmon *hwmon, u32 attr)
+{
+	umode_t mode = 0;
+	u32 i1;
+
+	switch (attr) {
+	case hwmon_curr_crit:
+		/* i1 is only inspected once the read has succeeded */
+		if (!xe_hwmon_pcode_read_i1(hwmon->gt, &i1) &&
+		    !(i1 & POWER_SETUP_I1_WATTS))
+			mode = 0644;
+		break;
+	default:
+		break;
+	}
+
+	return mode;
+}
+
+/* Read a current attribute; only curr_crit is supported, others give -EOPNOTSUPP. */
+static int
+xe_hwmon_curr_read(struct xe_hwmon *hwmon, u32 attr, long *val)
+{
+	int ret;
+
+	switch (attr) {
+	case hwmon_curr_crit:
+		ret = xe_hwmon_power_curr_crit_read(hwmon, val, SF_CURR);
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		break;
+	}
+
+	return ret;
+}
+
+/* Write a current attribute; only curr_crit is supported, others give -EOPNOTSUPP. */
+static int
+xe_hwmon_curr_write(struct xe_hwmon *hwmon, u32 attr, long val)
+{
+	int ret;
+
+	switch (attr) {
+	case hwmon_curr_crit:
+		ret = xe_hwmon_power_curr_crit_write(hwmon, val, SF_CURR);
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		break;
+	}
+
+	return ret;
+}
+
+/* in_input is read-only and shown only when REG_GT_PERF_STATUS is available. */
+static umode_t
+xe_hwmon_in_is_visible(struct xe_hwmon *hwmon, u32 attr)
+{
+	umode_t mode = 0;
+
+	switch (attr) {
+	case hwmon_in_input:
+		if (xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS))
+			mode = 0444;
+		break;
+	default:
+		break;
+	}
+
+	return mode;
+}
+
+/* Read a voltage attribute; only in_input is supported, others give -EOPNOTSUPP. */
+static int
+xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, long *val)
+{
+	int ret = 0;
+
+	switch (attr) {
+	case hwmon_in_input:
+		xe_hwmon_get_voltage(hwmon, val);
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		break;
+	}
+
+	return ret;
+}
+
+/* energy_input is read-only and shown only when REG_PKG_ENERGY_STATUS is available. */
+static umode_t
+xe_hwmon_energy_is_visible(struct xe_hwmon *hwmon, u32 attr)
+{
+	umode_t mode = 0;
+
+	switch (attr) {
+	case hwmon_energy_input:
+		if (xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS))
+			mode = 0444;
+		break;
+	default:
+		break;
+	}
+
+	return mode;
+}
+
+/* Read an energy attribute; only energy_input is supported, others give -EOPNOTSUPP. */
+static int
+xe_hwmon_energy_read(struct xe_hwmon *hwmon, u32 attr, long *val)
+{
+	int ret = 0;
+
+	switch (attr) {
+	case hwmon_energy_input:
+		xe_hwmon_energy_get(hwmon, val);
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * hwmon_ops.is_visible callback: route the query to the per-sensor-type
+ * helper while holding a device memory access reference. Unknown sensor
+ * types are hidden (mode 0).
+ */
+static umode_t
+xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type,
+		    u32 attr, int channel)
+{
+	struct xe_hwmon *hwmon = (struct xe_hwmon *)drvdata;
+	struct xe_device *xe = gt_to_xe(hwmon->gt);
+	umode_t mode = 0;
+
+	xe_device_mem_access_get(xe);
+
+	switch (type) {
+	case hwmon_power:
+		mode = xe_hwmon_power_is_visible(hwmon, attr, channel);
+		break;
+	case hwmon_curr:
+		mode = xe_hwmon_curr_is_visible(hwmon, attr);
+		break;
+	case hwmon_in:
+		mode = xe_hwmon_in_is_visible(hwmon, attr);
+		break;
+	case hwmon_energy:
+		mode = xe_hwmon_energy_is_visible(hwmon, attr);
+		break;
+	default:
+		break;
+	}
+
+	xe_device_mem_access_put(xe);
+
+	return mode;
+}
+
+/*
+ * hwmon_ops.read callback: route the read to the per-sensor-type helper
+ * while holding a device memory access reference.
+ *
+ * Returns the helper's result, or -EOPNOTSUPP for unknown sensor types.
+ */
+static int
+xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+	      int channel, long *val)
+{
+	struct xe_hwmon *hwmon = dev_get_drvdata(dev);
+	struct xe_device *xe = gt_to_xe(hwmon->gt);
+	int err = -EOPNOTSUPP;
+
+	xe_device_mem_access_get(xe);
+
+	switch (type) {
+	case hwmon_power:
+		err = xe_hwmon_power_read(hwmon, attr, channel, val);
+		break;
+	case hwmon_curr:
+		err = xe_hwmon_curr_read(hwmon, attr, val);
+		break;
+	case hwmon_in:
+		err = xe_hwmon_in_read(hwmon, attr, val);
+		break;
+	case hwmon_energy:
+		err = xe_hwmon_energy_read(hwmon, attr, val);
+		break;
+	default:
+		break;
+	}
+
+	xe_device_mem_access_put(xe);
+
+	return err;
+}
+
+/*
+ * hwmon_ops.write callback: route the write to the power or current helper
+ * while holding a device memory access reference.
+ *
+ * Returns the helper's result, or -EOPNOTSUPP for any other sensor type.
+ */
+static int
+xe_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+	       int channel, long val)
+{
+	struct xe_hwmon *hwmon = dev_get_drvdata(dev);
+	struct xe_device *xe = gt_to_xe(hwmon->gt);
+	int err = -EOPNOTSUPP;
+
+	xe_device_mem_access_get(xe);
+
+	switch (type) {
+	case hwmon_power:
+		err = xe_hwmon_power_write(hwmon, attr, channel, val);
+		break;
+	case hwmon_curr:
+		err = xe_hwmon_curr_write(hwmon, attr, val);
+		break;
+	default:
+		break;
+	}
+
+	xe_device_mem_access_put(xe);
+
+	return err;
+}
+
+/* Callbacks handed to the hwmon core for visibility, read and write requests. */
+static const struct hwmon_ops hwmon_ops = {
+	.is_visible = xe_hwmon_is_visible,
+	.read = xe_hwmon_read,
+	.write = xe_hwmon_write,
+};
+
+/* Chip description passed to devm_hwmon_device_register_with_info(). */
+static const struct hwmon_chip_info hwmon_chip_info = {
+	.ops = &hwmon_ops,
+	.info = hwmon_info,
+};
+
+/*
+ * Gather state that must be in place before the hwmon device is registered:
+ * the scale shifts taken from PKG_POWER_SKU_UNIT and a first sample of the
+ * energy register. Operates on xe->hwmon, which must already be set.
+ */
+static void
+xe_hwmon_get_preregistration_info(struct xe_device *xe)
+{
+	struct xe_hwmon *hwmon = xe->hwmon;
+	long energy;
+	u64 val_sku_unit = 0;
+
+	/*
+	 * The contents of register PKG_POWER_SKU_UNIT do not change,
+	 * so read it once and store the shift values.
+	 */
+	if (xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT)) {
+		xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU_UNIT,
+				     REG_READ32, &val_sku_unit, 0, 0);
+		hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit);
+		hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit);
+		hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit);
+	}
+
+	/*
+	 * Initialize 'struct xe_hwmon_energy_info', i.e. set fields to the
+	 * first value of the energy register read
+	 */
+	/* The value read into 'energy' is discarded; only the call's side effects matter. */
+	if (xe_hwmon_is_visible(hwmon, hwmon_energy, hwmon_energy_input, 0))
+		xe_hwmon_energy_get(hwmon, &energy);
+}
+
+/* Release action: tear down the hwmon_lock of the xe_hwmon passed as @arg. */
+static void xe_hwmon_mutex_destroy(void *arg)
+{
+	struct xe_hwmon *priv = arg;
+	struct mutex *lock = &priv->hwmon_lock;
+
+	mutex_destroy(lock);
+}
+
+/*
+ * Allocate the hwmon state for @xe and register it with the hwmon core.
+ *
+ * Does nothing on non-discrete devices. Failures are not propagated to the
+ * caller; on any failure after allocation xe->hwmon is left NULL.
+ */
+void xe_hwmon_register(struct xe_device *xe)
+{
+	struct device *dev = xe->drm.dev;
+	struct xe_hwmon *hwmon;
+
+	/* hwmon is available only for dGfx */
+	if (!IS_DGFX(xe))
+		return;
+
+	hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL);
+	if (!hwmon)
+		return;
+
+	xe->hwmon = hwmon;
+
+	mutex_init(&hwmon->hwmon_lock);
+	if (devm_add_action_or_reset(dev, xe_hwmon_mutex_destroy, hwmon)) {
+		/*
+		 * The action has already run, so the mutex is destroyed;
+		 * do not leave a pointer to the unusable state behind.
+		 */
+		xe->hwmon = NULL;
+		return;
+	}
+
+	/* primary GT to access device level properties */
+	hwmon->gt = xe->tiles[0].primary_gt;
+
+	xe_hwmon_get_preregistration_info(xe);
+
+	drm_dbg(&xe->drm, "Register xe hwmon interface\n");
+
+	/* hwmon_dev points to device hwmon<i> */
+	hwmon->hwmon_dev = devm_hwmon_device_register_with_info(dev, "xe", hwmon,
+								&hwmon_chip_info,
+								hwmon_groups);
+
+	if (IS_ERR(hwmon->hwmon_dev)) {
+		drm_warn(&xe->drm, "Failed to register xe hwmon (%pe)\n", hwmon->hwmon_dev);
+		xe->hwmon = NULL;
+		return;
+	}
+}
+
diff --git a/drivers/gpu/drm/xe/xe_hwmon.h b/drivers/gpu/drm/xe/xe_hwmon.h
new file mode 100644
index 000000000000..c42a1de2cd7a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hwmon.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_HWMON_H_
+#define _XE_HWMON_H_
+
+#include <linux/types.h>
+
+struct xe_device;
+
+#if IS_REACHABLE(CONFIG_HWMON)
+void xe_hwmon_register(struct xe_device *xe);
+#else
+/* No-op stub used when CONFIG_HWMON is not reachable. */
+static inline void xe_hwmon_register(struct xe_device *xe) { }
+#endif
+
+#endif /* _XE_HWMON_H_ */
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
new file mode 100644
index 000000000000..d1f5ba4bb745
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -0,0 +1,666 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_irq.h"
+
+#include <linux/sched/clock.h>
+
+#include <drm/drm_managed.h>
+
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
+#include "xe_device.h"
+#include "xe_display.h"
+#include "xe_drv.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_hw_engine.h"
+#include "xe_mmio.h"
+
+/*
+ * Interrupt registers for a unit are always consecutive and ordered
+ * ISR, IMR, IIR, IER. Each macro takes the unit's base offset; the argument
+ * is parenthesized so that compound expressions expand as intended.
+ */
+#define IMR(offset)				XE_REG((offset) + 0x4)
+#define IIR(offset)				XE_REG((offset) + 0x8)
+#define IER(offset)				XE_REG((offset) + 0xc)
+
+/*
+ * Warn if the interrupt register @reg reads back non-zero, then clear it by
+ * writing all ones twice, reading the register back after each write.
+ */
+static void assert_iir_is_zero(struct xe_gt *mmio, struct xe_reg reg)
+{
+	u32 val = xe_mmio_read32(mmio, reg);
+
+	if (val == 0)
+		return;
+
+	drm_WARN(&gt_to_xe(mmio)->drm, 1,
+		 "Interrupt register 0x%x is not zero: 0x%08x\n",
+		 reg.addr, val);
+	/* Same double write/read sequence that mask_and_disable() applies to IIR. */
+	xe_mmio_write32(mmio, reg, 0xffffffff);
+	xe_mmio_read32(mmio, reg);
+	xe_mmio_write32(mmio, reg, 0xffffffff);
+	xe_mmio_read32(mmio, reg);
+}
+
+/*
+ * Unmask and enable the specified interrupts.  Does not check current state,
+ * so any bits not specified here will become masked and disabled.
+ *
+ * @irqregs is the base offset of the unit's register group and @bits the
+ * set of interrupts to enable; accesses go through the tile's primary GT.
+ */
+static void unmask_and_enable(struct xe_tile *tile, u32 irqregs, u32 bits)
+{
+	struct xe_gt *mmio = tile->primary_gt;
+
+	/*
+	 * If we're just enabling an interrupt now, it shouldn't already
+	 * be raised in the IIR.
+	 */
+	assert_iir_is_zero(mmio, IIR(irqregs));
+
+	/* IER takes the enabled bits as-is, IMR takes their complement. */
+	xe_mmio_write32(mmio, IER(irqregs), bits);
+	xe_mmio_write32(mmio, IMR(irqregs), ~bits);
+
+	/* Posting read */
+	xe_mmio_read32(mmio, IMR(irqregs));
+}
+
+/*
+ * Mask and disable all interrupts of the register group at base offset
+ * @irqregs: set every IMR bit, clear IER, then clear IIR twice.
+ */
+static void mask_and_disable(struct xe_tile *tile, u32 irqregs)
+{
+	struct xe_gt *mmio = tile->primary_gt;
+
+	xe_mmio_write32(mmio, IMR(irqregs), ~0);
+	/* Posting read */
+	xe_mmio_read32(mmio, IMR(irqregs));
+
+	xe_mmio_write32(mmio, IER(irqregs), 0);
+
+	/* IIR can theoretically queue up two events. Be paranoid. */
+	xe_mmio_write32(mmio, IIR(irqregs), ~0);
+	xe_mmio_read32(mmio, IIR(irqregs));
+	xe_mmio_write32(mmio, IIR(irqregs), ~0);
+	xe_mmio_read32(mmio, IIR(irqregs));
+}
+
+/*
+ * Write 0 to GFX_MSTR_IRQ to disable the master interrupt, then return the
+ * value read back from the same register.
+ */
+static u32 xelp_intr_disable(struct xe_device *xe)
+{
+	struct xe_gt *mmio = xe_root_mmio_gt(xe);
+
+	xe_mmio_write32(mmio, GFX_MSTR_IRQ, 0);
+
+	/*
+	 * Now with master disabled, get a sample of level indications
+	 * for this interrupt. Indications will be cleared on related acks.
+	 * New indications can and will light up during processing,
+	 * and will generate new interrupt after enabling master.
+	 */
+	return xe_mmio_read32(mmio, GFX_MSTR_IRQ);
+}
+
+/*
+ * Acknowledge pending GU_MISC interrupts.
+ *
+ * When @master_ctl has GU_MISC_IRQ set, read the GU_MISC IIR, write any
+ * non-zero value back to it and return that value; otherwise return 0
+ * without touching the register.
+ */
+static u32
+gu_misc_irq_ack(struct xe_device *xe, const u32 master_ctl)
+{
+	struct xe_gt *mmio = xe_root_mmio_gt(xe);
+	u32 pending = 0;
+
+	if (master_ctl & GU_MISC_IRQ) {
+		pending = xe_mmio_read32(mmio, IIR(GU_MISC_IRQ_OFFSET));
+		if (likely(pending))
+			xe_mmio_write32(mmio, IIR(GU_MISC_IRQ_OFFSET), pending);
+	}
+
+	return pending;
+}
+
+/*
+ * Write MASTER_IRQ to GFX_MSTR_IRQ to enable the master interrupt. When
+ * @stall is true the register is read back after the write.
+ */
+static inline void xelp_intr_enable(struct xe_device *xe, bool stall)
+{
+	struct xe_gt *mmio = xe_root_mmio_gt(xe);
+
+	xe_mmio_write32(mmio, GFX_MSTR_IRQ, MASTER_IRQ);
+	if (stall)
+		xe_mmio_read32(mmio, GFX_MSTR_IRQ);
+}
+
+/*
+ * Enable/unmask the HWE interrupts for a specific GT's engines.
+ *
+ * The set of interrupt bits depends on whether the uC is enabled. Render,
+ * copy and compute registers are programmed only for non-media GTs; the
+ * video and GSC registers are programmed for media GTs, or for any GT when
+ * MEDIA_VER(xe) < 13.
+ */
+void xe_irq_enable_hwe(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 ccs_mask, bcs_mask;
+	u32 irqs, dmask, smask;
+	u32 gsc_mask = 0;
+
+	if (xe_device_uc_enabled(xe)) {
+		irqs = GT_RENDER_USER_INTERRUPT |
+			GT_RENDER_PIPECTL_NOTIFY_INTERRUPT;
+	} else {
+		irqs = GT_RENDER_USER_INTERRUPT |
+		       GT_CS_MASTER_ERROR_INTERRUPT |
+		       GT_CONTEXT_SWITCH_INTERRUPT |
+		       GT_WAIT_SEMAPHORE_INTERRUPT;
+	}
+
+	ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE);
+	bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY);
+
+	/*
+	 * dmask carries irqs in both 16-bit halves, smask only in the upper
+	 * half — presumably one engine per half; confirm against the
+	 * register definitions.
+	 */
+	dmask = irqs << 16 | irqs;
+	smask = irqs << 16;
+
+	if (!xe_gt_is_media_type(gt)) {
+		/* Enable interrupts for each engine class */
+		xe_mmio_write32(gt, RENDER_COPY_INTR_ENABLE, dmask);
+		if (ccs_mask)
+			xe_mmio_write32(gt, CCS_RSVD_INTR_ENABLE, smask);
+
+		/* Unmask interrupts for each engine instance */
+		xe_mmio_write32(gt, RCS0_RSVD_INTR_MASK, ~smask);
+		xe_mmio_write32(gt, BCS_RSVD_INTR_MASK, ~smask);
+		/* The paired mask registers are written only if one of their engines exists */
+		if (bcs_mask & (BIT(1)|BIT(2)))
+			xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK, ~dmask);
+		if (bcs_mask & (BIT(3)|BIT(4)))
+			xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK, ~dmask);
+		if (bcs_mask & (BIT(5)|BIT(6)))
+			xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK, ~dmask);
+		if (bcs_mask & (BIT(7)|BIT(8)))
+			xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK, ~dmask);
+		if (ccs_mask & (BIT(0)|BIT(1)))
+			xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK, ~dmask);
+		if (ccs_mask & (BIT(2)|BIT(3)))
+			xe_mmio_write32(gt,  CCS2_CCS3_INTR_MASK, ~dmask);
+	}
+
+	if (xe_gt_is_media_type(gt) || MEDIA_VER(xe) < 13) {
+		/* Enable interrupts for each engine class */
+		xe_mmio_write32(gt, VCS_VECS_INTR_ENABLE, dmask);
+
+		/* Unmask interrupts for each engine instance */
+		xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK, ~dmask);
+		xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK, ~dmask);
+		xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK, ~dmask);
+
+		/*
+		 * GSC: use the engine interrupt bits when the GT has an
+		 * OTHER-class engine, else the HECI interface bit if the
+		 * device has HECI GSCFI; otherwise leave GSC untouched.
+		 */
+		if (xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER))
+			gsc_mask = irqs;
+		else if (HAS_HECI_GSCFI(xe))
+			gsc_mask = GSC_IRQ_INTF(1);
+		if (gsc_mask) {
+			xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, gsc_mask);
+			xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~gsc_mask);
+		}
+	}
+}
+
+/*
+ * Fetch the identity word for interrupt @bit of @bank.
+ *
+ * Selects the bit through IIR_REG_SELECTOR(bank), polls
+ * INTR_IDENTITY_REG(bank) until INTR_DATA_VALID is set or the timeout
+ * expires, and writes the value back to the identity register on success.
+ * Must be called with xe->irq.lock held.
+ *
+ * Returns the identity value, or 0 (after logging an error) if the data
+ * never became valid.
+ */
+static u32
+gt_engine_identity(struct xe_device *xe,
+		   struct xe_gt *mmio,
+		   const unsigned int bank,
+		   const unsigned int bit)
+{
+	u32 timeout_ts;
+	u32 ident;
+
+	lockdep_assert_held(&xe->irq.lock);
+
+	xe_mmio_write32(mmio, IIR_REG_SELECTOR(bank), BIT(bit));
+
+	/*
+	 * NB: Specs do not specify how long to spin wait,
+	 * so we do ~100us as an educated guess.
+	 */
+	/* local_clock() >> 10 is used as an approximate microsecond counter */
+	timeout_ts = (local_clock() >> 10) + 100;
+	do {
+		ident = xe_mmio_read32(mmio, INTR_IDENTITY_REG(bank));
+	} while (!(ident & INTR_DATA_VALID) &&
+		 !time_after32(local_clock() >> 10, timeout_ts));
+
+	if (unlikely(!(ident & INTR_DATA_VALID))) {
+		drm_err(&xe->drm, "INTR_IDENTITY_REG%u:%u 0x%08x not valid!\n",
+			bank, bit, ident);
+		return 0;
+	}
+
+	xe_mmio_write32(mmio, INTR_IDENTITY_REG(bank), ident);
+
+	return ident;
+}
+
+#define   OTHER_MEDIA_GUC_INSTANCE           16
+
+/*
+ * Handle an OTHER-class interrupt that did not match a hardware engine.
+ *
+ * The GuC handler runs when @instance is the GuC instance matching the
+ * type of @gt (OTHER_GUC_INSTANCE on a non-media GT,
+ * OTHER_MEDIA_GUC_INSTANCE on a media GT). A GuC instance arriving on the
+ * other GT type is ignored silently; any other instance triggers a
+ * one-time warning.
+ */
+static void
+gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const u16 iir)
+{
+	if (instance == OTHER_GUC_INSTANCE && !xe_gt_is_media_type(gt)) {
+		xe_guc_irq_handler(&gt->uc.guc, iir);
+		return;
+	}
+
+	if (instance == OTHER_MEDIA_GUC_INSTANCE && xe_gt_is_media_type(gt)) {
+		xe_guc_irq_handler(&gt->uc.guc, iir);
+		return;
+	}
+
+	/* GuC instance on the wrong GT type: nothing to do */
+	if (instance == OTHER_GUC_INSTANCE ||
+	    instance == OTHER_MEDIA_GUC_INSTANCE)
+		return;
+
+	WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n",
+		  instance, iir);
+}
+
+/*
+ * Choose which GT of @tile an interrupt for (@class, @instance) belongs to.
+ *
+ * With MEDIA_VER(xe) < 13 everything maps to the primary GT. Otherwise the
+ * video decode/enhance classes, and the OTHER class with the media GuC or
+ * GSC instance, map to the media GT; the rest map to the primary GT.
+ */
+static struct xe_gt *pick_engine_gt(struct xe_tile *tile,
+				    enum xe_engine_class class,
+				    unsigned int instance)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+
+	if (MEDIA_VER(xe) < 13)
+		return tile->primary_gt;
+
+	switch (class) {
+	case XE_ENGINE_CLASS_VIDEO_DECODE:
+	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+		return tile->media_gt;
+	case XE_ENGINE_CLASS_OTHER:
+		if (instance == OTHER_MEDIA_GUC_INSTANCE ||
+		    instance == OTHER_GSC_INSTANCE)
+			return tile->media_gt;
+		break;
+	default:
+		break;
+	}
+
+	return tile->primary_gt;
+}
+
+/*
+ * Process the GT interrupts of @tile indicated by @master_ctl.
+ *
+ * For each of the two banks flagged in @master_ctl: read GT_INTR_DW, fetch
+ * the identity word of every set bit, write the dword back, then dispatch
+ * each identity to its hardware engine or to the OTHER-class handlers.
+ * @intr_dw (two entries) and @identity (32 entries) are caller-provided
+ * scratch buffers. The whole sequence runs under xe->irq.lock.
+ */
+static void gt_irq_handler(struct xe_tile *tile,
+			   u32 master_ctl, unsigned long *intr_dw,
+			   u32 *identity)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_gt *mmio = tile->primary_gt;
+	unsigned int bank, bit;
+	u16 instance, intr_vec;
+	enum xe_engine_class class;
+	struct xe_hw_engine *hwe;
+
+	spin_lock(&xe->irq.lock);
+
+	for (bank = 0; bank < 2; bank++) {
+		if (!(master_ctl & GT_DW_IRQ(bank)))
+			continue;
+
+		/* Collect all identities first, then write the dword back. */
+		intr_dw[bank] = xe_mmio_read32(mmio, GT_INTR_DW(bank));
+		for_each_set_bit(bit, intr_dw + bank, 32)
+			identity[bit] = gt_engine_identity(xe, mmio, bank, bit);
+		xe_mmio_write32(mmio, GT_INTR_DW(bank), intr_dw[bank]);
+
+		for_each_set_bit(bit, intr_dw + bank, 32) {
+			struct xe_gt *engine_gt;
+
+			class = INTR_ENGINE_CLASS(identity[bit]);
+			instance = INTR_ENGINE_INSTANCE(identity[bit]);
+			intr_vec = INTR_ENGINE_INTR(identity[bit]);
+
+			/* NOTE(review): engine_gt may be tile->media_gt; confirm it is never NULL here */
+			engine_gt = pick_engine_gt(tile, class, instance);
+
+			hwe = xe_gt_hw_engine(engine_gt, class, instance, false);
+			if (hwe) {
+				xe_hw_engine_handle_irq(hwe, intr_vec);
+				continue;
+			}
+
+			/* No matching engine: only OTHER-class interrupts are handled further */
+			if (class == XE_ENGINE_CLASS_OTHER) {
+				/* HECI GSCFI interrupts come from outside of GT */
+				if (HAS_HECI_GSCFI(xe) && instance == OTHER_GSC_INSTANCE)
+					xe_heci_gsc_irq_handler(xe, intr_vec);
+				else
+					gt_other_irq_handler(engine_gt, instance, intr_vec);
+				continue;
+			}
+		}
+	}
+
+	spin_unlock(&xe->irq.lock);
+}
+
+/*
+ * Top-level interrupt handler for Xe_LP platforms (which did not have
+ * a "master tile" interrupt register).
+ *
+ * Returns IRQ_NONE when interrupts are not enabled in software or when no
+ * master indication is pending, IRQ_HANDLED otherwise.
+ */
+static irqreturn_t xelp_irq_handler(int irq, void *arg)
+{
+	struct xe_device *xe = arg;
+	struct xe_tile *tile = xe_device_get_root_tile(xe);
+	u32 master_ctl, gu_misc_iir;
+	unsigned long intr_dw[2];
+	u32 identity[32];
+
+	/* Check the software enable flag under the irq lock */
+	spin_lock(&xe->irq.lock);
+	if (!xe->irq.enabled) {
+		spin_unlock(&xe->irq.lock);
+		return IRQ_NONE;
+	}
+	spin_unlock(&xe->irq.lock);
+
+	master_ctl = xelp_intr_disable(xe);
+	if (!master_ctl) {
+		/* Nothing pending: re-enable the master interrupt before leaving */
+		xelp_intr_enable(xe, false);
+		return IRQ_NONE;
+	}
+
+	gt_irq_handler(tile, master_ctl, intr_dw, identity);
+
+	xe_display_irq_handler(xe, master_ctl);
+
+	gu_misc_iir = gu_misc_irq_ack(xe, master_ctl);
+
+	xelp_intr_enable(xe, false);
+
+	xe_display_irq_enable(xe, gu_misc_iir);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Disable the master tile interrupt by writing 0 to DG1_MSTR_TILE_INTR,
+ * then read the register back and, if non-zero, write the value back to
+ * acknowledge it.
+ *
+ * Returns the value read (0 if nothing was indicated).
+ */
+static u32 dg1_intr_disable(struct xe_device *xe)
+{
+	struct xe_gt *mmio = xe_root_mmio_gt(xe);
+	u32 val;
+
+	/* First disable interrupts */
+	xe_mmio_write32(mmio, DG1_MSTR_TILE_INTR, 0);
+
+	/* Get the indication levels and ack the master unit */
+	val = xe_mmio_read32(mmio, DG1_MSTR_TILE_INTR);
+	if (unlikely(!val))
+		return 0;
+
+	xe_mmio_write32(mmio, DG1_MSTR_TILE_INTR, val);
+
+	return val;
+}
+
+/*
+ * Write DG1_MSTR_IRQ to DG1_MSTR_TILE_INTR to enable the master tile
+ * interrupt. When @stall is true the register is read back after the write.
+ */
+static void dg1_intr_enable(struct xe_device *xe, bool stall)
+{
+	struct xe_gt *mmio = xe_root_mmio_gt(xe);
+
+	xe_mmio_write32(mmio, DG1_MSTR_TILE_INTR, DG1_MSTR_IRQ);
+	if (stall)
+		xe_mmio_read32(mmio, DG1_MSTR_TILE_INTR);
+}
+
+/*
+ * Top-level interrupt handler for Xe_LP+ and beyond.  These platforms have
+ * a "master tile" interrupt register which must be consulted before the
+ * "graphics master" interrupt register.
+ *
+ * Returns IRQ_NONE when interrupts are not enabled in software or when no
+ * tile indication is pending, IRQ_HANDLED otherwise.
+ */
+static irqreturn_t dg1_irq_handler(int irq, void *arg)
+{
+	struct xe_device *xe = arg;
+	struct xe_tile *tile;
+	u32 master_tile_ctl, master_ctl = 0, gu_misc_iir = 0;
+	unsigned long intr_dw[2];
+	u32 identity[32];
+	u8 id;
+
+	/* TODO: This really shouldn't be copied+pasted */
+
+	spin_lock(&xe->irq.lock);
+	if (!xe->irq.enabled) {
+		spin_unlock(&xe->irq.lock);
+		return IRQ_NONE;
+	}
+	spin_unlock(&xe->irq.lock);
+
+	master_tile_ctl = dg1_intr_disable(xe);
+	if (!master_tile_ctl) {
+		/* Nothing pending: re-enable the master tile interrupt before leaving */
+		dg1_intr_enable(xe, false);
+		return IRQ_NONE;
+	}
+
+	for_each_tile(tile, xe, id) {
+		struct xe_gt *mmio = tile->primary_gt;
+
+		/* Skip tiles that did not raise an indication */
+		if ((master_tile_ctl & DG1_MSTR_TILE(tile->id)) == 0)
+			continue;
+
+		master_ctl = xe_mmio_read32(mmio, GFX_MSTR_IRQ);
+
+		/*
+		 * We might be in irq handler just when PCIe DPC is initiated
+		 * and all MMIO reads will be returned with all 1's. Ignore this
+		 * irq as device is inaccessible.
+		 */
+		/* Note: this path returns without re-enabling the master tile interrupt */
+		if (master_ctl == REG_GENMASK(31, 0)) {
+			dev_dbg(tile_to_xe(tile)->drm.dev,
+				"Ignore this IRQ as device might be in DPC containment.\n");
+			return IRQ_HANDLED;
+		}
+
+		/* Write the value back to the tile's graphics master register */
+		xe_mmio_write32(mmio, GFX_MSTR_IRQ, master_ctl);
+
+		gt_irq_handler(tile, master_ctl, intr_dw, identity);
+
+		/*
+		 * Display interrupts (including display backlight operations
+		 * that get reported as Gunit GSE) would only be hooked up to
+		 * the primary tile.
+		 */
+		if (id == 0) {
+			xe_display_irq_handler(xe, master_ctl);
+			gu_misc_iir = gu_misc_irq_ack(xe, master_ctl);
+		}
+	}
+
+	dg1_intr_enable(xe, false);
+	xe_display_irq_enable(xe, gu_misc_iir);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Disable and mask the engine, GSC, GPM and GuC interrupt registers of
+ * @tile. All accesses go through the tile's primary GT; registers for
+ * optional copy/compute engines are touched only if those engines exist.
+ */
+static void gt_irq_reset(struct xe_tile *tile)
+{
+	struct xe_gt *mmio = tile->primary_gt;
+
+	u32 ccs_mask = xe_hw_engine_mask_per_class(tile->primary_gt,
+						   XE_ENGINE_CLASS_COMPUTE);
+	u32 bcs_mask = xe_hw_engine_mask_per_class(tile->primary_gt,
+						   XE_ENGINE_CLASS_COPY);
+
+	/* Disable RCS, BCS, VCS and VECS class engines. */
+	xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE, 0);
+	xe_mmio_write32(mmio, VCS_VECS_INTR_ENABLE, 0);
+	if (ccs_mask)
+		xe_mmio_write32(mmio, CCS_RSVD_INTR_ENABLE, 0);
+
+	/* Restore masks irqs on RCS, BCS, VCS and VECS engines. */
+	xe_mmio_write32(mmio, RCS0_RSVD_INTR_MASK,	~0);
+	xe_mmio_write32(mmio, BCS_RSVD_INTR_MASK,	~0);
+	if (bcs_mask & (BIT(1)|BIT(2)))
+		xe_mmio_write32(mmio, XEHPC_BCS1_BCS2_INTR_MASK, ~0);
+	if (bcs_mask & (BIT(3)|BIT(4)))
+		xe_mmio_write32(mmio, XEHPC_BCS3_BCS4_INTR_MASK, ~0);
+	if (bcs_mask & (BIT(5)|BIT(6)))
+		xe_mmio_write32(mmio, XEHPC_BCS5_BCS6_INTR_MASK, ~0);
+	if (bcs_mask & (BIT(7)|BIT(8)))
+		xe_mmio_write32(mmio, XEHPC_BCS7_BCS8_INTR_MASK, ~0);
+	xe_mmio_write32(mmio, VCS0_VCS1_INTR_MASK,	~0);
+	xe_mmio_write32(mmio, VCS2_VCS3_INTR_MASK,	~0);
+	xe_mmio_write32(mmio, VECS0_VECS1_INTR_MASK,	~0);
+	if (ccs_mask & (BIT(0)|BIT(1)))
+		xe_mmio_write32(mmio, CCS0_CCS1_INTR_MASK, ~0);
+	if (ccs_mask & (BIT(2)|BIT(3)))
+		xe_mmio_write32(mmio,  CCS2_CCS3_INTR_MASK, ~0);
+
+	/* GSC registers: only when the media GT has an OTHER-class engine or the device has HECI GSCFI */
+	if ((tile->media_gt &&
+	     xe_hw_engine_mask_per_class(tile->media_gt, XE_ENGINE_CLASS_OTHER)) ||
+	    HAS_HECI_GSCFI(tile_to_xe(tile))) {
+		xe_mmio_write32(mmio, GUNIT_GSC_INTR_ENABLE, 0);
+		xe_mmio_write32(mmio, GUNIT_GSC_INTR_MASK, ~0);
+	}
+
+	xe_mmio_write32(mmio, GPM_WGBOXPERF_INTR_ENABLE, 0);
+	xe_mmio_write32(mmio, GPM_WGBOXPERF_INTR_MASK,  ~0);
+	xe_mmio_write32(mmio, GUC_SG_INTR_ENABLE,	 0);
+	xe_mmio_write32(mmio, GUC_SG_INTR_MASK,		~0);
+}
+
+/*
+ * Reset interrupt state of @tile on Xe_LP: disable the master interrupt,
+ * reset the GT registers, then mask and disable the PCU register group.
+ */
+static void xelp_irq_reset(struct xe_tile *tile)
+{
+	xelp_intr_disable(tile_to_xe(tile));
+
+	gt_irq_reset(tile);
+
+	mask_and_disable(tile, PCU_IRQ_OFFSET);
+}
+
+/*
+ * Reset interrupt state of @tile on platforms with a master tile register:
+ * the master tile interrupt is disabled once (when called for tile 0), then
+ * the GT registers are reset and the PCU register group masked and disabled.
+ */
+static void dg1_irq_reset(struct xe_tile *tile)
+{
+	if (tile->id == 0)
+		dg1_intr_disable(tile_to_xe(tile));
+
+	gt_irq_reset(tile);
+
+	mask_and_disable(tile, PCU_IRQ_OFFSET);
+}
+
+/* Write all ones to the GFX_MSTR_IRQ register of @tile. */
+static void dg1_irq_reset_mstr(struct xe_tile *tile)
+{
+	struct xe_gt *mmio = tile->primary_gt;
+
+	xe_mmio_write32(mmio, GFX_MSTR_IRQ, ~0);
+}
+
+/*
+ * Reset all interrupt state of @xe: per-tile registers first (variant
+ * chosen by graphics version), then GU_MISC on the root tile and display,
+ * and finally the per-tile graphics master registers on versions >= 12.10.
+ */
+static void xe_irq_reset(struct xe_device *xe)
+{
+	struct xe_tile *tile;
+	u8 id;
+
+	for_each_tile(tile, xe, id) {
+		if (GRAPHICS_VERx100(xe) >= 1210)
+			dg1_irq_reset(tile);
+		else
+			xelp_irq_reset(tile);
+	}
+
+	tile = xe_device_get_root_tile(xe);
+	mask_and_disable(tile, GU_MISC_IRQ_OFFSET);
+	xe_display_irq_reset(xe);
+
+	/*
+	 * The tile's top-level status register should be the last one
+	 * to be reset to avoid possible bit re-latching from lower
+	 * level interrupts.
+	 */
+	if (GRAPHICS_VERx100(xe) >= 1210) {
+		for_each_tile(tile, xe, id)
+			dg1_irq_reset_mstr(tile);
+	}
+}
+
+/*
+ * Turn interrupts on: run the display post-install hook, enable GU_MISC_GSE
+ * on the root tile, then enable the top-level interrupt (with a read-back)
+ * using the variant matching the graphics version.
+ */
+static void xe_irq_postinstall(struct xe_device *xe)
+{
+	xe_display_irq_postinstall(xe, xe_root_mmio_gt(xe));
+
+	/*
+	 * ASLE backlight operations are reported via GUnit GSE interrupts
+	 * on the root tile.
+	 */
+	unmask_and_enable(xe_device_get_root_tile(xe),
+			  GU_MISC_IRQ_OFFSET, GU_MISC_GSE);
+
+	/* Enable top-level interrupts */
+	if (GRAPHICS_VERx100(xe) >= 1210)
+		dg1_intr_enable(xe, true);
+	else
+		xelp_intr_enable(xe, true);
+}
+
+/* Select the top-level handler: dg1 variant for graphics version >= 12.10, xelp otherwise. */
+static irq_handler_t xe_irq_handler(struct xe_device *xe)
+{
+	return GRAPHICS_VERx100(xe) >= 1210 ? dg1_irq_handler : xelp_irq_handler;
+}
+
+/*
+ * Managed release action (also called from xe_irq_shutdown()): if
+ * interrupts are currently enabled, mark them disabled, reset the hardware
+ * interrupt state and free the IRQ. Does nothing when already disabled, so
+ * repeated calls are harmless. @drm is unused; @arg is the xe_device.
+ */
+static void irq_uninstall(struct drm_device *drm, void *arg)
+{
+	struct xe_device *xe = arg;
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	int irq;
+
+	/* NOTE(review): irq.enabled is accessed here without irq.lock — confirm this cannot race with the handler */
+	if (!xe->irq.enabled)
+		return;
+
+	xe->irq.enabled = false;
+	xe_irq_reset(xe);
+
+	irq = pci_irq_vector(pdev, 0);
+	free_irq(irq, xe);
+}
+
+/*
+ * Install the top-level interrupt handler for @xe.
+ *
+ * Resets the interrupt hardware, allocates a single MSI/MSI-X vector,
+ * requests the IRQ, enables interrupts and registers irq_uninstall() as a
+ * DRM-managed release action.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_irq_install(struct xe_device *xe)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	irq_handler_t irq_handler;
+	int err, irq;
+
+	irq_handler = xe_irq_handler(xe);
+	if (!irq_handler) {
+		drm_err(&xe->drm, "No supported interrupt handler\n");
+		return -EINVAL;
+	}
+
+	xe_irq_reset(xe);
+
+	err = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI | PCI_IRQ_MSIX);
+	if (err < 0) {
+		drm_err(&xe->drm, "MSI/MSIX: Failed to enable support %d\n", err);
+		return err;
+	}
+
+	irq = pci_irq_vector(pdev, 0);
+	err = request_irq(irq, irq_handler, IRQF_SHARED, DRIVER_NAME, xe);
+	if (err < 0) {
+		/* NOTE(review): the vector allocated above is not released on this path — confirm it is device-managed */
+		drm_err(&xe->drm, "Failed to request MSI/MSIX IRQ %d\n", err);
+		return err;
+	}
+
+	xe->irq.enabled = true;
+
+	xe_irq_postinstall(xe);
+
+	/*
+	 * On failure drmm_add_action_or_reset() has already invoked
+	 * irq_uninstall(), which resets the hardware and frees the IRQ.
+	 * Freeing it again here would be a double free_irq().
+	 */
+	return drmm_add_action_or_reset(&xe->drm, irq_uninstall, xe);
+}
+
+/* Tear down interrupts for @xe immediately by running irq_uninstall(). */
+void xe_irq_shutdown(struct xe_device *xe)
+{
+	irq_uninstall(&xe->drm, xe);
+}
+
+/*
+ * Quiesce interrupts for suspend: clear the software enable flag under the
+ * irq lock, wait for any running handler to finish, then reset the
+ * interrupt hardware.
+ */
+void xe_irq_suspend(struct xe_device *xe)
+{
+	/*
+	 * Use the same vector lookup as xe_irq_install()/irq_uninstall() so
+	 * that the IRQ being synchronized is the one that was requested.
+	 */
+	int irq = pci_irq_vector(to_pci_dev(xe->drm.dev), 0);
+
+	spin_lock_irq(&xe->irq.lock);
+	xe->irq.enabled = false; /* no new irqs */
+	spin_unlock_irq(&xe->irq.lock);
+
+	synchronize_irq(irq); /* flush irqs */
+	xe_irq_reset(xe); /* turn irqs off */
+}
+
+/*
+ * Re-enable interrupts after resume: set the software enable flag, reset
+ * and re-install the top-level interrupt state, then enable the hardware
+ * engine interrupts of every GT.
+ */
+void xe_irq_resume(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	int id;
+
+	/*
+	 * lock not needed:
+	 * 1. no irq will arrive before the postinstall
+	 * 2. display is not yet resumed
+	 */
+	xe->irq.enabled = true;
+	xe_irq_reset(xe);
+	xe_irq_postinstall(xe); /* turn irqs on */
+
+	for_each_gt(gt, xe, id)
+		xe_irq_enable_hwe(gt);
+}
diff --git a/drivers/gpu/drm/xe/xe_irq.h b/drivers/gpu/drm/xe/xe_irq.h
new file mode 100644
index 000000000000..bc42bc90d967
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_irq.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_IRQ_H_
+#define _XE_IRQ_H_
+
+struct xe_device;
+struct xe_tile;
+struct xe_gt;
+
+int xe_irq_install(struct xe_device *xe);
+void xe_irq_shutdown(struct xe_device *xe);
+void xe_irq_suspend(struct xe_device *xe);
+void xe_irq_resume(struct xe_device *xe);
+void xe_irq_enable_hwe(struct xe_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c
new file mode 100644
index 000000000000..0d7c5514e092
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lmtt.c
@@ -0,0 +1,506 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/align.h>
+
+#include <drm/drm_managed.h>
+
+#include "regs/xe_sriov_regs.h"
+
+#include "xe_assert.h"
+#include "xe_bo.h"
+#include "xe_lmtt.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_res_cursor.h"
+#include "xe_sriov.h"
+#include "xe_sriov_printk.h"
+
+/**
+ * DOC: Local Memory Translation Table
+ *
+ * The Local Memory Translation Table (LMTT) provides additional abstraction
+ * when Virtual Function (VF) is accessing device Local Memory (VRAM).
+ *
+ * The Root LMTT Page Directory contains one entry for each VF. Entries are
+ * indexed by the function number (1-based, index 0 is unused).
+ *
+ * See `Two-Level LMTT Structure`_ and `Multi-Level LMTT Structure`_.
+ */
+
+/* Assert @condition against the tile that embeds @lmtt. */
+#define lmtt_assert(lmtt, condition)	xe_tile_assert(lmtt_to_tile(lmtt), condition)
+/* Verbose SR-IOV debug message for the device owning @lmtt, prefixed with "LMTT: ". */
+#define lmtt_debug(lmtt, msg...)	xe_sriov_dbg_verbose(lmtt_to_xe(lmtt), "LMTT: " msg)
+
+/* True when @xe is the XE_PVC platform, for which xe_lmtt_init() selects the multi-level ops. */
+static bool xe_has_multi_level_lmtt(struct xe_device *xe)
+{
+	return xe->info.platform == XE_PVC;
+}
+
+/* Return the tile that embeds @lmtt as its sriov.pf.lmtt member. */
+static struct xe_tile *lmtt_to_tile(struct xe_lmtt *lmtt)
+{
+	return container_of(lmtt, struct xe_tile, sriov.pf.lmtt);
+}
+
+/* Return the device that owns the tile embedding @lmtt. */
+static struct xe_device *lmtt_to_xe(struct xe_lmtt *lmtt)
+{
+	struct xe_tile *tile = lmtt_to_tile(lmtt);
+
+	return tile_to_xe(tile);
+}
+
+/* Size in bytes covered by one level-0 entry: 2 to the power of lmtt_pte_shift(0). */
+static u64 lmtt_page_size(struct xe_lmtt *lmtt)
+{
+	return BIT_ULL(lmtt->ops->lmtt_pte_shift(0));
+}
+
+/*
+ * Allocate one LMTT page table for @level.
+ *
+ * The software descriptor gets a child-pointer array of lmtt_pte_num(@level)
+ * entries (none for level 0). The backing buffer object is sized for
+ * lmtt_pte_num(@level) entries of lmtt_pte_size(@level) bytes, rounded up
+ * to a page, and is created pinned and mapped.
+ *
+ * Returns the new table or an ERR_PTR() on failure.
+ */
+static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level)
+{
+	unsigned int nr_children = 0;
+	struct xe_lmtt_pt *table;
+	struct xe_bo *backing;
+	int err;
+
+	/* Only non-leaf levels track child tables */
+	if (level)
+		nr_children = lmtt->ops->lmtt_pte_num(level);
+
+	table = kzalloc(struct_size(table, entries, nr_children), GFP_KERNEL);
+	if (!table)
+		return ERR_PTR(-ENOMEM);
+
+	backing = xe_bo_create_pin_map(lmtt_to_xe(lmtt), lmtt_to_tile(lmtt), NULL,
+				       PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) *
+						  lmtt->ops->lmtt_pte_num(level)),
+				       ttm_bo_type_kernel,
+				       XE_BO_CREATE_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) |
+				       XE_BO_CREATE_PINNED_BIT);
+	if (IS_ERR(backing)) {
+		err = PTR_ERR(backing);
+		kfree(table);
+		return ERR_PTR(err);
+	}
+
+	lmtt_assert(lmtt, xe_bo_is_vram(backing));
+
+	table->level = level;
+	table->bo = backing;
+
+	return table;
+}
+
+/* Release the backing buffer object of @pt and free the descriptor; child tables are not touched. */
+static void lmtt_pt_free(struct xe_lmtt_pt *pt)
+{
+	xe_bo_unpin_map_no_vm(pt->bo);
+	kfree(pt);
+}
+
+/*
+ * Allocate the root page directory at the level reported by
+ * lmtt_root_pd_level() and store it in lmtt->pd.
+ *
+ * Returns 0 on success or the allocation error.
+ */
+static int lmtt_init_pd(struct xe_lmtt *lmtt)
+{
+	struct xe_lmtt_pt *pd;
+
+	/* Must not be initialized already, and the root level must be non-zero */
+	lmtt_assert(lmtt, !lmtt->pd);
+	lmtt_assert(lmtt, lmtt->ops->lmtt_root_pd_level());
+
+	pd = lmtt_pt_alloc(lmtt, lmtt->ops->lmtt_root_pd_level());
+	if (IS_ERR(pd))
+		return PTR_ERR(pd);
+
+	lmtt->pd = pd;
+	return 0;
+}
+
+/*
+ * Detach and free the root page directory. Every child slot is expected to
+ * be empty already; this is asserted rather than cleaned up.
+ */
+static void lmtt_fini_pd(struct xe_lmtt *lmtt)
+{
+	struct xe_lmtt_pt *root = lmtt->pd;
+	unsigned int count = lmtt->ops->lmtt_pte_num(root->level);
+	unsigned int i;
+
+	/* make sure we don't leak */
+	for (i = 0; i < count; i++)
+		lmtt_assert(lmtt, !root->entries[i]);
+
+	lmtt->pd = NULL;
+	lmtt_pt_free(root);
+}
+
+/*
+ * DRM-managed release action registered by xe_lmtt_init(): free the root
+ * page directory and clear the ops pointer. @drm is unused; @arg is the
+ * xe_lmtt.
+ */
+static void fini_lmtt(struct drm_device *drm, void *arg)
+{
+	struct xe_lmtt *lmtt = arg;
+
+	/* ops and pd must be either both set or both clear */
+	lmtt_assert(lmtt, !(!!lmtt->ops ^ !!lmtt->pd));
+
+	if (!lmtt->pd)
+		return;
+
+	lmtt_fini_pd(lmtt);
+	lmtt->ops = NULL;
+}
+
+/**
+ * xe_lmtt_init - LMTT software initialization.
+ * @lmtt: the &xe_lmtt to initialize
+ *
+ * The LMTT initialization requires two steps.
+ *
+ * The xe_lmtt_init() checks if LMTT is required on the current device, then
+ * selects and initializes the proper variant of the LMTT Root Directory.
+ * Currently supported variants are `Two-Level LMTT Structure`_ and
+ * `Multi-Level LMTT Structure`_.
+ *
+ * In the next step xe_lmtt_init_hw() will register this directory on the
+ * hardware.
+ *
+ * Notes:
+ * The LMTT allocations are managed and will be implicitly released on driver unload.
+ * This function shall be called only once and only when running as a PF driver.
+ * Any LMTT initialization failure should block VFs enabling.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_lmtt_init(struct xe_lmtt *lmtt)
+{
+	struct xe_device *xe = lmtt_to_xe(lmtt);
+	int err;
+
+	lmtt_assert(lmtt, IS_SRIOV_PF(xe));
+	lmtt_assert(lmtt, !lmtt->ops);
+
+	/* Nothing to set up on non-discrete devices; ops and pd stay NULL */
+	if (!IS_DGFX(xe))
+		return 0;
+
+	if (xe_has_multi_level_lmtt(xe))
+		lmtt->ops = &lmtt_ml_ops;
+	else
+		lmtt->ops = &lmtt_2l_ops;
+
+	err = lmtt_init_pd(lmtt);
+	if (unlikely(err))
+		goto fail;
+
+	/* On failure the action (fini_lmtt) runs immediately and undoes the setup */
+	return drmm_add_action_or_reset(&xe->drm, fini_lmtt, lmtt);
+
+fail:
+	lmtt->ops = NULL;
+	return err;
+}
+
+/*
+ * Program the LMEM_CFG register (XE2_LMEM_CFG on graphics version >= 20)
+ * of the tile's primary GT with LMEM_EN and the address of the root
+ * directory expressed in 64K units.
+ */
+static void lmtt_setup_dir_ptr(struct xe_lmtt *lmtt)
+{
+	struct xe_tile *tile = lmtt_to_tile(lmtt);
+	struct xe_device *xe = tile_to_xe(tile);
+	dma_addr_t offset = xe_bo_main_addr(lmtt->pd->bo, XE_PAGE_SIZE);
+
+	lmtt_debug(lmtt, "DIR offset %pad\n", &offset);
+	lmtt_assert(lmtt, xe_bo_is_vram(lmtt->pd->bo));
+	/* The register field holds offset / SZ_64K, so the address must be 64K aligned */
+	lmtt_assert(lmtt, IS_ALIGNED(offset, SZ_64K));
+
+	xe_mmio_write32(tile->primary_gt,
+			GRAPHICS_VER(xe) >= 20 ? XE2_LMEM_CFG : LMEM_CFG,
+			LMEM_EN | REG_FIELD_PREP(LMTT_DIR_PTR, offset / SZ_64K));
+}
+
+/**
+ * xe_lmtt_init_hw - Perform LMTT hardware initialization.
+ * @lmtt: the &xe_lmtt to initialize
+ *
+ * This function is a second step of the LMTT initialization.
+ * This function registers LMTT Root Directory prepared in xe_lmtt_init().
+ *
+ * This function shall be called after every hardware reset.
+ * This function shall be called only when running as a PF driver.
+ */
+void xe_lmtt_init_hw(struct xe_lmtt *lmtt)
+{
+	/* No root directory was set up (e.g. xe_lmtt_init() skipped a non-DGFX device) */
+	if (!lmtt->pd)
+		return;
+
+	lmtt_setup_dir_ptr(lmtt);
+}
+
+/*
+ * Write entry @idx of table @pt with the value @pte.
+ *
+ * The entry width (32 or 64 bit) is taken from lmtt_pte_size() for the
+ * table's level; any other width trips an assertion and writes nothing.
+ */
+static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt,
+			   u64 pte, unsigned int idx)
+{
+	unsigned int level = pt->level;
+
+	/*
+	 * A table holds lmtt_pte_num(level) entries, so valid indices are
+	 * 0 .. num - 1.
+	 */
+	lmtt_assert(lmtt, idx < lmtt->ops->lmtt_pte_num(level));
+	lmtt_debug(lmtt, "WRITE level=%u index=%u pte=%#llx\n", level, idx, pte);
+
+	switch (lmtt->ops->lmtt_pte_size(level)) {
+	case sizeof(u32):
+		xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u32), u32, pte);
+		break;
+	case sizeof(u64):
+		xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u64), u64, pte);
+		break;
+	default:
+		lmtt_assert(lmtt, !!!"invalid pte size");
+		break;
+	}
+}
+
+/*
+ * Recursively free the table @pd together with every child table reachable
+ * through its entries. Level-0 tables have no children.
+ */
+static void lmtt_destroy_pt(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pd)
+{
+	unsigned int num_entries = pd->level ? lmtt->ops->lmtt_pte_num(pd->level) : 0;
+	struct xe_lmtt_pt *pt;
+	unsigned int i;
+
+	for (i = 0; i < num_entries; i++) {
+		/* Detach the child before descending into it */
+		pt = pd->entries[i];
+		pd->entries[i] = NULL;
+		if (!pt)
+			continue;
+
+		lmtt_destroy_pt(lmtt, pt);
+	}
+
+	lmtt_pt_free(pd);
+}
+
+/*
+ * Remove the page-table tree of VF @vfid: detach it from the root
+ * directory, invalidate the root entry and free the whole subtree. Does
+ * nothing if the VF has no tree.
+ */
+static void lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid)
+{
+	struct xe_lmtt_pt *pd = lmtt->pd;
+	struct xe_lmtt_pt *pt;
+
+	pt = pd->entries[vfid];
+	pd->entries[vfid] = NULL;
+	if (!pt)
+		return;
+
+	/* Invalidate the root entry before the tables it pointed to are freed */
+	lmtt_write_pte(lmtt, pd, LMTT_PTE_INVALID, vfid);
+
+	lmtt_assert(lmtt, pd->level > 0);
+	lmtt_assert(lmtt, pt->level == pd->level - 1);
+	lmtt_destroy_pt(lmtt, pt);
+}
+
+static int __lmtt_alloc_range(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pd,
+			      u64 start, u64 end)
+{
+	u64 pte_addr_shift = BIT_ULL(lmtt->ops->lmtt_pte_shift(pd->level));
+	u64 offset;
+	int err;
+
+	lmtt_assert(lmtt, pd->level > 0);
+	/* despite its name, pte_addr_shift is a size: 1 << pte_shift(pd->level) */
+	offset = start;
+	while (offset < end) {
+		struct xe_lmtt_pt *pt;
+		u64 next, pde, pt_addr;
+		unsigned int idx;
+
+		pt = lmtt_pt_alloc(lmtt, pd->level - 1);
+		if (IS_ERR(pt))
+			return PTR_ERR(pt);
+		/* write a PDE for the new table into @pd and record it in entries[] */
+		pt_addr = xe_bo_main_addr(pt->bo, XE_PAGE_SIZE);
+
+		idx = lmtt->ops->lmtt_pte_index(offset, pd->level);
+		pde = lmtt->ops->lmtt_pte_encode(pt_addr, pd->level);
+
+		lmtt_write_pte(lmtt, pd, pde, idx);
+
+		pd->entries[idx] = pt;
+		/* step to the next multiple of pte_addr_shift, clamped to @end */
+		next = min(end, round_up(offset + 1, pte_addr_shift));
+		/* on failure the tables built so far stay linked in @pd->entries */
+		if (pt->level != 0) {
+			err = __lmtt_alloc_range(lmtt, pt, offset, next);
+			if (err)
+				return err;
+		}
+
+		offset = next;
+	}
+
+	return 0;
+}
+
+static int lmtt_alloc_range(struct xe_lmtt *lmtt, unsigned int vfid, u64 start, u64 end)
+{
+	struct xe_lmtt_pt *pd = lmtt->pd;
+	struct xe_lmtt_pt *pt;
+	u64 pt_addr;
+	u64 pde;
+	int err;
+
+	lmtt_assert(lmtt, pd->level > 0);
+	lmtt_assert(lmtt, vfid < lmtt->ops->lmtt_pte_num(pd->level));
+	lmtt_assert(lmtt, IS_ALIGNED(start, lmtt_page_size(lmtt)));
+	lmtt_assert(lmtt, IS_ALIGNED(end, lmtt_page_size(lmtt)));
+
+	/* the root holds one entry per VF; never overwrite one that is in use */
+	if (pd->entries[vfid])
+		return -ENOTEMPTY;
+
+	pt = lmtt_pt_alloc(lmtt, pd->level - 1);
+	if (IS_ERR(pt))
+		return PTR_ERR(pt);
+
+	pt_addr = xe_bo_main_addr(pt->bo, XE_PAGE_SIZE);
+	pde = lmtt->ops->lmtt_pte_encode(pt_addr, pd->level);
+
+	lmtt_write_pte(lmtt, pd, pde, vfid);
+	pd->entries[vfid] = pt;
+
+	if (pt->level != 0) {
+		err = __lmtt_alloc_range(lmtt, pt, start, end);
+		if (err)
+			goto out_drop_pages;
+	}
+
+	return 0;
+
+out_drop_pages:
+	/* unlink from the root, invalidate the PDE and free the whole sub-tree */
+	lmtt_drop_pages(lmtt, vfid);
+	return err;
+}
+
+static struct xe_lmtt_pt *lmtt_leaf_pt(struct xe_lmtt *lmtt, unsigned int vfid, u64 addr)
+{
+	struct xe_lmtt_pt *pd = lmtt->pd;
+	struct xe_lmtt_pt *pt;
+
+	lmtt_assert(lmtt, vfid < lmtt->ops->lmtt_pte_num(pd->level));
+	pt = pd->entries[vfid];
+	/* descend one level per iteration until the leaf (level 0) is reached */
+	while (pt->level) {
+		lmtt_assert(lmtt, lmtt->ops->lmtt_pte_index(addr, pt->level) <
+			    lmtt->ops->lmtt_pte_num(pt->level));
+
+		pt = pt->entries[lmtt->ops->lmtt_pte_index(addr, pt->level)];
+
+		addr >>= lmtt->ops->lmtt_pte_shift(pt->level);
+	}
+
+	lmtt_assert(lmtt, lmtt->ops->lmtt_pte_index(addr, pt->level) <
+		    lmtt->ops->lmtt_pte_num(pt->level));
+	lmtt_assert(lmtt, pt->level != pd->level);
+	lmtt_assert(lmtt, pt->level == 0);
+	return pt;
+}
+
+static void lmtt_insert_bo(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo *bo, u64 start)
+{
+	u64 page_size = lmtt_page_size(lmtt);
+	struct xe_res_cursor cur;
+	struct xe_lmtt_pt *pt;
+	u64 addr, vram_offset;
+
+	lmtt_assert(lmtt, IS_ALIGNED(start, page_size));
+	lmtt_assert(lmtt, IS_ALIGNED(bo->size, page_size));
+	lmtt_assert(lmtt, xe_bo_is_vram(bo));
+
+	vram_offset = vram_region_gpu_offset(bo->ttm.resource);
+	xe_res_first(bo->ttm.resource, 0, bo->size, &cur);
+	while (cur.remaining) {
+		addr = xe_res_dma(&cur);
+		addr += vram_offset; /* XXX */
+
+		pt = lmtt_leaf_pt(lmtt, vfid, start);
+
+		lmtt_write_pte(lmtt, pt, lmtt->ops->lmtt_pte_encode(addr, 0),
+					 lmtt->ops->lmtt_pte_index(start, 0));
+
+		xe_res_next(&cur, page_size);
+		start += page_size;
+	}
+}
+
+/**
+ * xe_lmtt_prepare_pages - Create VF's LMTT Page Tables.
+ * @lmtt: the &xe_lmtt to update
+ * @vfid: the VF identifier (1-based)
+ * @range: top range of LMEM offset to be supported
+ *
+ * This function creates empty LMTT page tables for the given VF to support
+ * LMEM offsets up to @range. The LMTT page tables created by this
+ * function must be released using the xe_lmtt_drop_pages() function.
+ *
+ * Notes:
+ * This function shall be called only after successful LMTT initialization.
+ * See xe_lmtt_init().
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_lmtt_prepare_pages(struct xe_lmtt *lmtt, unsigned int vfid, u64 range)
+{
+	lmtt_assert(lmtt, lmtt->pd);
+	lmtt_assert(lmtt, vfid);
+
+	return lmtt_alloc_range(lmtt, vfid, 0, range);
+}
+
+/**
+ * xe_lmtt_populate_pages - Update VF's LMTT Page Table Entries.
+ * @lmtt: the &xe_lmtt to update
+ * @vfid: the VF identifier (1-based)
+ * @bo: the buffer object with LMEM allocation to be mapped
+ * @offset: the offset at which @bo should be mapped
+ *
+ * This function updates VF's LMTT entries to use given buffer object as a backstore.
+ *
+ * Notes:
+ * This function shall be called only after successful preparation of the
+ * VF's LMTT Page Tables. See xe_lmtt_prepare_pages().
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_lmtt_populate_pages(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo *bo, u64 offset)
+{
+	lmtt_assert(lmtt, lmtt->pd);
+	lmtt_assert(lmtt, vfid);
+
+	lmtt_insert_bo(lmtt, vfid, bo, offset);
+	return 0;
+}
+
+/**
+ * xe_lmtt_drop_pages - Remove VF's LMTT Pages.
+ * @lmtt: the &xe_lmtt to update
+ * @vfid: the VF identifier (1-based)
+ *
+ * This function removes all LMTT Page Tables prepared by xe_lmtt_prepare_pages().
+ *
+ * This function shall be called only after successful LMTT initialization.
+ * See xe_lmtt_init().
+ */
+void xe_lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid)
+{
+	lmtt_assert(lmtt, lmtt->pd);
+	lmtt_assert(lmtt, vfid);
+
+	lmtt_drop_pages(lmtt, vfid);
+}
+
+/**
+ * xe_lmtt_estimate_pt_size - Estimate size of LMTT PT allocations.
+ * @lmtt: the &xe_lmtt
+ * @size: the size of the LMEM to be mapped over LMTT (including any offset)
+ *
+ * This function shall be called only by PF.
+ *
+ * Return: size of the PT allocation(s) needed to support given LMEM size.
+ */
+u64 xe_lmtt_estimate_pt_size(struct xe_lmtt *lmtt, u64 size)
+{
+	unsigned int level = 0;
+	u64 pt_size;
+
+	lmtt_assert(lmtt, IS_SRIOV_PF(lmtt_to_xe(lmtt)));
+	lmtt_assert(lmtt, IS_DGFX(lmtt_to_xe(lmtt)));
+	lmtt_assert(lmtt, lmtt->ops);
+
+	pt_size = PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) *
+			     lmtt->ops->lmtt_pte_num(level));
+
+	while (++level < lmtt->ops->lmtt_root_pd_level()) {
+		pt_size *= lmtt->ops->lmtt_pte_index(size, level) + 1;
+		pt_size += PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) *
+				      lmtt->ops->lmtt_pte_num(level));
+	}
+
+	return pt_size;
+}
+
+#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_lmtt_test.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_lmtt.h b/drivers/gpu/drm/xe/xe_lmtt.h
new file mode 100644
index 000000000000..cb10ef994db6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lmtt.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_LMTT_H_
+#define _XE_LMTT_H_
+
+#include <linux/types.h>
+
+struct xe_bo;
+struct xe_lmtt;
+struct xe_lmtt_ops;
+
+#ifdef CONFIG_PCI_IOV
+int xe_lmtt_init(struct xe_lmtt *lmtt);
+void xe_lmtt_init_hw(struct xe_lmtt *lmtt);
+int xe_lmtt_prepare_pages(struct xe_lmtt *lmtt, unsigned int vfid, u64 range);
+int xe_lmtt_populate_pages(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo *bo, u64 offset);
+void xe_lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid);
+u64 xe_lmtt_estimate_pt_size(struct xe_lmtt *lmtt, u64 size);
+#else
+static inline int xe_lmtt_init(struct xe_lmtt *lmtt) { return 0; }
+static inline void xe_lmtt_init_hw(struct xe_lmtt *lmtt) { }
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_lmtt_2l.c b/drivers/gpu/drm/xe/xe_lmtt_2l.c
new file mode 100644
index 000000000000..84bc5c4212b5
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lmtt_2l.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/align.h>
+#include <linux/bitfield.h>
+#include <linux/log2.h>
+#include <linux/sizes.h>
+
+#include "xe_lmtt_types.h"
+#include "xe_macros.h"
+
+/**
+ * DOC: Two-Level LMTT Structure
+ *
+ * LMHAW (Local Memory Host Address Width) is 37 bit (128GB)
+ *
+ * LMGAW (Local Memory Guest Address Width) is 37 bit (128GB)
+ *
+ * The following figure illustrates the structure and function of the 2L LMTT::
+ *
+ *            LMTT Directory
+ *           (1 Entry per VF)
+ *            +-----------+                     LMTT (per VF)
+ *            |           |                     +-----------+
+ *            |           |                     |           |
+ *            |           |          index:     |           |
+ *            |           |          LMEM VF    +===========+
+ *            |           |          offset --> |    PTE    | ==> LMEM PF offset
+ *            |           |                     +===========+
+ *   index:   +===========+                     |           |
+ *   VFID --> |    PDE    |  -----------------> +-----------+
+ *            +===========+                    /              \.
+ *            |           |                   /                 \.
+ *            |           |                  /                    \.
+ *            |           |                 /                       \.
+ *            +-----------+ <== [LMTT Directory Ptr]                  \.
+ *           /             \              /                             \.
+ *          /               \         +-----------+-----------------+------+---+
+ *         /                 \        | 31:HAW-16 |        HAW-17:5 |  4:1 | 0 |
+ *        /                   \       +===========+=================+======+===+
+ *       /                     \      |  Reserved | LMEM Page (2MB) | Rsvd | V |
+ *      /                       \     +-----------+-----------------+------+---+
+ *     /                         \.
+ *   +-----------+-----------------+------+---+
+ *   | 31:HAW-12 |        HAW-13:4 |  3:1 | 0 |
+ *   +===========+=================+======+===+
+ *   |  Reserved | LMTT Ptr (64KB) | Rsvd | V |
+ *   +-----------+-----------------+------+---+
+ *
+ */
+
+typedef u32 lmtt_2l_pde_t;
+typedef u32 lmtt_2l_pte_t;
+
+#if IS_ENABLED(CONFIG_DRM_XE_LMTT_2L_128GB)
+#define LMTT_2L_HAW			37 /* 128 GiB */
+#else
+#define LMTT_2L_HAW			35 /* 32 GiB */
+#endif
+
+#define LMTT_2L_PDE_MAX_NUM		64 /* SRIOV with PF and 63 VFs, index 0 (PF) is unused */
+#define LMTT_2L_PDE_LMTT_PTR		GENMASK(LMTT_2L_HAW - 13, 4)
+#define LMTT_2L_PDE_VALID		BIT(0)
+
+#define LMTT_2L_PTE_MAX_NUM		BIT(LMTT_2L_HAW - ilog2(SZ_2M))
+#define LMTT_2L_PTE_LMEM_PAGE		GENMASK(LMTT_2L_HAW - 17, 5)
+#define LMTT_2L_PTE_VALID		BIT(0)
+
+static unsigned int lmtt_2l_root_pd_level(void)
+{
+	return 1; /* implementation is 0-based */
+}
+
+static unsigned int lmtt_2l_pte_num(unsigned int level)
+{
+	switch (level) {
+	case 1:
+		return LMTT_2L_PDE_MAX_NUM;
+	case 0:
+		BUILD_BUG_ON(LMTT_2L_HAW == 37 && LMTT_2L_PTE_MAX_NUM != SZ_64K);
+		BUILD_BUG_ON(LMTT_2L_HAW == 35 && LMTT_2L_PTE_MAX_NUM != SZ_16K);
+		return LMTT_2L_PTE_MAX_NUM;
+	default:
+		return 0;
+	}
+}
+
+static unsigned int lmtt_2l_pte_size(unsigned int level)
+{
+	/* byte width of one entry: PDE type at level 1, PTE type at level 0 */
+	if (level == 1)
+		return sizeof(lmtt_2l_pde_t);
+
+	if (level == 0)
+		return sizeof(lmtt_2l_pte_t);
+
+	return 0;
+}
+
+static unsigned int lmtt_2l_pte_shift(unsigned int level)
+{
+	/*
+	 * Only level 0 has a non-zero shift (2M granularity); any other level is 0.
+	 */
+	if (level != 0)
+		return 0;
+	return ilog2(SZ_2M);
+}
+
+static unsigned int lmtt_2l_pte_index(u64 addr, unsigned int level)
+{
+	addr >>= lmtt_2l_pte_shift(level);
+
+	switch (level) {
+	case 0:
+		/* SZ_2M increments */
+		BUILD_BUG_ON_NOT_POWER_OF_2(LMTT_2L_PTE_MAX_NUM);
+		return addr & (LMTT_2L_PTE_MAX_NUM - 1);
+	default:
+		return 0;
+	}
+}
+
+static u64 lmtt_2l_pte_encode(unsigned long offset, unsigned int level)
+{
+	switch (level) {
+	case 0:
+		XE_WARN_ON(!IS_ALIGNED(offset, SZ_2M));
+		XE_WARN_ON(!FIELD_FIT(LMTT_2L_PTE_LMEM_PAGE, offset / SZ_2M));
+		return FIELD_PREP(LMTT_2L_PTE_LMEM_PAGE, offset / SZ_2M) | LMTT_2L_PTE_VALID;
+	case 1:
+		XE_WARN_ON(!IS_ALIGNED(offset, SZ_64K));
+		XE_WARN_ON(!FIELD_FIT(LMTT_2L_PDE_LMTT_PTR, offset / SZ_64K));
+		return FIELD_PREP(LMTT_2L_PDE_LMTT_PTR, offset / SZ_64K) | LMTT_2L_PDE_VALID;
+	default:
+		XE_WARN_ON(true);
+		return 0;
+	}
+}
+
+const struct xe_lmtt_ops lmtt_2l_ops = {
+	.lmtt_root_pd_level = lmtt_2l_root_pd_level,
+	.lmtt_pte_num = lmtt_2l_pte_num,
+	.lmtt_pte_size = lmtt_2l_pte_size,
+	.lmtt_pte_shift = lmtt_2l_pte_shift,
+	.lmtt_pte_index = lmtt_2l_pte_index,
+	.lmtt_pte_encode = lmtt_2l_pte_encode,
+};
diff --git a/drivers/gpu/drm/xe/xe_lmtt_ml.c b/drivers/gpu/drm/xe/xe_lmtt_ml.c
new file mode 100644
index 000000000000..b21215a2edd6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lmtt_ml.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/align.h>
+#include <linux/bitfield.h>
+#include <linux/log2.h>
+#include <linux/sizes.h>
+
+#include "xe_lmtt_types.h"
+#include "xe_macros.h"
+
+/**
+ * DOC: Multi-Level LMTT Structure
+ *
+ * LMHAW (Local Memory Host Address Width) is 48 bit (256TB)
+ *
+ * LMGAW (Local Memory Guest Address Width) is 48 bit (256TB)
+ *
+ * The following figure illustrates the structure and function of the ML LMTT::
+ *
+ *           LMTT L3 Directory
+ *           (1 Entry per VF)                                       LMTT L1 Leaf
+ *            +-----------+                                         +-----------+
+ *            |           |             LMTT L2 (per VF)            |           |
+ *            |           |              +-----------+              |           |
+ *            |           |              |           |     index:   +===========+
+ *            |           |              |           |     GDPA --> |    PTE    | => LMEM PF offset
+ *            |           |              |           |     34:21    +===========+
+ *            |           |    index:    |           |              |           |
+ *            |           |    LMEM VF   +===========+              |           |
+ *            |           |    offset -> |    PTE    |  ----------> +-----------+
+ *            |           |    GAW-1:35  +===========+              /           \.
+ *   index:   +===========+              |           |             /              \.
+ *   VFID --> |    PDE    |  --------->  +-----------+            /                 \.
+ *            +===========+             /           /            /                    \.
+ *            |           |           /            /            /                       \.
+ *            +-----------+  <== [LMTT Directory Ptr]          /                          \.
+ *           /             \      /              /            /                             \.
+ *          /                \  /               /       +-----------+-----------------+------+---+
+ *         /                  /\               /        | 31:HAW-16 |        HAW-17:5 |  4:1 | 0 |
+ *        /                 /    \            /         +===========+=================+======+===+
+ *       /                /        \         /          |  Reserved | LMEM Page (2MB) | Rsvd | V |
+ *      /                                   /           +-----------+-----------------+------+---+
+ *     /                                   /
+ *  +-----------+-----------------+------+---+
+ *  | 63:HAW-12 |        HAW-13:4 |  3:1 | 0 |
+ *  +===========+=================+======+===+
+ *  |  Reserved | LMTT Ptr (64KB) | Rsvd | V |
+ *  +-----------+-----------------+------+---+
+ *
+ */
+
+typedef u64 lmtt_ml_pde_t;
+typedef u32 lmtt_ml_pte_t;
+
+#define LMTT_ML_HAW			48 /* 256 TiB */
+
+#define LMTT_ML_PDE_MAX_NUM		64 /* SRIOV with PF and 63 VFs, index 0 (PF) is unused */
+#define LMTT_ML_PDE_LMTT_PTR		GENMASK_ULL(LMTT_ML_HAW - 13, 4)
+#define LMTT_ML_PDE_VALID		BIT(0)
+
+#define LMTT_ML_PDE_L2_SHIFT		35
+#define LMTT_ML_PDE_L2_MAX_NUM		BIT_ULL(LMTT_ML_HAW - 35)
+
+#define LMTT_ML_PTE_MAX_NUM		BIT(35 - ilog2(SZ_2M))
+#define LMTT_ML_PTE_LMEM_PAGE		GENMASK(LMTT_ML_HAW - 17, 5)
+#define LMTT_ML_PTE_VALID		BIT(0)
+
+static unsigned int lmtt_ml_root_pd_level(void)
+{
+	return 2; /* implementation is 0-based */
+}
+
+static unsigned int lmtt_ml_pte_num(unsigned int level)
+{
+	switch (level) {
+	case 2:
+		return LMTT_ML_PDE_MAX_NUM;
+	case 1:
+		BUILD_BUG_ON(LMTT_ML_HAW == 48 && LMTT_ML_PDE_L2_MAX_NUM != SZ_8K);
+		return LMTT_ML_PDE_L2_MAX_NUM;
+	case 0:
+		BUILD_BUG_ON(LMTT_ML_PTE_MAX_NUM != SZ_16K);
+		return LMTT_ML_PTE_MAX_NUM;
+	default:
+		return 0;
+	}
+}
+
+static unsigned int lmtt_ml_pte_size(unsigned int level)
+{
+	/* levels 2 and 1 share the PDE type, level 0 uses the PTE type */
+	if (level == 1 || level == 2)
+		return sizeof(lmtt_ml_pde_t);
+
+	if (level == 0)
+		return sizeof(lmtt_ml_pte_t);
+
+	/* unknown level */
+	return 0;
+}
+
+static unsigned int lmtt_ml_pte_shift(unsigned int level)
+{
+	switch (level) {
+	case 1:
+		BUILD_BUG_ON(BIT_ULL(LMTT_ML_PDE_L2_SHIFT) != SZ_32G);
+		return ilog2(SZ_32G);
+	case 0:
+		return ilog2(SZ_2M);
+	default:
+		return 0;
+	}
+}
+
+static unsigned int lmtt_ml_pte_index(u64 addr, unsigned int level)
+{
+	addr >>= lmtt_ml_pte_shift(level);
+
+	switch (level) {
+	case 1:
+		/* SZ_32G increments */
+		BUILD_BUG_ON_NOT_POWER_OF_2(LMTT_ML_PDE_L2_MAX_NUM);
+		return addr & (LMTT_ML_PDE_L2_MAX_NUM - 1);
+	case 0:
+		/* SZ_2M increments */
+		BUILD_BUG_ON_NOT_POWER_OF_2(LMTT_ML_PTE_MAX_NUM);
+		return addr & (LMTT_ML_PTE_MAX_NUM - 1);
+	default:
+		return 0;
+	}
+}
+
+static u64 lmtt_ml_pte_encode(unsigned long offset, unsigned int level)
+{
+	switch (level) {
+	case 0:
+		XE_WARN_ON(!IS_ALIGNED(offset, SZ_2M));
+		XE_WARN_ON(!FIELD_FIT(LMTT_ML_PTE_LMEM_PAGE, offset / SZ_2M));
+		return FIELD_PREP(LMTT_ML_PTE_LMEM_PAGE, offset / SZ_2M) | LMTT_ML_PTE_VALID;
+	case 1:
+	case 2:
+		XE_WARN_ON(!IS_ALIGNED(offset, SZ_64K));
+		XE_WARN_ON(!FIELD_FIT(LMTT_ML_PDE_LMTT_PTR, offset / SZ_64K));
+		return FIELD_PREP(LMTT_ML_PDE_LMTT_PTR, offset / SZ_64K) | LMTT_ML_PDE_VALID;
+	default:
+		XE_WARN_ON(true);
+		return 0;
+	}
+}
+
+const struct xe_lmtt_ops lmtt_ml_ops = {
+	.lmtt_root_pd_level = lmtt_ml_root_pd_level,
+	.lmtt_pte_num = lmtt_ml_pte_num,
+	.lmtt_pte_size = lmtt_ml_pte_size,
+	.lmtt_pte_shift = lmtt_ml_pte_shift,
+	.lmtt_pte_index = lmtt_ml_pte_index,
+	.lmtt_pte_encode = lmtt_ml_pte_encode,
+};
diff --git a/drivers/gpu/drm/xe/xe_lmtt_types.h b/drivers/gpu/drm/xe/xe_lmtt_types.h
new file mode 100644
index 000000000000..b37abad23416
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lmtt_types.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_LMTT_TYPES_H_
+#define _XE_LMTT_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_bo;
+struct xe_lmtt;
+struct xe_lmtt_pt;
+struct xe_lmtt_ops;
+
+#define LMTT_PTE_INVALID	ULL(0)
+
+/**
+ * struct xe_lmtt - Local Memory Translation Table Manager
+ */
+struct xe_lmtt {
+	/** @pd: root LMTT Directory */
+	struct xe_lmtt_pt *pd;
+
+	/** @ops: LMTT functions */
+	const struct xe_lmtt_ops *ops;
+};
+
+/**
+ * struct xe_lmtt_pt - Local Memory Translation Table Page Table
+ *
+ * Represents single level of the LMTT.
+ */
+struct xe_lmtt_pt {
+	/** @level: page table level, 0 is leaf */
+	unsigned int level;
+
+	/** @bo: buffer object with actual LMTT PTE values */
+	struct xe_bo *bo;
+
+	/** @entries: leaf page tables, exist only for root/non-leaf */
+	struct xe_lmtt_pt *entries[];
+};
+
+/**
+ * struct xe_lmtt_ops - Local Memory Translation Table Operations
+ *
+ * Provides abstraction of the LMTT variants.
+ */
+struct xe_lmtt_ops {
+	/* private: */
+	unsigned int (*lmtt_root_pd_level)(void);
+	unsigned int (*lmtt_pte_num)(unsigned int level);
+	unsigned int (*lmtt_pte_size)(unsigned int level);
+	unsigned int (*lmtt_pte_shift)(unsigned int level);
+	unsigned int (*lmtt_pte_index)(u64 addr, unsigned int level);
+	u64 (*lmtt_pte_encode)(unsigned long offset, unsigned int level);
+};
+
+extern const struct xe_lmtt_ops lmtt_2l_ops;
+extern const struct xe_lmtt_ops lmtt_ml_ops;
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
new file mode 100644
index 000000000000..b38319d2801e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -0,0 +1,1264 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_lrc.h"
+
+#include "instructions/xe_mi_commands.h"
+#include "instructions/xe_gfxpipe_commands.h"
+#include "regs/xe_engine_regs.h"
+#include "regs/xe_gpu_commands.h"
+#include "regs/xe_lrc_layout.h"
+#include "xe_bb.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_drm_client.h"
+#include "xe_exec_queue_types.h"
+#include "xe_gt.h"
+#include "xe_gt_printk.h"
+#include "xe_hw_fence.h"
+#include "xe_map.h"
+#include "xe_vm.h"
+
+#define LRC_VALID				(1 << 0)
+#define LRC_PRIVILEGE				(1 << 8)
+#define LRC_ADDRESSING_MODE_SHIFT		3
+#define LRC_LEGACY_64B_CONTEXT			3
+
+#define ENGINE_CLASS_SHIFT			61
+#define ENGINE_INSTANCE_SHIFT			48
+
+static struct xe_device *
+lrc_to_xe(struct xe_lrc *lrc)
+{
+	return gt_to_xe(lrc->fence_ctx.gt);
+}
+
+size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class)
+{
+	switch (class) {
+	case XE_ENGINE_CLASS_RENDER:
+		if (GRAPHICS_VER(xe) >= 20)
+			return 4 * SZ_4K;
+		else
+			return 14 * SZ_4K;
+	case XE_ENGINE_CLASS_COMPUTE:
+		/* 14 pages since graphics_ver == 11 */
+		if (GRAPHICS_VER(xe) >= 20)
+			return 3 * SZ_4K;
+		else
+			return 14 * SZ_4K;
+	default:
+		WARN(1, "Unknown engine class: %d", class);
+		fallthrough;
+	case XE_ENGINE_CLASS_COPY:
+	case XE_ENGINE_CLASS_VIDEO_DECODE:
+	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+	case XE_ENGINE_CLASS_OTHER:
+		return 2 * SZ_4K;
+	}
+}
+
+/*
+ * The per-platform tables are u8-encoded in @data. Decode @data and set the
+ * addresses' offset and commands in @regs. The following encoding is used
+ * for each byte. There are 2 steps: decoding commands and decoding addresses.
+ *
+ * Commands:
+ * [7]: create NOPs - number of NOPs are set in lower bits
+ * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
+ *      MI_LRI_FORCE_POSTED
+ * [5:0]: Number of NOPs or registers to set values to in case of
+ *        MI_LOAD_REGISTER_IMM
+ *
+ * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
+ * number of registers. They are set by using the REG/REG16 macros: the former
+ * is used for offsets smaller than 0x200 while the latter is for values bigger
+ * than that. Those macros already set all the bits documented below correctly:
+ *
+ * [7]: When a register offset needs more than 7 bits, use additional bytes, to
+ *      follow, for the lower bits
+ * [6:0]: Register offset, without considering the engine base.
+ *
+ * This function only tweaks the commands and register offsets. Values are not
+ * filled out.
+ */
+static void set_offsets(u32 *regs,
+			const u8 *data,
+			const struct xe_hw_engine *hwe)
+#define NOP(x) (BIT(7) | (x))
+#define LRI(count, flags) ((flags) << 6 | (count) | \
+			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
+#define POSTED BIT(0)
+#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
+#define REG16(x) \
+	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
+	(((x) >> 2) & 0x7f)
+#define END 0
+{
+	const u32 base = hwe->mmio_base;
+
+	while (*data) {
+		u8 count, flags;
+
+		if (*data & BIT(7)) { /* skip */
+			count = *data++ & ~BIT(7);
+			regs += count;
+			continue;
+		}
+
+		count = *data & 0x3f;
+		flags = *data >> 6;
+		data++;
+
+		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
+		if (flags & POSTED)
+			*regs |= MI_LRI_FORCE_POSTED;
+		*regs |= MI_LRI_LRM_CS_MMIO;
+		regs++;
+
+		xe_gt_assert(hwe->gt, count);
+		do {
+			u32 offset = 0;
+			u8 v;
+
+			do {
+				v = *data++;
+				offset <<= 7;
+				offset |= v & ~BIT(7);
+			} while (v & BIT(7));
+
+			regs[0] = base + (offset << 2);
+			regs += 2;
+		} while (--count);
+	}
+
+	*regs = MI_BATCH_BUFFER_END | BIT(0);
+}
+
+static const u8 gen12_xcs_offsets[] = {
+	NOP(1),
+	LRI(13, POSTED),
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+	REG(0x180),
+	REG16(0x2b4),
+
+	NOP(5),
+	LRI(9, POSTED),
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	END
+};
+
+static const u8 dg2_xcs_offsets[] = {
+	NOP(1),
+	LRI(15, POSTED),
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+	REG(0x180),
+	REG16(0x2b4),
+	REG(0x120),
+	REG(0x124),
+
+	NOP(1),
+	LRI(9, POSTED),
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	END
+};
+
+static const u8 gen12_rcs_offsets[] = {
+	NOP(1),
+	LRI(13, POSTED),
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+	REG(0x180),
+	REG16(0x2b4),
+
+	NOP(5),
+	LRI(9, POSTED),
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	LRI(3, POSTED),
+	REG(0x1b0),
+	REG16(0x5a8),
+	REG16(0x5ac),
+
+	NOP(6),
+	LRI(1, 0),
+	REG(0x0c8),
+	NOP(3 + 9 + 1),
+
+	LRI(51, POSTED),
+	REG16(0x588),
+	REG16(0x588),
+	REG16(0x588),
+	REG16(0x588),
+	REG16(0x588),
+	REG16(0x588),
+	REG(0x028),
+	REG(0x09c),
+	REG(0x0c0),
+	REG(0x178),
+	REG(0x17c),
+	REG16(0x358),
+	REG(0x170),
+	REG(0x150),
+	REG(0x154),
+	REG(0x158),
+	REG16(0x41c),
+	REG16(0x600),
+	REG16(0x604),
+	REG16(0x608),
+	REG16(0x60c),
+	REG16(0x610),
+	REG16(0x614),
+	REG16(0x618),
+	REG16(0x61c),
+	REG16(0x620),
+	REG16(0x624),
+	REG16(0x628),
+	REG16(0x62c),
+	REG16(0x630),
+	REG16(0x634),
+	REG16(0x638),
+	REG16(0x63c),
+	REG16(0x640),
+	REG16(0x644),
+	REG16(0x648),
+	REG16(0x64c),
+	REG16(0x650),
+	REG16(0x654),
+	REG16(0x658),
+	REG16(0x65c),
+	REG16(0x660),
+	REG16(0x664),
+	REG16(0x668),
+	REG16(0x66c),
+	REG16(0x670),
+	REG16(0x674),
+	REG16(0x678),
+	REG16(0x67c),
+	REG(0x068),
+	REG(0x084),
+	NOP(1),
+
+	END
+};
+
+static const u8 xehp_rcs_offsets[] = {
+	NOP(1),
+	LRI(13, POSTED),
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+	REG(0x180),
+	REG16(0x2b4),
+
+	NOP(5),
+	LRI(9, POSTED),
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	LRI(3, POSTED),
+	REG(0x1b0),
+	REG16(0x5a8),
+	REG16(0x5ac),
+
+	NOP(6),
+	LRI(1, 0),
+	REG(0x0c8),
+
+	END
+};
+
+static const u8 dg2_rcs_offsets[] = {
+	NOP(1),
+	LRI(15, POSTED),
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+	REG(0x180),
+	REG16(0x2b4),
+	REG(0x120),
+	REG(0x124),
+
+	NOP(1),
+	LRI(9, POSTED),
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	LRI(3, POSTED),
+	REG(0x1b0),
+	REG16(0x5a8),
+	REG16(0x5ac),
+
+	NOP(6),
+	LRI(1, 0),
+	REG(0x0c8),
+
+	END
+};
+
+static const u8 mtl_rcs_offsets[] = {
+	NOP(1),
+	LRI(15, POSTED),
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+	REG(0x180),
+	REG16(0x2b4),
+	REG(0x120),
+	REG(0x124),
+
+	NOP(1),
+	LRI(9, POSTED),
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	NOP(2),
+	LRI(2, POSTED),
+	REG16(0x5a8),
+	REG16(0x5ac),
+
+	NOP(6),
+	LRI(1, 0),
+	REG(0x0c8),
+
+	END
+};
+
+#define XE2_CTX_COMMON \
+	NOP(1),                 /* [0x00] */ \
+	LRI(15, POSTED),        /* [0x01] */ \
+	REG16(0x244),           /* [0x02] CTXT_SR_CTL */ \
+	REG(0x034),             /* [0x04] RING_BUFFER_HEAD */ \
+	REG(0x030),             /* [0x06] RING_BUFFER_TAIL */ \
+	REG(0x038),             /* [0x08] RING_BUFFER_START */ \
+	REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */ \
+	REG(0x168),             /* [0x0c] BB_ADDR_UDW */ \
+	REG(0x140),             /* [0x0e] BB_ADDR */ \
+	REG(0x110),             /* [0x10] BB_STATE */ \
+	REG(0x1c0),             /* [0x12] BB_PER_CTX_PTR */ \
+	REG(0x1c4),             /* [0x14] RCS_INDIRECT_CTX */ \
+	REG(0x1c8),             /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
+	REG(0x180),             /* [0x18] CCID */ \
+	REG16(0x2b4),           /* [0x1a] SEMAPHORE_TOKEN */ \
+	REG(0x120),             /* [0x1c] PRT_BB_STATE */ \
+	REG(0x124),             /* [0x1e] PRT_BB_STATE_UDW */ \
+	\
+	NOP(1),                 /* [0x20] */ \
+	LRI(9, POSTED),         /* [0x21] */ \
+	REG16(0x3a8),           /* [0x22] CTX_TIMESTAMP */ \
+	REG16(0x3ac),           /* [0x24] CTX_TIMESTAMP_UDW */ \
+	REG(0x108),             /* [0x26] INDIRECT_RING_STATE */ \
+	REG16(0x284),           /* [0x28] dummy reg */ \
+	REG16(0x280),           /* [0x2a] CS_ACC_CTR_THOLD */ \
+	REG16(0x27c),           /* [0x2c] CS_CTX_SYS_PASID */ \
+	REG16(0x278),           /* [0x2e] CS_CTX_ASID */ \
+	REG16(0x274),           /* [0x30] PTBP_UDW */ \
+	REG16(0x270)            /* [0x32] PTBP_LDW */
+
+static const u8 xe2_rcs_offsets[] = {
+	XE2_CTX_COMMON,
+
+	NOP(2),                 /* [0x34] */
+	LRI(2, POSTED),         /* [0x36] */
+	REG16(0x5a8),           /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
+	REG16(0x5ac),           /* [0x39] PREEMPTION_STATUS */
+
+	NOP(6),                 /* [0x3b] */
+	LRI(1, 0),              /* [0x41] */
+	REG(0x0c8),             /* [0x42] R_PWR_CLK_STATE */
+
+	END
+};
+
+static const u8 xe2_bcs_offsets[] = {
+	XE2_CTX_COMMON,
+
+	NOP(4 + 8 + 1),         /* [0x34] */
+	LRI(2, POSTED),         /* [0x41] */
+	REG16(0x200),           /* [0x42] BCS_SWCTRL */
+	REG16(0x204),           /* [0x44] BLIT_CCTL */
+
+	END
+};
+
+static const u8 xe2_xcs_offsets[] = {
+	XE2_CTX_COMMON,
+
+	END
+};
+
+#undef END
+#undef REG16
+#undef REG
+#undef LRI
+#undef NOP
+
+static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
+{
+	if (class == XE_ENGINE_CLASS_RENDER) {
+		if (GRAPHICS_VER(xe) >= 20)
+			return xe2_rcs_offsets;
+		else if (GRAPHICS_VERx100(xe) >= 1270)
+			return mtl_rcs_offsets;
+		else if (GRAPHICS_VERx100(xe) >= 1255)
+			return dg2_rcs_offsets;
+		else if (GRAPHICS_VERx100(xe) >= 1250)
+			return xehp_rcs_offsets;
+		else
+			return gen12_rcs_offsets;
+	} else if (class == XE_ENGINE_CLASS_COPY) {
+		if (GRAPHICS_VER(xe) >= 20)
+			return xe2_bcs_offsets;
+		else
+			return gen12_xcs_offsets;
+	} else {
+		if (GRAPHICS_VER(xe) >= 20)
+			return xe2_xcs_offsets;
+		else if (GRAPHICS_VERx100(xe) >= 1255)
+			return dg2_xcs_offsets;
+		else
+			return gen12_xcs_offsets;
+	}
+}
+
+static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
+{
+	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) |
+				    _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
+				    CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
+
+	/* TODO: Timestamp */
+}
+
+static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
+{
+	struct xe_device *xe = gt_to_xe(hwe->gt);
+
+	/*
+	 * Index into the LRC register state consumed by reset_stop_ring().
+	 */
+	return GRAPHICS_VERx100(xe) >= 1250 ? 0x70 : 0x60;
+}
+
+static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
+{
+	u32 *slot = &regs[lrc_ring_mi_mode(hwe) + 1];
+
+	/* drop STOP_RING from the low half and set the same bit shifted by 16 */
+	*slot &= ~STOP_RING;
+	*slot |= STOP_RING << 16;
+}
+
+static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
+{
+	return 0;
+}
+
+u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
+{
+	return lrc->ring.size;
+}
+
+/* Make the magic macros work */
+#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
+
+#define LRC_SEQNO_PPHWSP_OFFSET 512
+#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
+#define LRC_PARALLEL_PPHWSP_OFFSET 2048
+#define LRC_PPHWSP_SIZE SZ_4K
+
+static size_t lrc_reg_size(struct xe_device *xe)
+{
+	/* Register state spans 96 dwords from graphics 12.50 on, 80 before. */
+	const size_t num_dw = GRAPHICS_VERx100(xe) >= 1250 ? 96 : 80;
+
+	return num_dw * sizeof(u32);
+}
+
+size_t xe_lrc_skip_size(struct xe_device *xe)
+{
+	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
+}
+
+static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
+{
+	/* The seqno is stored in the driver-defined portion of PPHWSP */
+	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
+}
+
+static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
+{
+	/* The start seqno is stored in the driver-defined portion of PPHWSP */
+	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
+}
+
+static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
+{
+	/* The parallel is stored in the driver-defined portion of PPHWSP */
+	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
+}
+
+static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc)
+{
+	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
+}
+
+#define DECL_MAP_ADDR_HELPERS(elem) \
+static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
+{ \
+	struct iosys_map map = lrc->bo->vmap; \
+\
+	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map));  \
+	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
+	return map; \
+} \
+static inline u32 __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
+{ \
+	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
+} \
+
+DECL_MAP_ADDR_HELPERS(ring)
+DECL_MAP_ADDR_HELPERS(pphwsp)
+DECL_MAP_ADDR_HELPERS(seqno)
+DECL_MAP_ADDR_HELPERS(regs)
+DECL_MAP_ADDR_HELPERS(start_seqno)
+DECL_MAP_ADDR_HELPERS(parallel)
+
+#undef DECL_MAP_ADDR_HELPERS
+
+u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
+{
+	return __xe_lrc_pphwsp_ggtt_addr(lrc);
+}
+
+u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
+{
+	/* Start at the register state area, then step to dword @reg_nr. */
+	struct iosys_map regs = __xe_lrc_regs_map(lrc);
+
+	iosys_map_incr(&regs, reg_nr * sizeof(u32));
+
+	return xe_map_read32(lrc_to_xe(lrc), &regs);
+}
+
+void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
+{
+	/* Start at the register state area, then step to dword @reg_nr. */
+	struct iosys_map regs = __xe_lrc_regs_map(lrc);
+
+	iosys_map_incr(&regs, reg_nr * sizeof(u32));
+
+	xe_map_write32(lrc_to_xe(lrc), &regs, val);
+}
+
+static void *empty_lrc_data(struct xe_hw_engine *hwe)
+{
+	struct xe_device *xe = gt_to_xe(hwe->gt);
+	void *data;
+	u32 *regs;
+
+	data = kzalloc(xe_lrc_size(xe, hwe->class), GFP_KERNEL);
+	if (!data)
+		return NULL;
+
+	/* Skip the per-process HW status page; register state follows it. */
+	regs = data + LRC_PPHWSP_SIZE;
+	set_offsets(regs, reg_offsets(xe, hwe->class), hwe);
+	set_context_control(regs, hwe);
+	reset_stop_ring(regs, hwe);
+
+	return data;
+}
+
+static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
+{
+	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);
+
+	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
+	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
+}
+
+#define PVC_CTX_ASID		(0x2e + 1)
+#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)
+
+int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
+		struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size)
+{
+	struct xe_gt *gt = hwe->gt;
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct iosys_map map;
+	void *init_data = NULL;
+	u32 arb_enable;
+	int err;
+
+	lrc->flags = 0;
+
+	/*
+	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
+	 * via VM bind calls.
+	 */
+	lrc->bo = xe_bo_create_pin_map(xe, tile, vm,
+				      ring_size + xe_lrc_size(xe, hwe->class),
+				      ttm_bo_type_kernel,
+				      XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+				      XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(lrc->bo))
+		return PTR_ERR(lrc->bo);
+
+	lrc->tile = gt_to_tile(hwe->gt);
+	lrc->ring.size = ring_size;
+	lrc->ring.tail = 0;
+
+	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
+			     hwe->fence_irq, hwe->name);
+
+	if (!gt->default_lrc[hwe->class]) {
+		init_data = empty_lrc_data(hwe);
+		if (!init_data) {
+			err = -ENOMEM;
+			goto err_lrc_finish;
+		}
+	}
+
+	/*
+	 * Init Per-Process of HW status Page, LRC / context state to known
+	 * values
+	 */
+	map = __xe_lrc_pphwsp_map(lrc);
+	if (!init_data) {
+		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
+		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
+				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
+				 xe_lrc_size(xe, hwe->class) - LRC_PPHWSP_SIZE);
+	} else {
+		xe_map_memcpy_to(xe, &map, 0, init_data,
+				 xe_lrc_size(xe, hwe->class));
+		kfree(init_data);
+	}
+
+	if (vm) {
+		xe_lrc_set_ppgtt(lrc, vm);
+
+		if (vm->xef)
+			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
+	}
+
+	xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
+	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
+	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
+	xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
+			     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
+	if (xe->info.has_asid && vm)
+		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);
+
+	lrc->desc = LRC_VALID;
+	lrc->desc |= LRC_LEGACY_64B_CONTEXT << LRC_ADDRESSING_MODE_SHIFT;
+	/* TODO: Priority */
+
+	/* While this appears to have something about privileged batches or
+	 * some such, it really just means PPGTT mode.
+	 */
+	if (vm)
+		lrc->desc |= LRC_PRIVILEGE;
+
+	if (GRAPHICS_VERx100(xe) < 1250) {
+		lrc->desc |= (u64)hwe->instance << ENGINE_INSTANCE_SHIFT;
+		lrc->desc |= (u64)hwe->class << ENGINE_CLASS_SHIFT;
+	}
+
+	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));
+
+	map = __xe_lrc_seqno_map(lrc);
+	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
+
+	map = __xe_lrc_start_seqno_map(lrc);
+	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
+
+	return 0;
+
+err_lrc_finish:
+	xe_lrc_finish(lrc);
+	return err;
+}
+
+void xe_lrc_finish(struct xe_lrc *lrc)
+{
+	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
+	xe_bo_lock(lrc->bo, false);
+	xe_bo_unpin(lrc->bo);
+	xe_bo_unlock(lrc->bo);
+	xe_bo_put(lrc->bo);
+}
+
+void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
+{
+	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
+}
+
+u32 xe_lrc_ring_head(struct xe_lrc *lrc)
+{
+	return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
+}
+
+u32 xe_lrc_ring_space(struct xe_lrc *lrc)
+{
+	/* Masking with size - 1 presumes a power-of-two ring size. */
+	const u32 wrap_mask = lrc->ring.size - 1;
+	const u32 gap = xe_lrc_ring_head(lrc) - lrc->ring.tail - 1;
+
+	return (gap & wrap_mask) + 1;
+}
+
+static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
+				const void *data, size_t size)
+{
+	struct xe_device *xe = lrc_to_xe(lrc);
+
+	iosys_map_incr(&ring, lrc->ring.tail);
+	xe_map_memcpy_to(xe, &ring, 0, data, size);
+	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
+}
+
+void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
+{
+	struct xe_device *xe = lrc_to_xe(lrc);
+	struct iosys_map ring;
+	u32 rhs;
+	size_t aligned_size;
+
+	xe_assert(xe, IS_ALIGNED(size, 4));
+	aligned_size = ALIGN(size, 8);
+
+	ring = __xe_lrc_ring_map(lrc);
+
+	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
+	rhs = lrc->ring.size - lrc->ring.tail;
+	if (size > rhs) {
+		__xe_lrc_write_ring(lrc, ring, data, rhs);
+		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
+	} else {
+		__xe_lrc_write_ring(lrc, ring, data, size);
+	}
+
+	if (aligned_size > size) {
+		u32 noop = MI_NOOP;
+
+		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
+	}
+}
+
+u64 xe_lrc_descriptor(struct xe_lrc *lrc)
+{
+	return lrc->desc | xe_lrc_ggtt_addr(lrc);
+}
+
+u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
+{
+	return __xe_lrc_seqno_ggtt_addr(lrc);
+}
+
+struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc)
+{
+	return &xe_hw_fence_create(&lrc->fence_ctx,
+				   __xe_lrc_seqno_map(lrc))->dma;
+}
+
+s32 xe_lrc_seqno(struct xe_lrc *lrc)
+{
+	struct iosys_map map = __xe_lrc_seqno_map(lrc);
+
+	return xe_map_read32(lrc_to_xe(lrc), &map);
+}
+
+s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
+{
+	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);
+
+	return xe_map_read32(lrc_to_xe(lrc), &map);
+}
+
+u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
+{
+	return __xe_lrc_start_seqno_ggtt_addr(lrc);
+}
+
+u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
+{
+	return __xe_lrc_parallel_ggtt_addr(lrc);
+}
+
+struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
+{
+	return __xe_lrc_parallel_map(lrc);
+}
+
+static int instr_dw(u32 cmd_header)
+{
+	const u32 type_and_pipeline = cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE);
+	const bool is_so_decl_list = (cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST;
+
+	/* Anything in the GFXPIPE "SINGLE_DW" pipeline is exactly one dword. */
+	if (type_and_pipeline == GFXPIPE_SINGLE_DW_CMD(0, 0))
+		return 1;
+
+	/* Length fields hold (# dwords - 2); SO_DECL_LIST's is 9 bits, not 8. */
+	if (is_so_decl_list)
+		return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;
+	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
+}
+
+/* Print the MI command at @dw; returns the number of dwords it consumed. */
+static int dump_mi_command(struct drm_printer *p, struct xe_gt *gt, u32 *dw,
+			   int remaining_dw)
+{
+	u32 inst_header = *dw;
+	u32 numdw = instr_dw(inst_header);
+	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
+	int num_noop;
+
+	/* First check for commands that don't have/use a '# DW' field */
+	switch (inst_header & MI_OPCODE) {
+	case MI_NOOP:
+		num_noop = 1;
+		while (num_noop < remaining_dw &&
+		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
+			num_noop++;
+		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
+		return num_noop;
+
+	case MI_TOPOLOGY_FILTER:
+		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
+		return 1;
+
+	case MI_BATCH_BUFFER_END:
+		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
+		/* Return 'remaining_dw' to consume the rest of the LRC */
+		return remaining_dw;
+	}
+
+	/*
+	 * Any remaining commands include a # of dwords.  We should make sure
+	 * it doesn't exceed the remaining size of the LRC.
+	 */
+	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
+		numdw = remaining_dw;
+
+	switch (inst_header & MI_OPCODE) {
+	case MI_LOAD_REGISTER_IMM:
+		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
+			   inst_header, (numdw - 1) / 2);
+		/* Only print complete pairs so a clamped length can't overrun */
+		for (int i = 1; i + 1 < numdw; i += 2)
+			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
+		return numdw;
+
+	case MI_FORCE_WAKEUP:
+		drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
+		return numdw;
+
+	default:
+		drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
+			   inst_header, opcode, numdw);
+		return numdw;
+	}
+}
+
+static int dump_gfxpipe_command(struct drm_printer *p,
+				struct xe_gt *gt,
+				u32 *dw,
+				int remaining_dw)
+{
+	u32 numdw = instr_dw(*dw);
+	u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
+	u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
+	u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);
+
+	/*
+	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
+	 * remaining size of the LRC.
+	 */
+	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
+		numdw = remaining_dw;
+
+	switch (*dw & GFXPIPE_MATCH_MASK) {
+#define MATCH(cmd) \
+	case cmd: \
+		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
+		return numdw
+#define MATCH3D(cmd) \
+	case CMD_##cmd: \
+		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
+		return numdw
+
+	MATCH(STATE_BASE_ADDRESS);
+	MATCH(STATE_SIP);
+	MATCH(GPGPU_CSR_BASE_ADDRESS);
+	MATCH(STATE_COMPUTE_MODE);
+	MATCH3D(3DSTATE_BTD);
+
+	MATCH3D(3DSTATE_VF_STATISTICS);
+
+	MATCH(PIPELINE_SELECT);
+
+	MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
+	MATCH3D(3DSTATE_CLEAR_PARAMS);
+	MATCH3D(3DSTATE_DEPTH_BUFFER);
+	MATCH3D(3DSTATE_STENCIL_BUFFER);
+	MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
+	MATCH3D(3DSTATE_VERTEX_BUFFERS);
+	MATCH3D(3DSTATE_VERTEX_ELEMENTS);
+	MATCH3D(3DSTATE_INDEX_BUFFER);
+	MATCH3D(3DSTATE_VF);
+	MATCH3D(3DSTATE_MULTISAMPLE);
+	MATCH3D(3DSTATE_CC_STATE_POINTERS);
+	MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
+	MATCH3D(3DSTATE_VS);
+	MATCH3D(3DSTATE_GS);
+	MATCH3D(3DSTATE_CLIP);
+	MATCH3D(3DSTATE_SF);
+	MATCH3D(3DSTATE_WM);
+	MATCH3D(3DSTATE_CONSTANT_VS);
+	MATCH3D(3DSTATE_CONSTANT_GS);
+	MATCH3D(3DSTATE_SAMPLE_MASK);
+	MATCH3D(3DSTATE_CONSTANT_HS);
+	MATCH3D(3DSTATE_CONSTANT_DS);
+	MATCH3D(3DSTATE_HS);
+	MATCH3D(3DSTATE_TE);
+	MATCH3D(3DSTATE_DS);
+	MATCH3D(3DSTATE_STREAMOUT);
+	MATCH3D(3DSTATE_SBE);
+	MATCH3D(3DSTATE_PS);
+	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
+	MATCH3D(3DSTATE_CPS_POINTERS);
+	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
+	MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
+	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
+	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
+	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
+	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
+	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
+	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
+	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
+	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
+	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
+	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
+	MATCH3D(3DSTATE_VF_INSTANCING);
+	MATCH3D(3DSTATE_VF_SGVS);
+	MATCH3D(3DSTATE_VF_TOPOLOGY);
+	MATCH3D(3DSTATE_WM_CHROMAKEY);
+	MATCH3D(3DSTATE_PS_BLEND);
+	MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
+	MATCH3D(3DSTATE_PS_EXTRA);
+	MATCH3D(3DSTATE_RASTER);
+	MATCH3D(3DSTATE_SBE_SWIZ);
+	MATCH3D(3DSTATE_WM_HZ_OP);
+	MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
+	MATCH3D(3DSTATE_VF_SGVS_2);
+	MATCH3D(3DSTATE_VFG);
+	MATCH3D(3DSTATE_URB_ALLOC_VS);
+	MATCH3D(3DSTATE_URB_ALLOC_HS);
+	MATCH3D(3DSTATE_URB_ALLOC_DS);
+	MATCH3D(3DSTATE_URB_ALLOC_GS);
+	MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
+	MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
+	MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
+	MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
+	MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
+	MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
+	MATCH3D(3DSTATE_AMFS);
+	MATCH3D(3DSTATE_DEPTH_BOUNDS);
+	MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
+	MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
+	MATCH3D(3DSTATE_MESH_CONTROL);
+	MATCH3D(3DSTATE_MESH_DISTRIB);
+	MATCH3D(3DSTATE_TASK_REDISTRIB);
+	MATCH3D(3DSTATE_MESH_SHADER);
+	MATCH3D(3DSTATE_MESH_SHADER_DATA);
+	MATCH3D(3DSTATE_TASK_CONTROL);
+	MATCH3D(3DSTATE_TASK_SHADER);
+	MATCH3D(3DSTATE_TASK_SHADER_DATA);
+	MATCH3D(3DSTATE_URB_ALLOC_MESH);
+	MATCH3D(3DSTATE_URB_ALLOC_TASK);
+	MATCH3D(3DSTATE_CLIP_MESH);
+	MATCH3D(3DSTATE_SBE_MESH);
+	MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);
+
+	MATCH3D(3DSTATE_DRAWING_RECTANGLE);
+	MATCH3D(3DSTATE_CHROMA_KEY);
+	MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
+	MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
+	MATCH3D(3DSTATE_LINE_STIPPLE);
+	MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
+	MATCH3D(3DSTATE_MONOFILTER_SIZE);
+	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
+	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
+	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
+	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
+	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
+	MATCH3D(3DSTATE_SO_DECL_LIST);
+	MATCH3D(3DSTATE_SO_BUFFER);
+	MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
+	MATCH3D(3DSTATE_SAMPLE_PATTERN);
+	MATCH3D(3DSTATE_3D_MODE);
+	MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
+	MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
+	MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);
+
+	default:
+		drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
+			   *dw, pipeline, opcode, subopcode, numdw);
+		return numdw;
+	}
+}
+
+/* Decode and print the commands in the default LRC image of @hwe_class. */
+void xe_lrc_dump_default(struct drm_printer *p, struct xe_gt *gt,
+			 enum xe_engine_class hwe_class)
+{
+	u32 *dw;
+	int remaining_dw, num_dw;
+
+	if (!gt->default_lrc[hwe_class]) {
+		drm_printf(p, "No default LRC for class %d\n", hwe_class);
+		return;
+	}
+
+	/*
+	 * Skip the beginning of the LRC since it contains the per-process
+	 * hardware status page.
+	 */
+	dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
+	remaining_dw = (xe_lrc_size(gt_to_xe(gt), hwe_class) - LRC_PPHWSP_SIZE) / 4;
+
+	while (remaining_dw > 0) {
+		if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
+			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
+		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
+			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
+		} else {
+			num_dw = min(instr_dw(*dw), remaining_dw);
+			/* Zero-pad the header like the MI/GFXPIPE decoders do */
+			drm_printf(p, "[%#010x] Unknown instruction of type %#x, likely %d dwords\n",
+				   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw), num_dw);
+		}
+
+		dw += num_dw;
+		remaining_dw -= num_dw;
+	}
+}
+
+struct instr_state {
+	u32 instr;
+	u16 num_dw;
+};
+
+static const struct instr_state xe_hpg_svg_state[] = {
+	{ .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
+	{ .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
+	{ .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
+	{ .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
+	{ .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
+	{ .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
+	{ .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
+	{ .instr = CMD_3DSTATE_VS, .num_dw = 9 },
+	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
+	{ .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
+	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
+	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
+	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
+	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
+	{ .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
+	{ .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
+	{ .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_SF, .num_dw = 4 },
+	{ .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
+	{ .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
+	{ .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
+	{ .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_HS, .num_dw = 9 },
+	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
+	{ .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
+	{ .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
+	{ .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
+	{ .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
+	{ .instr = CMD_3DSTATE_TE, .num_dw = 5 },
+	{ .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_DS, .num_dw = 11 },
+	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
+	{ .instr = CMD_3DSTATE_GS, .num_dw = 10 },
+	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
+	{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
+	{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
+	{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
+	{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
+	{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
+};
+
+void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
+{
+	struct xe_gt *gt = q->hwe->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	const struct instr_state *state_table = NULL;
+	int state_table_size = 0;
+
+	/*
+	 * At the moment we only need to emit non-register state for the RCS
+	 * engine.
+	 */
+	if (q->hwe->class != XE_ENGINE_CLASS_RENDER)
+		return;
+
+	switch (GRAPHICS_VERx100(xe)) {
+	case 1255:
+	case 1270 ... 2004:
+		state_table = xe_hpg_svg_state;
+		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
+		break;
+	default:
+		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
+			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
+		return;
+	}
+
+	for (int i = 0; i < state_table_size; i++) {
+		u32 instr = state_table[i].instr;
+		u16 num_dw = state_table[i].num_dw;
+		bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);
+
+		xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
+		xe_gt_assert(gt, num_dw != 0);
+		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));
+
+		/*
+		 * Xe2's SVG context is the same as the one on DG2 / MTL
+		 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
+		 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
+		 * Just make the replacement here rather than defining a
+		 * whole separate table for the single trivial change.
+		 */
+		if (GRAPHICS_VER(xe) >= 20 &&
+		    instr == CMD_3DSTATE_DRAWING_RECTANGLE)
+			instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;
+
+		bb->cs[bb->len] = instr;
+		if (!is_single_dw)
+			bb->cs[bb->len] |= (num_dw - 2);
+
+		bb->len += num_dw;
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
new file mode 100644
index 000000000000..28b1d3f404d4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+#ifndef _XE_LRC_H_
+#define _XE_LRC_H_
+
+#include "xe_lrc_types.h"
+
+struct drm_printer;
+struct xe_bb;
+struct xe_device;
+struct xe_exec_queue;
+enum xe_engine_class;
+struct xe_hw_engine;
+struct xe_vm;
+
+#define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4)
+
+int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
+		struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size);
+void xe_lrc_finish(struct xe_lrc *lrc);
+
+size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class);
+u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc);
+
+void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head);
+u32 xe_lrc_ring_head(struct xe_lrc *lrc);
+u32 xe_lrc_ring_space(struct xe_lrc *lrc);
+void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size);
+
+u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc);
+u32 *xe_lrc_regs(struct xe_lrc *lrc);
+
+u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr);
+void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val);
+
+u64 xe_lrc_descriptor(struct xe_lrc *lrc);
+
+u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc);
+struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc);
+s32 xe_lrc_seqno(struct xe_lrc *lrc);
+
+u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc);
+s32 xe_lrc_start_seqno(struct xe_lrc *lrc);
+
+u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc);
+struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc);
+
+size_t xe_lrc_skip_size(struct xe_device *xe);
+
+/* Print the decoded default LRC of @hwe_class on @gt to @p. */
+void xe_lrc_dump_default(struct drm_printer *p, struct xe_gt *gt,
+			 enum xe_engine_class hwe_class);
+
+void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h
new file mode 100644
index 000000000000..78220336062c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lrc_types.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_LRC_TYPES_H_
+#define _XE_LRC_TYPES_H_
+
+#include "xe_hw_fence_types.h"
+
+struct xe_bo;
+
+/**
+ * struct xe_lrc - Logical ring context (LRC) and submission ring object
+ */
+struct xe_lrc {
+	/**
+	 * @bo: buffer object (memory) for logical ring context, per process HW
+	 * status page, and submission ring.
+	 */
+	struct xe_bo *bo;
+
+	/** @tile: tile which this LRC belongs to */
+	struct xe_tile *tile;
+
+	/** @flags: LRC flags */
+	u32 flags;
+
+	/** @ring: submission ring state */
+	struct {
+		/** @ring.size: size of submission ring, in bytes */
+		u32 size;
+		/** @ring.tail: byte offset of the tail of submission ring */
+		u32 tail;
+		/** @ring.old_tail: shadow of tail */
+		u32 old_tail;
+	} ring;
+
+	/** @desc: LRC descriptor */
+	u64 desc;
+
+	/** @fence_ctx: context for hw fence */
+	struct xe_hw_fence_ctx fence_ctx;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_macros.h b/drivers/gpu/drm/xe/xe_macros.h
new file mode 100644
index 000000000000..daf56c846d03
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_macros.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_MACROS_H_
+#define _XE_MACROS_H_
+
+#include <linux/bug.h>
+
+#define XE_WARN_ON WARN_ON
+
+/* Evaluates to true, logging the failed check via drm_dbg(), when @cond holds. */
+#define XE_IOCTL_DBG(xe, cond) \
+	((cond) && (drm_dbg(&(xe)->drm, "Ioctl argument check failed at %s:%d: %s\n", \
+			    __FILE__, __LINE__, #cond), 1))
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_map.h b/drivers/gpu/drm/xe/xe_map.h
new file mode 100644
index 000000000000..f62e0c8b67ab
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_map.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_MAP_H_
+#define _XE_MAP_H_
+
+#include <linux/iosys-map.h>
+
+#include <xe_device.h>
+
+/**
+ * DOC: Map layer
+ *
+ * All access to any memory shared with a device (both sysmem and vram) in the
+ * XE driver should go through this layer (xe_map). This layer is built on top
+ * of :ref:`driver-api/device-io:Generalizing Access to System and I/O Memory`
+ * and with extra hooks into the XE driver that allows adding asserts to memory
+ * accesses (e.g. for blocking runtime_pm D3Cold on Discrete Graphics).
+ */
+
+static inline void xe_map_memcpy_to(struct xe_device *xe, struct iosys_map *dst,
+				    size_t dst_offset, const void *src,
+				    size_t len)
+{
+	xe_device_assert_mem_access(xe);
+	iosys_map_memcpy_to(dst, dst_offset, src, len);
+}
+
+static inline void xe_map_memcpy_from(struct xe_device *xe, void *dst,
+				      const struct iosys_map *src,
+				      size_t src_offset, size_t len)
+{
+	xe_device_assert_mem_access(xe);
+	iosys_map_memcpy_from(dst, src, src_offset, len);
+}
+
+static inline void xe_map_memset(struct xe_device *xe,
+				 struct iosys_map *dst, size_t offset,
+				 int value, size_t len)
+{
+	xe_device_assert_mem_access(xe);
+	iosys_map_memset(dst, offset, value, len);
+}
+
+/* FIXME: We likely should kill these two functions sooner or later */
+static inline u32 xe_map_read32(struct xe_device *xe, struct iosys_map *map)
+{
+	xe_device_assert_mem_access(xe);
+
+	/* Only I/O mappings need readl(); anything else is a plain load. */
+	if (!map->is_iomem)
+		return READ_ONCE(*(u32 *)map->vaddr);
+	return readl(map->vaddr_iomem);
+}
+
+/* Store @val at the start of @map, via writel() when it is I/O memory. */
+static inline void xe_map_write32(struct xe_device *xe, struct iosys_map *map, u32 val)
+{
+	xe_device_assert_mem_access(xe);
+
+	if (!map->is_iomem)
+		*(u32 *)map->vaddr = val;
+	else
+		writel(val, map->vaddr_iomem);
+}
+
+#define xe_map_rd(xe__, map__, offset__, type__) ({			\
+	struct xe_device *__xe = xe__;					\
+	xe_device_assert_mem_access(__xe);				\
+	iosys_map_rd(map__, offset__, type__);				\
+})
+
+#define xe_map_wr(xe__, map__, offset__, type__, val__) ({		\
+	struct xe_device *__xe = xe__;					\
+	xe_device_assert_mem_access(__xe);				\
+	iosys_map_wr(map__, offset__, type__, val__);			\
+})
+
+#define xe_map_rd_field(xe__, map__, struct_offset__, struct_type__, field__) ({	\
+	struct xe_device *__xe = xe__;					\
+	xe_device_assert_mem_access(__xe);				\
+	iosys_map_rd_field(map__, struct_offset__, struct_type__, field__);		\
+})
+
+#define xe_map_wr_field(xe__, map__, struct_offset__, struct_type__, field__, val__) ({	\
+	struct xe_device *__xe = xe__;					\
+	xe_device_assert_mem_access(__xe);				\
+	iosys_map_wr_field(map__, struct_offset__, struct_type__, field__, val__);	\
+})
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
new file mode 100644
index 000000000000..70480c305602
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -0,0 +1,1455 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include "xe_migrate.h"
+
+#include <linux/bitfield.h>
+#include <linux/sizes.h>
+
+#include <drm/drm_managed.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/xe_drm.h>
+
+#include "generated/xe_wa_oob.h"
+#include "instructions/xe_mi_commands.h"
+#include "regs/xe_gpu_commands.h"
+#include "tests/xe_test.h"
+#include "xe_assert.h"
+#include "xe_bb.h"
+#include "xe_bo.h"
+#include "xe_exec_queue.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_hw_engine.h"
+#include "xe_lrc.h"
+#include "xe_map.h"
+#include "xe_mocs.h"
+#include "xe_pt.h"
+#include "xe_res_cursor.h"
+#include "xe_sched_job.h"
+#include "xe_sync.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+#include "xe_wa.h"
+
+/**
+ * struct xe_migrate - migrate context.
+ */
+struct xe_migrate {
+	/** @q: Default exec queue used for migration */
+	struct xe_exec_queue *q;
+	/** @tile: Backpointer to the tile this struct xe_migrate belongs to. */
+	struct xe_tile *tile;
+	/** @job_mutex: Timeline mutex for @q. */
+	struct mutex job_mutex;
+	/** @pt_bo: Page-table buffer object. */
+	struct xe_bo *pt_bo;
+	/** @batch_base_ofs: VM offset of the migration batch buffer */
+	u64 batch_base_ofs;
+	/** @usm_batch_base_ofs: VM offset of the usm batch buffer */
+	u64 usm_batch_base_ofs;
+	/** @cleared_mem_ofs: VM offset of the NULL mapping used as cleared source. */
+	u64 cleared_mem_ofs;
+	/**
+	 * @fence: dma-fence representing the last migration job batch.
+	 * Protected by @job_mutex.
+	 */
+	struct dma_fence *fence;
+	/**
+	 * @vm_update_sa: For integrated, used to suballocate page-tables
+	 * out of the pt_bo.
+	 */
+	struct drm_suballoc_manager vm_update_sa;
+	/** @min_chunk_size: For dgfx, Minimum chunk size */
+	u64 min_chunk_size;
+};
+
+#define MAX_PREEMPTDISABLE_TRANSFER SZ_8M /* Around 1ms. */
+#define MAX_CCS_LIMITED_TRANSFER SZ_4M /* XE_PAGE_SIZE * (FIELD_MAX(XE2_CCS_SIZE_MASK) + 1) */
+#define NUM_KERNEL_PDE 17
+#define NUM_PT_SLOTS 32
+#define LEVEL0_PAGE_TABLE_ENCODE_SIZE SZ_2M
+
+/**
+ * xe_tile_migrate_engine() - Get this tile's migrate engine.
+ * @tile: The tile.
+ *
+ * Returns the default exec queue (@q) of this tile's migrate context.
+ * TODO: Perhaps this function is slightly misplaced, and even unneeded?
+ *
+ * Return: The default migrate exec queue
+ */
+struct xe_exec_queue *xe_tile_migrate_engine(struct xe_tile *tile)
+{
+	return tile->migrate->q;
+}
+
+static void xe_migrate_fini(struct drm_device *dev, void *arg)
+{
+	struct xe_migrate *m = arg;
+
+	xe_vm_lock(m->q->vm, false);
+	xe_bo_unpin(m->pt_bo);
+	xe_vm_unlock(m->q->vm);
+	/* The PT bo was unpinned under the VM lock; now release what @m holds. */
+	dma_fence_put(m->fence);
+	xe_bo_put(m->pt_bo);
+	drm_suballoc_manager_fini(&m->vm_update_sa);
+	mutex_destroy(&m->job_mutex);
+	xe_vm_close_and_put(m->q->vm);
+	xe_exec_queue_put(m->q);
+}
+
+static u64 xe_migrate_vm_addr(u64 slot, u32 level)
+{
+	/* Slot 0 maps the PT bo and bb, so every slot is shifted up by one. */
+	const u64 index = slot + 1ULL;
+
+	XE_WARN_ON(slot >= NUM_PT_SLOTS);
+	return index << xe_pt_shift(level + 1);
+}
+
+static u64 xe_migrate_vram_ofs(struct xe_device *xe, u64 addr)
+{
+	/* VM offset of entry 256 at level 2, where this offset is based. */
+	const u64 identity_base = 256ULL << xe_pt_shift(2);
+
+	/* Remove the DPA base so @addr becomes an offset into that range. */
+	addr -= xe->mem.vram.dpa_base;
+	return identity_base + addr;
+}
+
+static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
+				 struct xe_vm *vm)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	u16 pat_index = xe->pat.idx[XE_CACHE_WB];
+	u8 id = tile->id;
+	u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level;
+	u32 map_ofs, level, i;
+	struct xe_bo *bo, *batch = tile->mem.kernel_bb_pool->bo;
+	u64 entry;
+
+	/* Can't bump NUM_PT_SLOTS too high */
+	BUILD_BUG_ON(NUM_PT_SLOTS > SZ_2M/XE_PAGE_SIZE);
+	/* Must be a multiple of 64K to support all platforms */
+	BUILD_BUG_ON(NUM_PT_SLOTS * XE_PAGE_SIZE % SZ_64K);
+	/* Must be odd: one slot is reserved for 4KiB page table updates */
+	BUILD_BUG_ON(!(NUM_KERNEL_PDE & 1));
+
+	/* Need to be sure everything fits in the first PT, or create more */
+	xe_tile_assert(tile, m->batch_base_ofs + batch->size < SZ_2M);
+
+	bo = xe_bo_create_pin_map(vm->xe, tile, vm,
+				  num_entries * XE_PAGE_SIZE,
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+				  XE_BO_CREATE_PINNED_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	entry = vm->pt_ops->pde_encode_bo(bo, bo->size - XE_PAGE_SIZE, pat_index);
+	xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry);
+
+	map_ofs = (num_entries - num_level) * XE_PAGE_SIZE;
+
+	/* Map the entire BO in our level 0 pt */
+	for (i = 0, level = 0; i < num_entries; level++) {
+		entry = vm->pt_ops->pte_encode_bo(bo, i * XE_PAGE_SIZE,
+						  pat_index, 0);
+
+		xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry);
+
+		if (vm->flags & XE_VM_FLAG_64K)
+			i += 16;
+		else
+			i += 1;
+	}
+
+	if (!IS_DGFX(xe)) {
+		/* Write out batch too */
+		m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE;
+		for (i = 0; i < batch->size;
+		     i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE :
+		     XE_PAGE_SIZE) {
+			entry = vm->pt_ops->pte_encode_bo(batch, i,
+							  pat_index, 0);
+
+			xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64,
+				  entry);
+			level++;
+		}
+		if (xe->info.has_usm) {
+			xe_tile_assert(tile, batch->size == SZ_1M);
+
+			batch = tile->primary_gt->usm.bb_pool->bo;
+			m->usm_batch_base_ofs = m->batch_base_ofs + SZ_1M;
+			xe_tile_assert(tile, batch->size == SZ_512K);
+
+			for (i = 0; i < batch->size;
+			     i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE :
+			     XE_PAGE_SIZE) {
+				entry = vm->pt_ops->pte_encode_bo(batch, i,
+								  pat_index, 0);
+
+				xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64,
+					  entry);
+				level++;
+			}
+		}
+	} else {
+		u64 batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE);
+
+		m->batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr);
+
+		if (xe->info.has_usm) {
+			batch = tile->primary_gt->usm.bb_pool->bo;
+			batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE);
+			m->usm_batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr);
+		}
+	}
+
+	for (level = 1; level < num_level; level++) {
+		u32 flags = 0;
+
+		if (vm->flags & XE_VM_FLAG_64K && level == 1)
+			flags = XE_PDE_64K;
+
+		entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (level - 1) *
+						  XE_PAGE_SIZE, pat_index);
+		xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64,
+			  entry | flags);
+	}
+
+	/* Write PDE's that point to our BO. */
+	for (i = 0; i < num_entries - num_level; i++) {
+		entry = vm->pt_ops->pde_encode_bo(bo, i * XE_PAGE_SIZE,
+						  pat_index);
+
+		xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE +
+			  (i + 1) * 8, u64, entry);
+	}
+
+	/* Set up a 1GiB NULL mapping at 255GiB offset. */
+	level = 2;
+	xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level + 255 * 8, u64,
+		  vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0)
+		  | XE_PTE_NULL);
+	m->cleared_mem_ofs = (255ULL << xe_pt_shift(level));
+
+	/* Identity map the entire vram at 256GiB offset */
+	if (IS_DGFX(xe)) {
+		u64 pos, ofs, flags;
+
+		level = 2;
+		ofs = map_ofs + XE_PAGE_SIZE * level + 256 * 8;
+		flags = vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level,
+						    true, 0);
+
+		/*
+		 * Use 1GB pages; it shouldn't matter if the physical amount
+		 * of vram is less, as long as we don't access the excess.
+		 */
+		for (pos = xe->mem.vram.dpa_base;
+		     pos < xe->mem.vram.actual_physical_size + xe->mem.vram.dpa_base;
+		     pos += SZ_1G, ofs += 8)
+			xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags);
+	}
+
+	/*
+	 * Example layout created above, with root level = 3:
+	 * [PT0...PT7]: kernel PT's for copy/clear; 64 or 4KiB PTE's
+	 * [PT8]: Kernel PT for VM_BIND, 4 KiB PTE's
+	 * [PT9...PT28]: Userspace PT's for VM_BIND, 4 KiB PTE's
+	 * [PT29 = PDE 0] [PT30 = PDE 1] [PT31 = PDE 2]
+	 *
+	 * This makes the lowest part of the VM point to the pagetables.
+	 * Hence the lowest 2M in the VM points to itself; with a few writes
+	 * and flushes, other parts of the VM can be used for either copying
+	 * or clearing.
+	 *
+	 * For performance, the kernel reserves PDE's, so about 20 are left
+	 * for async VM updates.
+	 *
+	 * To make it easier to work, each scratch PT is put in slot (1 + PT #)
+	 * everywhere; this allows lockless updates to scratch pages by using
+	 * the different addresses in the VM.
+	 */
+#define NUM_VMUSA_UNIT_PER_PAGE	32
+#define VM_SA_UPDATE_UNIT_SIZE		(XE_PAGE_SIZE / NUM_VMUSA_UNIT_PER_PAGE)
+#define NUM_VMUSA_WRITES_PER_UNIT	(VM_SA_UPDATE_UNIT_SIZE / sizeof(u64))
+	drm_suballoc_manager_init(&m->vm_update_sa,
+				  (map_ofs / XE_PAGE_SIZE - NUM_KERNEL_PDE) *
+				  NUM_VMUSA_UNIT_PER_PAGE, 0);
+
+	m->pt_bo = bo;
+	return 0;
+}
+
+/*
+ * Due to workaround 16017236439, odd instance hardware copy engines are
+ * faster than even instance ones.
+ * This function returns the mask involving all fast copy engines and the
+ * reserved copy engine to be used as logical mask for migrate engine.
+ * Including the reserved copy engine is required to avoid deadlocks where
+ * migrate jobs servicing faults get stuck behind the job that faulted.
+ */
+static u32 xe_migrate_usm_logical_mask(struct xe_gt *gt)
+{
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	u32 mask = 0;
+
+	for_each_hw_engine(hwe, gt, id) {
+		if (hwe->class != XE_ENGINE_CLASS_COPY)
+			continue;
+		/* Under the WA, drop even instances unless it is the USM hwe. */
+		if (XE_WA(gt, 16017236439) &&
+		    !xe_gt_is_usm_hwe(gt, hwe) && !(hwe->instance & 1))
+			continue;
+		mask |= BIT(hwe->logical_instance);
+	}
+	return mask;
+}
+
+/**
+ * xe_migrate_init() - Initialize a migrate context
+ * @tile: Back-pointer to the tile we're initializing for.
+ *
+ * Return: Pointer to a migrate context on success. Error pointer on error.
+ */
+struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_gt *primary_gt = tile->primary_gt;
+	struct xe_migrate *m;
+	struct xe_vm *vm;
+	int err;
+
+	m = drmm_kzalloc(&xe->drm, sizeof(*m), GFP_KERNEL);
+	if (!m)
+		return ERR_PTR(-ENOMEM);
+
+	m->tile = tile;
+
+	/* Special layout, prepared below.. */
+	vm = xe_vm_create(xe, XE_VM_FLAG_MIGRATION | XE_VM_FLAG_SET_TILE_ID(tile));
+	if (IS_ERR(vm))
+		return ERR_CAST(vm);
+
+	xe_vm_lock(vm, false);
+	err = xe_migrate_prepare_vm(tile, m, vm);
+	xe_vm_unlock(vm);
+	if (err) {
+		xe_vm_close_and_put(vm);
+		return ERR_PTR(err);
+	}
+
+	if (xe->info.has_usm) {
+		struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt,
+							   XE_ENGINE_CLASS_COPY,
+							   primary_gt->usm.reserved_bcs_instance,
+							   false);
+		u32 logical_mask = xe_migrate_usm_logical_mask(primary_gt);
+
+		if (!hwe || !logical_mask) {
+			/* Drop the VM here too, as the other error paths do. */
+			xe_vm_close_and_put(vm);
+			return ERR_PTR(-EINVAL);
+		}
+
+		m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe,
+					    EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_PERMANENT |
+					    EXEC_QUEUE_FLAG_HIGH_PRIORITY);
+	} else {
+		m->q = xe_exec_queue_create_class(xe, primary_gt, vm, XE_ENGINE_CLASS_COPY,
+						  EXEC_QUEUE_FLAG_KERNEL |
+						  EXEC_QUEUE_FLAG_PERMANENT);
+	}
+	if (IS_ERR(m->q)) {
+		xe_vm_close_and_put(vm);
+		return ERR_CAST(m->q);
+	}
+
+	mutex_init(&m->job_mutex);
+
+	err = drmm_add_action_or_reset(&xe->drm, xe_migrate_fini, m);
+	if (err)
+		return ERR_PTR(err);
+
+	if (IS_DGFX(xe)) {
+		if (xe_device_has_flat_ccs(xe))
+			/* min chunk size corresponds to 4K of CCS Metadata */
+			m->min_chunk_size = SZ_4K * SZ_64K /
+				xe_device_ccs_bytes(xe, SZ_64K);
+		else
+			/* Somewhat arbitrary to avoid a huge amount of blits */
+			m->min_chunk_size = SZ_64K;
+		m->min_chunk_size = roundup_pow_of_two(m->min_chunk_size);
+		drm_dbg(&xe->drm, "Migrate min chunk size is 0x%08llx\n",
+			(unsigned long long)m->min_chunk_size);
+	}
+
+	return m;
+}
+
+static u64 max_mem_transfer_per_pass(struct xe_device *xe)
+{
+	/* Only non-dgfx devices with flat CCS get the smaller CCS limit. */
+	bool ccs_limited = !IS_DGFX(xe) && xe_device_has_flat_ccs(xe);
+
+	return ccs_limited ? MAX_CCS_LIMITED_TRANSFER : MAX_PREEMPTDISABLE_TRANSFER;
+}
+
+static u64 xe_migrate_res_sizes(struct xe_migrate *m, struct xe_res_cursor *cur)
+{
+	struct xe_device *xe = tile_to_xe(m->tile);
+	u64 size = min_t(u64, max_mem_transfer_per_pass(xe), cur->remaining);
+	u64 chunk;
+
+	if (!mem_type_is_vram(cur->mem_type))
+		return size;
+
+	/*
+	 * VRAM blits are sized in multiples of min_chunk_size so that the
+	 * offset to the CCS metadata stays page-aligned; only the last chunk
+	 * may be smaller.
+	 *
+	 * A blit must also stay within the current VRAM block, unless that
+	 * block is smaller than min_chunk_size.
+	 */
+	chunk = max_t(u64, cur->size, m->min_chunk_size);
+	size = min_t(u64, size, chunk);
+	if (size > m->min_chunk_size)
+		size = round_down(size, m->min_chunk_size);
+
+	return size;
+}
+
+static bool xe_migrate_allow_identity(u64 size, const struct xe_res_cursor *cur)
+{
+	/* The identity map is usable when @size fits the current block. */
+	return size <= cur->size;
+}
+
+static u32 pte_update_size(struct xe_migrate *m,
+			   bool is_vram,
+			   struct ttm_resource *res,
+			   struct xe_res_cursor *cur,
+			   u64 *L0, u64 *L0_ofs, u32 *L0_pt,
+			   u32 cmd_size, u32 pt_ofs, u32 avail_pts)
+{
+	u64 size, num_4k_pages;
+	u32 cmds;
+
+	*L0_pt = pt_ofs;
+	if (is_vram && xe_migrate_allow_identity(*L0, cur)) {
+		/* Unfragmented VRAM: address it through the identity map. */
+		*L0_ofs = xe_migrate_vram_ofs(tile_to_xe(m->tile),
+					      cur->start + vram_region_gpu_offset(res));
+		return cmd_size;
+	}
+
+	/* Otherwise go through PTs; clip L0 to what they can cover. */
+	size = min(*L0, (u64)avail_pts * SZ_2M);
+	num_4k_pages = DIV_ROUND_UP(size, XE_PAGE_SIZE);
+
+	*L0 = size;
+	*L0_ofs = xe_migrate_vm_addr(pt_ofs, 0);
+
+	/* One 3-dword MI_STORE_DATA_IMM header per 0x1ff entries */
+	cmds = 3 * DIV_ROUND_UP(num_4k_pages, 0x1ff);
+
+	/* One qword (two dwords) of payload per 4K page */
+	cmds += num_4k_pages * 2;
+	/* Each chunk has a single blit command */
+	cmds += cmd_size;
+
+	return cmds;
+}
+
+static void emit_pte(struct xe_migrate *m,
+		     struct xe_bb *bb, u32 at_pt,
+		     bool is_vram, bool is_comp_pte,
+		     struct xe_res_cursor *cur,
+		     u32 size, struct ttm_resource *res)
+{
+	struct xe_device *xe = tile_to_xe(m->tile);
+	struct xe_vm *vm = m->q->vm;
+	u32 ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE);
+	u64 ofs = at_pt * XE_PAGE_SIZE;
+	u16 pat_index;
+	u64 cur_ofs;
+
+	/*
+	 * Write-back is the default. On graphics version 20.00 and later,
+	 * compressed PTEs use the uncached compression-enabled PAT index.
+	 */
+	pat_index = xe->pat.idx[XE_CACHE_WB];
+	if (GRAPHICS_VERx100(xe) >= 2000 && is_comp_pte)
+		pat_index = xe->pat.idx[XE_CACHE_NONE_COMPRESSION];
+
+	while (ptes) {
+		u32 chunk = min(0x1ffU, ptes);
+		u32 i;
+
+		/* One MI_STORE_DATA_IMM per chunk, targeting offset @ofs. */
+		bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
+		bb->cs[bb->len++] = ofs;
+		bb->cs[bb->len++] = 0;
+
+		cur_ofs = ofs;
+		ofs += chunk * 8;
+		ptes -= chunk;
+
+		for (i = 0; i < chunk; i++) {
+			u64 addr = xe_res_dma(cur) & PAGE_MASK;
+			u64 flags = 0;
+
+			if (is_vram) {
+				if (vm->flags & XE_VM_FLAG_64K) {
+					u64 va = cur_ofs * XE_PAGE_SIZE / 8;
+
+					xe_assert(xe, (va & (SZ_64K - 1)) ==
+						  (addr & (SZ_64K - 1)));
+
+					flags |= XE_PTE_PS64;
+				}
+
+				addr += vram_region_gpu_offset(res);
+			}
+
+			/* Only VRAM addresses are encoded as device memory. */
+			addr = vm->pt_ops->pte_encode_addr(m->tile->xe,
+							   addr, pat_index,
+							   0, is_vram, flags);
+			bb->cs[bb->len++] = lower_32_bits(addr);
+			bb->cs[bb->len++] = upper_32_bits(addr);
+
+			xe_res_next(cur, min_t(u32, size, PAGE_SIZE));
+			cur_ofs += 8;
+		}
+	}
+}
+
+#define EMIT_COPY_CCS_DW 5
+static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb,
+			  u64 dst_ofs, bool dst_is_indirect,
+			  u64 src_ofs, bool src_is_indirect,
+			  u32 size)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 *cs = bb->cs + bb->len;
+	u32 ccs_copy_size, mocs;
+
+	if (GRAPHICS_VERx100(xe) < 2000) {
+		/* The size field counts CCS blocks. */
+		u32 num_ccs_blks = DIV_ROUND_UP(xe_device_ccs_bytes(xe, size),
+						NUM_CCS_BYTES_PER_BLOCK);
+
+		xe_gt_assert(gt, FIELD_FIT(CCS_SIZE_MASK, num_ccs_blks - 1));
+
+		ccs_copy_size = REG_FIELD_PREP(CCS_SIZE_MASK, num_ccs_blks - 1);
+		mocs = FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, gt->mocs.uc_index);
+	} else {
+		/* The size field counts XE_PAGE_SIZE pages. */
+		u32 num_pages = DIV_ROUND_UP(size, XE_PAGE_SIZE);
+
+		xe_gt_assert(gt, FIELD_FIT(XE2_CCS_SIZE_MASK, num_pages - 1));
+
+		ccs_copy_size = REG_FIELD_PREP(XE2_CCS_SIZE_MASK, num_pages - 1);
+		mocs = FIELD_PREP(XE2_XY_CTRL_SURF_MOCS_INDEX_MASK, gt->mocs.uc_index);
+	}
+
+	*cs++ = XY_CTRL_SURF_COPY_BLT |
+		(src_is_indirect ? 0x0 : 0x1) << SRC_ACCESS_TYPE_SHIFT |
+		(dst_is_indirect ? 0x0 : 0x1) << DST_ACCESS_TYPE_SHIFT |
+		ccs_copy_size;
+	*cs++ = lower_32_bits(src_ofs);
+	*cs++ = upper_32_bits(src_ofs) | mocs;
+	*cs++ = lower_32_bits(dst_ofs);
+	*cs++ = upper_32_bits(dst_ofs) | mocs;
+
+	bb->len = cs - bb->cs;
+}
+
+#define EMIT_COPY_DW 10
+static void emit_copy(struct xe_gt *gt, struct xe_bb *bb,
+		      u64 src_ofs, u64 dst_ofs, unsigned int size,
+		      unsigned int pitch)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 *cs = bb->cs + bb->len;
+	u32 mocs = 0, tile_y = 0;
+
+	xe_gt_assert(gt, size / pitch <= S16_MAX);
+	xe_gt_assert(gt, pitch / 4 <= S16_MAX);
+	xe_gt_assert(gt, pitch <= U16_MAX);
+
+	if (GRAPHICS_VER(xe) >= 20)
+		mocs = FIELD_PREP(XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK, gt->mocs.uc_index);
+	if (GRAPHICS_VERx100(xe) >= 1250)
+		tile_y = XY_FAST_COPY_BLT_D1_SRC_TILE4 | XY_FAST_COPY_BLT_D1_DST_TILE4;
+
+	*cs++ = XY_FAST_COPY_BLT_CMD | (EMIT_COPY_DW - 2);
+	*cs++ = XY_FAST_COPY_BLT_DEPTH_32 | pitch | tile_y | mocs;
+	*cs++ = 0;
+	*cs++ = (size / pitch) << 16 | pitch / 4;
+	*cs++ = lower_32_bits(dst_ofs);
+	*cs++ = upper_32_bits(dst_ofs);
+	*cs++ = 0;
+	*cs++ = pitch | mocs;
+	*cs++ = lower_32_bits(src_ofs);
+	*cs++ = upper_32_bits(src_ofs);
+	bb->len = cs - bb->cs;
+}
+
+static int job_add_deps(struct xe_sched_job *job, struct dma_resv *resv,
+			enum dma_resv_usage usage)
+{
+	return drm_sched_job_add_resv_dependencies(&job->drm, resv, usage);
+}
+
+static u64 xe_migrate_batch_base(struct xe_migrate *m, bool usm)
+{
+	return usm ? m->usm_batch_base_ofs : m->batch_base_ofs;
+}
+
+static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
+			       struct xe_bb *bb,
+			       u64 src_ofs, bool src_is_indirect,
+			       u64 dst_ofs, bool dst_is_indirect, u32 dst_size,
+			       u64 ccs_ofs, bool copy_ccs)
+{
+	struct xe_gt *gt = m->tile->primary_gt;
+
+	if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_indirect) {
+		/*
+		 * A source that is already in vram has either been cleared
+		 * by us or populated by the user, so its CCS aux state is
+		 * copied as-is.
+		 *
+		 * Otherwise, if the bo has no CCS metadata attached, it
+		 * still needs to be cleared for security reasons.
+		 */
+		u64 ccs_src_ofs = src_is_indirect ? src_ofs : m->cleared_mem_ofs;
+
+		emit_copy_ccs(gt, bb,
+			      dst_ofs, true,
+			      ccs_src_ofs, src_is_indirect, dst_size);
+
+		return MI_FLUSH_DW_CCS;
+	}
+
+	if (!copy_ccs)
+		return 0;
+
+	/* The direct side, if any, goes through @ccs_ofs instead. */
+	if (!src_is_indirect)
+		src_ofs = ccs_ofs;
+	else if (!dst_is_indirect)
+		dst_ofs = ccs_ofs;
+
+	xe_gt_assert(gt, src_is_indirect || dst_is_indirect);
+	emit_copy_ccs(gt, bb, dst_ofs, dst_is_indirect, src_ofs,
+		      src_is_indirect, dst_size);
+
+	return dst_is_indirect ? MI_FLUSH_DW_CCS : 0;
+}
+
+/**
+ * xe_migrate_copy() - Copy content of TTM resources.
+ * @m: The migration context.
+ * @src_bo: The buffer object @src is currently bound to.
+ * @dst_bo: If copying between resources created for the same bo, set this to
+ * the same value as @src_bo. If copying between buffer objects, set it to
+ * the buffer object @dst is currently bound to.
+ * @src: The source TTM resource.
+ * @dst: The dst TTM resource.
+ * @copy_only_ccs: If true, copy only the CCS metadata and skip the main copy.
+ *
+ * Copies the contents of @src to @dst: On flat CCS devices,
+ * the CCS metadata is copied as well if needed, or if not present,
+ * the CCS metadata of @dst is cleared for security reasons.
+ *
+ * Return: Pointer to a dma_fence representing the last copy batch, or
+ * an error pointer on failure. If there is a failure, any copy operation
+ * started by the function call has been synced.
+ */
+struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
+				  struct xe_bo *src_bo,
+				  struct xe_bo *dst_bo,
+				  struct ttm_resource *src,
+				  struct ttm_resource *dst,
+				  bool copy_only_ccs)
+{
+	struct xe_gt *gt = m->tile->primary_gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct dma_fence *fence = NULL;
+	u64 size = src_bo->size;
+	struct xe_res_cursor src_it, dst_it, ccs_it;
+	u64 src_L0_ofs, dst_L0_ofs;
+	u32 src_L0_pt, dst_L0_pt;
+	u64 src_L0, dst_L0;
+	int pass = 0;
+	int err;
+	bool src_is_pltt = src->mem_type == XE_PL_TT;
+	bool dst_is_pltt = dst->mem_type == XE_PL_TT;
+	bool src_is_vram = mem_type_is_vram(src->mem_type);
+	bool dst_is_vram = mem_type_is_vram(dst->mem_type);
+	bool copy_ccs = xe_device_has_flat_ccs(xe) &&
+		xe_bo_needs_ccs_pages(src_bo) && xe_bo_needs_ccs_pages(dst_bo);
+	bool copy_system_ccs = copy_ccs && (!src_is_vram || !dst_is_vram);
+
+	/* Copying CCS between two different BOs is not supported yet. */
+	if (XE_WARN_ON(copy_ccs && src_bo != dst_bo))
+		return ERR_PTR(-EINVAL);
+
+	if (src_bo != dst_bo && XE_WARN_ON(src_bo->size != dst_bo->size))
+		return ERR_PTR(-EINVAL);
+
+	if (!src_is_vram)
+		xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it);
+	else
+		xe_res_first(src, 0, size, &src_it);
+	if (!dst_is_vram)
+		xe_res_first_sg(xe_bo_sg(dst_bo), 0, size, &dst_it);
+	else
+		xe_res_first(dst, 0, size, &dst_it);
+
+	if (copy_system_ccs)
+		xe_res_first_sg(xe_bo_sg(src_bo), xe_bo_ccs_pages_start(src_bo),
+				PAGE_ALIGN(xe_device_ccs_bytes(xe, size)),
+				&ccs_it);
+
+	while (size) {
+		u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */
+		struct xe_sched_job *job;
+		struct xe_bb *bb;
+		u32 flush_flags;
+		u32 update_idx;
+		u64 ccs_ofs, ccs_size;
+		u32 ccs_pt;
+
+		bool usm = xe->info.has_usm;
+		u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;
+
+		src_L0 = xe_migrate_res_sizes(m, &src_it);
+		dst_L0 = xe_migrate_res_sizes(m, &dst_it);
+
+		drm_dbg(&xe->drm, "Pass %u, sizes: %llu & %llu\n",
+			pass++, src_L0, dst_L0);
+
+		src_L0 = min(src_L0, dst_L0);
+
+		batch_size += pte_update_size(m, src_is_vram, src, &src_it, &src_L0,
+					      &src_L0_ofs, &src_L0_pt, 0, 0,
+					      avail_pts);
+
+		batch_size += pte_update_size(m, dst_is_vram, dst, &dst_it, &src_L0,
+					      &dst_L0_ofs, &dst_L0_pt, 0,
+					      avail_pts, avail_pts);
+
+		if (copy_system_ccs) {
+			ccs_size = xe_device_ccs_bytes(xe, src_L0);
+			batch_size += pte_update_size(m, false, NULL, &ccs_it, &ccs_size,
+						      &ccs_ofs, &ccs_pt, 0,
+						      2 * avail_pts,
+						      avail_pts);
+			xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE));
+		}
+
+		/* Add copy commands size here */
+		batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) +
+			((xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0));
+
+		bb = xe_bb_new(gt, batch_size, usm);
+		if (IS_ERR(bb)) {
+			err = PTR_ERR(bb);
+			goto err_sync;
+		}
+
+		if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it))
+			xe_res_next(&src_it, src_L0);
+		else
+			emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs,
+				 &src_it, src_L0, src);
+
+		if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it))
+			xe_res_next(&dst_it, src_L0);
+		else
+			emit_pte(m, bb, dst_L0_pt, dst_is_vram, copy_system_ccs,
+				 &dst_it, src_L0, dst);
+
+		if (copy_system_ccs)
+			emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src);
+
+		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+		update_idx = bb->len;
+
+		if (!copy_only_ccs)
+			emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, XE_PAGE_SIZE);
+
+		flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
+						  IS_DGFX(xe) ? src_is_vram : src_is_pltt,
+						  dst_L0_ofs,
+						  IS_DGFX(xe) ? dst_is_vram : dst_is_pltt,
+						  src_L0, ccs_ofs, copy_ccs);
+
+		mutex_lock(&m->job_mutex);
+		job = xe_bb_create_migration_job(m->q, bb,
+						 xe_migrate_batch_base(m, usm),
+						 update_idx);
+		if (IS_ERR(job)) {
+			err = PTR_ERR(job);
+			goto err;
+		}
+
+		xe_sched_job_add_migrate_flush(job, flush_flags);
+		if (!fence) {
+			err = job_add_deps(job, src_bo->ttm.base.resv,
+					   DMA_RESV_USAGE_BOOKKEEP);
+			if (!err && src_bo != dst_bo)
+				err = job_add_deps(job, dst_bo->ttm.base.resv,
+						   DMA_RESV_USAGE_BOOKKEEP);
+			if (err)
+				goto err_job;
+		}
+
+		xe_sched_job_arm(job);
+		dma_fence_put(fence);
+		fence = dma_fence_get(&job->drm.s_fence->finished);
+		xe_sched_job_push(job);
+
+		dma_fence_put(m->fence);
+		m->fence = dma_fence_get(fence);
+
+		mutex_unlock(&m->job_mutex);
+
+		xe_bb_free(bb, fence);
+		size -= src_L0;
+		continue;
+
+err_job:
+		xe_sched_job_put(job);
+err:
+		mutex_unlock(&m->job_mutex);
+		xe_bb_free(bb, NULL);
+
+err_sync:
+		/* Sync partial copy if any. FIXME: under job_mutex? */
+		if (fence) {
+			dma_fence_wait(fence, false);
+			dma_fence_put(fence);
+		}
+
+		return ERR_PTR(err);
+	}
+
+	return fence;
+}
+
+static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
+				 u32 size, u32 pitch)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 *cs = bb->cs + bb->len;
+	u32 len = PVC_MEM_SET_CMD_LEN_DW;
+	u32 mocs;
+
+	if (GRAPHICS_VERx100(xe) >= 2000)
+		mocs = FIELD_PREP(XE2_MEM_SET_MOCS_INDEX_MASK, gt->mocs.uc_index);
+	else
+		mocs = FIELD_PREP(PVC_MEM_SET_MOCS_INDEX_MASK, gt->mocs.uc_index);
+
+	*cs++ = PVC_MEM_SET_CMD | PVC_MEM_SET_MATRIX | (len - 2);
+	*cs++ = pitch - 1;
+	*cs++ = (size / pitch) - 1;
+	*cs++ = pitch - 1;
+	*cs++ = lower_32_bits(src_ofs);
+	*cs++ = upper_32_bits(src_ofs);
+	*cs++ = mocs;
+	xe_gt_assert(gt, cs - bb->cs == len + bb->len);
+	bb->len += len;
+}
+
+static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb,
+				 u64 src_ofs, u32 size, u32 pitch, bool is_vram)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 *cs = bb->cs + bb->len;
+	u32 len = XY_FAST_COLOR_BLT_DW;
+	u32 mocs;
+	u32 pad;
+
+	/*
+	 * Graphics versions before 12.50 take the shorter, 11-dword form of
+	 * the command; the length field in the header is (len - 2).
+	 */
+	if (GRAPHICS_VERx100(xe) < 1250)
+		len = 11;
+
+	/* A different MOCS mask applies from graphics version 20.00 on. */
+	if (GRAPHICS_VERx100(xe) >= 2000)
+		mocs = FIELD_PREP(XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK, gt->mocs.uc_index);
+	else
+		mocs = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, gt->mocs.uc_index);
+
+	*cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
+		(len - 2);
+	*cs++ = mocs | (pitch - 1);
+	*cs++ = 0;
+	*cs++ = (size / pitch) << 16 | pitch / 4;
+	*cs++ = lower_32_bits(src_ofs);
+	*cs++ = upper_32_bits(src_ofs);
+	*cs++ = (is_vram ? 0x0 : 0x1) << XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT;
+
+	/* The remaining dwords are zero: four, or nine in the long form. */
+	pad = len > 11 ? 9 : 4;
+	while (pad--)
+		*cs++ = 0;
+
+	xe_gt_assert(gt, cs - bb->cs == len + bb->len);
+
+	bb->len += len;
+}
+
+static bool has_service_copy_support(struct xe_gt *gt)
+{
+	/*
+	 * The question is whether the architecture was designed with service
+	 * copy functionality (specifically the new MEM_SET / MEM_COPY
+	 * instructions), so test the architectural engine list rather than
+	 * the actual one: these instructions are usable on BCS0 even if all
+	 * of the actual service copy engines (BCS1-BCS8) have been fused off.
+	 */
+	u64 service_copy = GENMASK(XE_HW_ENGINE_BCS8, XE_HW_ENGINE_BCS1);
+
+	return gt->info.__engine_mask & service_copy;
+}
+
+static u32 emit_clear_cmd_len(struct xe_gt *gt)
+{
+	/* Mirrors emit_clear(): MEM_SET with service copy, else the blit. */
+	if (!has_service_copy_support(gt))
+		return XY_FAST_COLOR_BLT_DW;
+
+	return PVC_MEM_SET_CMD_LEN_DW;
+}
+
+static void emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
+		       u32 size, u32 pitch, bool is_vram)
+{
+	if (!has_service_copy_support(gt)) {
+		emit_clear_main_copy(gt, bb, src_ofs, size, pitch, is_vram);
+		return;
+	}
+	emit_clear_link_copy(gt, bb, src_ofs, size, pitch);
+}
+
+/**
+ * xe_migrate_clear() - Clear content of TTM resources.
+ * @m: The migration context.
+ * @bo: The buffer object @dst is currently bound to.
+ * @dst: The dst TTM resource to be cleared.
+ *
+ * Clear the contents of @dst to zero. On flat CCS devices,
+ * the CCS metadata is cleared to zero as well on VRAM destinations.
+ * TODO: Eliminate the @bo argument.
+ *
+ * Return: Pointer to a dma_fence representing the last clear batch, or
+ * an error pointer on failure. If there is a failure, any clear operation
+ * started by the function call has been synced.
+ */
+struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
+				   struct xe_bo *bo,
+				   struct ttm_resource *dst)
+{
+	bool clear_vram = mem_type_is_vram(dst->mem_type);
+	struct xe_gt *gt = m->tile->primary_gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	bool clear_system_ccs = xe_bo_needs_ccs_pages(bo) && !IS_DGFX(xe);
+	struct dma_fence *fence = NULL;
+	u64 size = bo->size;
+	struct xe_res_cursor src_it;
+	struct ttm_resource *src = dst;
+	int err;
+	int pass = 0;
+
+	if (!clear_vram)
+		xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &src_it);
+	else
+		xe_res_first(src, 0, bo->size, &src_it);
+
+	while (size) {
+		u64 clear_L0_ofs;
+		u32 clear_L0_pt;
+		u32 flush_flags = 0;
+		u64 clear_L0;
+		struct xe_sched_job *job;
+		struct xe_bb *bb;
+		u32 batch_size, update_idx;
+
+		bool usm = xe->info.has_usm;
+		u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;
+
+		clear_L0 = xe_migrate_res_sizes(m, &src_it);
+
+		drm_dbg(&xe->drm, "Pass %u, size: %llu\n", pass++, clear_L0);
+
+		/* Calculate final sizes and batch size.. */
+		batch_size = 2 +
+			pte_update_size(m, clear_vram, src, &src_it,
+					&clear_L0, &clear_L0_ofs, &clear_L0_pt,
+					clear_system_ccs ? 0 : emit_clear_cmd_len(gt), 0,
+					avail_pts);
+
+		if (xe_device_has_flat_ccs(xe))
+			batch_size += EMIT_COPY_CCS_DW;
+
+		/* Clear commands */
+
+		if (WARN_ON_ONCE(!clear_L0))
+			break;
+
+		bb = xe_bb_new(gt, batch_size, usm);
+		if (IS_ERR(bb)) {
+			err = PTR_ERR(bb);
+			goto err_sync;
+		}
+
+		size -= clear_L0;
+		/* Preemption is enabled again by the ring ops. */
+		if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it))
+			xe_res_next(&src_it, clear_L0);
+		else
+			emit_pte(m, bb, clear_L0_pt, clear_vram, clear_system_ccs,
+				 &src_it, clear_L0, dst);
+
+		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+		update_idx = bb->len;
+
+		if (!clear_system_ccs)
+			emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE, clear_vram);
+
+		if (xe_device_has_flat_ccs(xe)) {
+			emit_copy_ccs(gt, bb, clear_L0_ofs, true,
+				      m->cleared_mem_ofs, false, clear_L0);
+			flush_flags = MI_FLUSH_DW_CCS;
+		}
+
+		mutex_lock(&m->job_mutex);
+		job = xe_bb_create_migration_job(m->q, bb,
+						 xe_migrate_batch_base(m, usm),
+						 update_idx);
+		if (IS_ERR(job)) {
+			err = PTR_ERR(job);
+			goto err;
+		}
+
+		xe_sched_job_add_migrate_flush(job, flush_flags);
+		if (!fence) {
+			/*
+			 * There can't be anything userspace related at this
+			 * point, so we just need to respect any potential move
+			 * fences, which are always tracked as
+			 * DMA_RESV_USAGE_KERNEL.
+			 */
+			err = job_add_deps(job, bo->ttm.base.resv,
+					   DMA_RESV_USAGE_KERNEL);
+			if (err)
+				goto err_job;
+		}
+
+		xe_sched_job_arm(job);
+		dma_fence_put(fence);
+		fence = dma_fence_get(&job->drm.s_fence->finished);
+		xe_sched_job_push(job);
+
+		dma_fence_put(m->fence);
+		m->fence = dma_fence_get(fence);
+
+		mutex_unlock(&m->job_mutex);
+
+		xe_bb_free(bb, fence);
+		continue;
+
+err_job:
+		xe_sched_job_put(job);
+err:
+		mutex_unlock(&m->job_mutex);
+		xe_bb_free(bb, NULL);
+err_sync:
+		/* Sync partial clears via our own fence. FIXME: job_mutex? */
+		if (fence) {
+			dma_fence_wait(fence, false);
+			dma_fence_put(fence);
+		}
+
+		return ERR_PTR(err);
+	}
+
+	if (clear_system_ccs)
+		bo->ccs_cleared = true;
+
+	return fence;
+}
+
+static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs,
+			  const struct xe_vm_pgtable_update *update,
+			  struct xe_migrate_pt_update *pt_update)
+{
+	const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
+	u32 chunk;
+	u32 ofs = update->ofs, size = update->qwords;
+
+	/*
+	 * If we have 512 entries (max), we would populate it ourselves,
+	 * and update the PDE above it to the new pointer.
+	 * The only time this can happen is if we have to update the top
+	 * PDE. This requires a BO that is almost vm->size big.
+	 *
+	 * This shouldn't be possible in practice.. might change when 16K
+	 * pages are used. Hence the assert.
+	 */
+	xe_tile_assert(tile, update->qwords <= 0x1ff);
+	if (!ppgtt_ofs)
+		ppgtt_ofs = xe_migrate_vram_ofs(tile_to_xe(tile),
+						xe_bo_addr(update->pt_bo, 0,
+							   XE_PAGE_SIZE));
+
+	do {
+		u64 addr = ppgtt_ofs + ofs * 8;
+
+		/* Clamp to what is left so @size cannot underflow below. */
+		chunk = min(size, 0x1ffU);
+
+		/* Ensure populatefn can do memset64 by aligning bb->cs */
+		if (!(bb->len & 1))
+			bb->cs[bb->len++] = MI_NOOP;
+
+		bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
+		bb->cs[bb->len++] = lower_32_bits(addr);
+		bb->cs[bb->len++] = upper_32_bits(addr);
+		ops->populate(pt_update, tile, NULL, bb->cs + bb->len, ofs, chunk, update);
+
+		bb->len += chunk * 2;
+		ofs += chunk;
+		size -= chunk;
+	} while (size);
+}
+
+/* Look up the VM of the migrate exec queue and hand it to xe_vm_get(). */
+struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m)
+{
+	struct xe_vm *migrate_vm = m->q->vm;
+
+	return xe_vm_get(migrate_vm);
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+/* Parameters of the live migrate KUnit test; only built with the KUnit tests. */
+struct migrate_test_params {
+	/* Embedded base; to_migrate_test_params() converts a pointer to it back. */
+	struct xe_test_priv base;
+	/* When set, xe_migrate_update_pgtables_cpu() bails out with -ETIME. */
+	bool force_gpu;
+};
+
+/* Convert a pointer to the embedded @base member to its migrate_test_params. */
+#define to_migrate_test_params(_priv) \
+	container_of(_priv, struct migrate_test_params, base)
+#endif
+
+/*
+ * Try to perform a page-table update directly from the CPU.
+ *
+ * Returns ERR_PTR(-ETIME) when the CPU path cannot be used right now (forced
+ * GPU path under test, or unsignaled fences on @bo's or @vm's reservation
+ * object), the error from ops->pre_commit() if that fails, and the dma-fence
+ * stub once all entries have been populated.
+ */
+static struct dma_fence *
+xe_migrate_update_pgtables_cpu(struct xe_migrate *m,
+			       struct xe_vm *vm, struct xe_bo *bo,
+			       const struct  xe_vm_pgtable_update *updates,
+			       u32 num_updates, bool wait_vm,
+			       struct xe_migrate_pt_update *pt_update)
+{
+	XE_TEST_DECLARE(struct migrate_test_params *test =
+			to_migrate_test_params
+			(xe_cur_kunit_priv(XE_TEST_LIVE_MIGRATE));)
+	const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
+	u32 idx;
+	int ret;
+
+	if (XE_TEST_ONLY(test && test->force_gpu))
+		return ERR_PTR(-ETIME);
+
+	/* Anything still pending on the BO or the VM rules out the CPU path. */
+	if ((bo && !dma_resv_test_signaled(bo->ttm.base.resv,
+					   DMA_RESV_USAGE_KERNEL)) ||
+	    (wait_vm && !dma_resv_test_signaled(xe_vm_resv(vm),
+						DMA_RESV_USAGE_BOOKKEEP)))
+		return ERR_PTR(-ETIME);
+
+	if (ops->pre_commit) {
+		/* No GPU job is involved on this path. */
+		pt_update->job = NULL;
+		ret = ops->pre_commit(pt_update);
+		if (ret)
+			return ERR_PTR(ret);
+	}
+
+	/* Write the entries through each page-table BO's CPU mapping. */
+	for (idx = 0; idx < num_updates; idx++) {
+		const struct xe_vm_pgtable_update *entry = &updates[idx];
+
+		ops->populate(pt_update, m->tile, &entry->pt_bo->vmap, NULL,
+			      entry->ofs, entry->qwords, entry);
+	}
+
+	if (vm) {
+		trace_xe_vm_cpu_bind(vm);
+		xe_device_wmb(vm->xe);
+	}
+
+	return dma_fence_get_stub();
+}
+
+/*
+ * Report whether nothing needs to be awaited before an update: every fence in
+ * @syncs that is set has its signaled bit set and, if @q is given, so has the
+ * fence returned by xe_exec_queue_last_fence_get().
+ */
+static bool no_in_syncs(struct xe_vm *vm, struct xe_exec_queue *q,
+			struct xe_sync_entry *syncs, u32 num_syncs)
+{
+	struct dma_fence *last;
+	bool signaled;
+	int idx;
+
+	for (idx = 0; idx < num_syncs; idx++) {
+		struct dma_fence *in_fence = syncs[idx].fence;
+
+		if (!in_fence)
+			continue;
+
+		if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &in_fence->flags))
+			return false;
+	}
+
+	if (!q)
+		return true;
+
+	last = xe_exec_queue_last_fence_get(q, vm);
+	signaled = test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &last->flags);
+	dma_fence_put(last);
+
+	return signaled;
+}
+
+/**
+ * xe_migrate_update_pgtables() - Pipelined page-table update
+ * @m: The migrate context.
+ * @vm: The vm we'll be updating.
+ * @bo: The bo whose dma-resv we will await before updating, or NULL if userptr.
+ * @q: The exec queue to be used for the update or NULL if the default
+ * migration engine is to be used.
+ * @updates: An array of update descriptors.
+ * @num_updates: Number of descriptors in @updates.
+ * @syncs: Array of xe_sync_entry to await before updating. Note that waits
+ * will block the engine timeline.
+ * @num_syncs: Number of entries in @syncs.
+ * @pt_update: Pointer to a struct xe_migrate_pt_update, which contains
+ * pointers to callback functions and, if subclassed, private arguments to
+ * those.
+ *
+ * Perform a pipelined page-table update. The update descriptors are typically
+ * built under the same lock critical section as a call to this function. If
+ * using the default engine for the updates, they will be performed in the
+ * order they grab the job_mutex. If different engines are used, external
+ * synchronization is needed for overlapping updates to maintain page-table
+ * consistency. Note that the meaning of "overlapping" is that the updates
+ * touch the same page-table, which might be a higher-level page-directory.
+ * If no pipelining is needed, then updates may be performed by the cpu.
+ *
+ * Return: A dma_fence that, when signaled, indicates the update completion,
+ * or an ERR_PTR() on failure.
+ */
+struct dma_fence *
+xe_migrate_update_pgtables(struct xe_migrate *m,
+			   struct xe_vm *vm,
+			   struct xe_bo *bo,
+			   struct xe_exec_queue *q,
+			   const struct xe_vm_pgtable_update *updates,
+			   u32 num_updates,
+			   struct xe_sync_entry *syncs, u32 num_syncs,
+			   struct xe_migrate_pt_update *pt_update)
+{
+	const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
+	struct xe_tile *tile = m->tile;
+	struct xe_gt *gt = tile->primary_gt;
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_sched_job *job;
+	struct dma_fence *fence;
+	struct drm_suballoc *sa_bo = NULL;
+	struct xe_vma *vma = pt_update->vma;
+	struct xe_bb *bb;
+	u32 i, batch_size, ppgtt_ofs, update_idx, page_ofs = 0;
+	u64 addr;
+	int err = 0;
+	bool usm = !q && xe->info.has_usm;
+	bool first_munmap_rebind = vma &&
+		vma->gpuva.flags & XE_VMA_FIRST_REBIND;
+	struct xe_exec_queue *q_override = !q ? m->q : q;
+	u16 pat_index = xe->pat.idx[XE_CACHE_WB];
+
+	/* Use the CPU if no in syncs and engine is idle */
+	if (no_in_syncs(vm, q, syncs, num_syncs) && xe_exec_queue_is_idle(q_override)) {
+		fence =  xe_migrate_update_pgtables_cpu(m, vm, bo, updates,
+							num_updates,
+							first_munmap_rebind,
+							pt_update);
+		/* Any other error falls through to the GPU path below. */
+		if (!IS_ERR(fence) || fence == ERR_PTR(-EAGAIN))
+			return fence;
+	}
+
+	/* fixed + PTE entries */
+	if (IS_DGFX(xe))
+		batch_size = 2;
+	else
+		batch_size = 6 + num_updates * 2;
+
+	for (i = 0; i < num_updates; i++) {
+		u32 num_cmds = DIV_ROUND_UP(updates[i].qwords, 0x1ff);
+
+		/* align noop + MI_STORE_DATA_IMM cmd prefix */
+		batch_size += 4 * num_cmds + updates[i].qwords * 2;
+	}
+
+	/*
+	 * XXX: Create temp bo to copy from, if batch_size becomes too big?
+	 *
+	 * Worst case: Sum(2 * (each lower level page size) + (top level page size))
+	 * Should be reasonably bound..
+	 */
+	xe_tile_assert(tile, batch_size < SZ_128K);
+
+	bb = xe_bb_new(gt, batch_size, usm);
+	if (IS_ERR(bb))
+		return ERR_CAST(bb);
+
+	/* For sysmem PTE's, need to map them in our hole.. */
+	if (!IS_DGFX(xe)) {
+		ppgtt_ofs = NUM_KERNEL_PDE - 1;
+		if (q) {
+			xe_tile_assert(tile, num_updates <= NUM_VMUSA_WRITES_PER_UNIT);
+
+			sa_bo = drm_suballoc_new(&m->vm_update_sa, 1,
+						 GFP_KERNEL, true, 0);
+			if (IS_ERR(sa_bo)) {
+				err = PTR_ERR(sa_bo);
+				/*
+				 * Never hand an ERR_PTR to drm_suballoc_free(),
+				 * and make sure the batch buffer is released.
+				 */
+				sa_bo = NULL;
+				goto err_bb;
+			}
+
+			ppgtt_ofs = NUM_KERNEL_PDE +
+				(drm_suballoc_soffset(sa_bo) /
+				 NUM_VMUSA_UNIT_PER_PAGE);
+			page_ofs = (drm_suballoc_soffset(sa_bo) %
+				    NUM_VMUSA_UNIT_PER_PAGE) *
+				VM_SA_UPDATE_UNIT_SIZE;
+		}
+
+		/* Map our PT's to gtt */
+		bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(num_updates);
+		bb->cs[bb->len++] = ppgtt_ofs * XE_PAGE_SIZE + page_ofs;
+		bb->cs[bb->len++] = 0; /* upper_32_bits */
+
+		for (i = 0; i < num_updates; i++) {
+			struct xe_bo *pt_bo = updates[i].pt_bo;
+
+			xe_tile_assert(tile, pt_bo->size == SZ_4K);
+
+			addr = vm->pt_ops->pte_encode_bo(pt_bo, 0, pat_index, 0);
+			bb->cs[bb->len++] = lower_32_bits(addr);
+			bb->cs[bb->len++] = upper_32_bits(addr);
+		}
+
+		/* End of the first batch; the second one starts at update_idx. */
+		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+		update_idx = bb->len;
+
+		addr = xe_migrate_vm_addr(ppgtt_ofs, 0) +
+			(page_ofs / sizeof(u64)) * XE_PAGE_SIZE;
+		for (i = 0; i < num_updates; i++)
+			write_pgtable(tile, bb, addr + i * XE_PAGE_SIZE,
+				      &updates[i], pt_update);
+	} else {
+		/* phys pages, no preamble required */
+		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+		update_idx = bb->len;
+
+		for (i = 0; i < num_updates; i++)
+			write_pgtable(tile, bb, 0, &updates[i], pt_update);
+	}
+
+	/* Only the default migrate queue is serialized by job_mutex. */
+	if (!q)
+		mutex_lock(&m->job_mutex);
+
+	job = xe_bb_create_migration_job(q ?: m->q, bb,
+					 xe_migrate_batch_base(m, usm),
+					 update_idx);
+	if (IS_ERR(job)) {
+		err = PTR_ERR(job);
+		goto err_unlock;
+	}
+
+	/* Wait on BO move */
+	if (bo) {
+		err = job_add_deps(job, bo->ttm.base.resv,
+				   DMA_RESV_USAGE_KERNEL);
+		if (err)
+			goto err_job;
+	}
+
+	/*
+	 * Munmap style VM unbind, need to wait for all jobs to be complete /
+	 * trigger preempts before moving forward
+	 */
+	if (first_munmap_rebind) {
+		err = job_add_deps(job, xe_vm_resv(vm),
+				   DMA_RESV_USAGE_BOOKKEEP);
+		if (err)
+			goto err_job;
+	}
+
+	err = xe_sched_job_last_fence_add_dep(job, vm);
+	for (i = 0; !err && i < num_syncs; i++)
+		err = xe_sync_entry_add_deps(&syncs[i], job);
+
+	if (err)
+		goto err_job;
+
+	if (ops->pre_commit) {
+		pt_update->job = job;
+		err = ops->pre_commit(pt_update);
+		if (err)
+			goto err_job;
+	}
+	xe_sched_job_arm(job);
+	fence = dma_fence_get(&job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+
+	if (!q)
+		mutex_unlock(&m->job_mutex);
+
+	xe_bb_free(bb, fence);
+	drm_suballoc_free(sa_bo, fence);
+
+	return fence;
+
+err_job:
+	xe_sched_job_put(job);
+err_unlock:
+	if (!q)
+		mutex_unlock(&m->job_mutex);
+err_bb:
+	xe_bb_free(bb, NULL);
+	drm_suballoc_free(sa_bo, NULL);
+	return ERR_PTR(err);
+}
+
+/**
+ * xe_migrate_wait() - Complete all operations using the xe_migrate context
+ * @m: Migrate context to wait for.
+ *
+ * Blocks (uninterruptibly) on the migrate context's tracked fence, if there
+ * is one, so that the GPU no longer uses the context's default engine or its
+ * page-table objects. FIXME: What about separate page-table update
+ * engines?
+ */
+void xe_migrate_wait(struct xe_migrate *m)
+{
+	/* Nothing has been tracked yet, so there is nothing to wait for. */
+	if (!m->fence)
+		return;
+
+	dma_fence_wait(m->fence, false);
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_migrate.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
new file mode 100644
index 000000000000..951f19318ea4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef _XE_MIGRATE_
+#define _XE_MIGRATE_
+
+#include <drm/drm_mm.h>
+
+struct dma_fence;
+struct iosys_map;
+struct ttm_resource;
+
+struct xe_bo;
+struct xe_gt;
+struct xe_exec_queue;
+struct xe_migrate;
+struct xe_migrate_pt_update;
+struct xe_sync_entry;
+struct xe_pt;
+struct xe_tile;
+struct xe_vm;
+struct xe_vm_pgtable_update;
+struct xe_vma;
+
+/**
+ * struct xe_migrate_pt_update_ops - Callbacks for the
+ * xe_migrate_update_pgtables() function.
+ */
+struct xe_migrate_pt_update_ops {
+	/**
+	 * @populate: Populate a command buffer or page-table with ptes.
+	 * @pt_update: Embeddable callback argument.
+	 * @tile: The tile for the current operation.
+	 * @map: struct iosys_map into the memory to be populated, or NULL
+	 * if @pos is used instead.
+	 * @pos: If @map is NULL, pointer to the memory to be populated
+	 * (a location inside a command buffer for GPU updates).
+	 * @ofs: qword offset into @map, unused if @map is NULL.
+	 * NOTE(review): callers pass the page-table qword offset here even
+	 * when @map is NULL - confirm whether implementations rely on it.
+	 * @num_qwords: Number of qwords to write.
+	 * @update: Information about the PTEs to be inserted.
+	 *
+	 * This interface is intended to be used as a callback into the
+	 * page-table system to populate command buffers or shared
+	 * page-tables with PTEs.
+	 */
+	void (*populate)(struct xe_migrate_pt_update *pt_update,
+			 struct xe_tile *tile, struct iosys_map *map,
+			 void *pos, u32 ofs, u32 num_qwords,
+			 const struct xe_vm_pgtable_update *update);
+
+	/**
+	 * @pre_commit: Optional callback to be called just before arming the
+	 * sched_job, or just before the CPU writes the page-tables when no
+	 * job is used.
+	 * @pt_update: Pointer to embeddable callback argument.
+	 *
+	 * Return: 0 on success, negative error code on error.
+	 */
+	int (*pre_commit)(struct xe_migrate_pt_update *pt_update);
+};
+
+/**
+ * struct xe_migrate_pt_update - Argument to the
+ * struct xe_migrate_pt_update_ops callbacks.
+ *
+ * Intended to be subclassed to support additional arguments if necessary.
+ */
+struct xe_migrate_pt_update {
+	/** @ops: Pointer to the struct xe_migrate_pt_update_ops callbacks */
+	const struct xe_migrate_pt_update_ops *ops;
+	/** @vma: The vma we're updating the pagetable for. */
+	struct xe_vma *vma;
+	/**
+	 * @job: The job if a GPU page-table update. NULL otherwise. Set by
+	 * the migrate code right before the pre_commit callback is invoked.
+	 */
+	struct xe_sched_job *job;
+	/** @start: Start of update for the range fence */
+	u64 start;
+	/** @last: Last of update for the range fence */
+	u64 last;
+	/** @tile_id: Tile ID of the update */
+	u8 tile_id;
+};
+
+struct xe_migrate *xe_migrate_init(struct xe_tile *tile);
+
+struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
+				  struct xe_bo *src_bo,
+				  struct xe_bo *dst_bo,
+				  struct ttm_resource *src,
+				  struct ttm_resource *dst,
+				  bool copy_only_ccs);
+
+struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
+				   struct xe_bo *bo,
+				   struct ttm_resource *dst);
+
+struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m);
+
+struct dma_fence *
+xe_migrate_update_pgtables(struct xe_migrate *m,
+			   struct xe_vm *vm,
+			   struct xe_bo *bo,
+			   struct xe_exec_queue *q,
+			   const struct xe_vm_pgtable_update *updates,
+			   u32 num_updates,
+			   struct xe_sync_entry *syncs, u32 num_syncs,
+			   struct xe_migrate_pt_update *pt_update);
+
+void xe_migrate_wait(struct xe_migrate *m);
+
+struct xe_exec_queue *xe_tile_migrate_engine(struct xe_tile *tile);
+#endif
diff --git a/drivers/gpu/drm/xe/xe_migrate_doc.h b/drivers/gpu/drm/xe/xe_migrate_doc.h
new file mode 100644
index 000000000000..63c7d67b5b62
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_migrate_doc.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_MIGRATE_DOC_H_
+#define _XE_MIGRATE_DOC_H_
+
+/**
+ * DOC: Migrate Layer
+ *
+ * The XE migrate layer is used to generate jobs which can copy memory
+ * (eviction), clear memory, or program tables (binds). This layer exists in
+ * every GT, has a migrate engine, and uses a special VM for all generated jobs.
+ *
+ * Special VM details
+ * ==================
+ *
+ * The special VM is configured with a page structure where we can dynamically
+ * map BOs which need to be copied and cleared, dynamically map other VM's page
+ * table BOs for updates, and identity map the entire device's VRAM with 1 GB
+ * pages.
+ *
+ * Currently the page structure consists of 32 physical pages: 16 are reserved
+ * for BO mapping during copies and clears, 1 is reserved for kernel binds,
+ * several pages are needed to set up the identity mappings (exact number based
+ * on how many bits of address space the device has), and the rest are reserved
+ * for user bind operations.
+ *
+ * TODO: Diagram of layout
+ *
+ * Bind jobs
+ * =========
+ *
+ * A bind job consists of two batches and runs either on the migrate engine
+ * (kernel binds) or the bind engine passed in (user binds). In both cases the
+ * VM of the engine is the migrate VM.
+ *
+ * The first batch is used to update the migration VM page structure to point to
+ * the bind VM page table BOs which need to be updated. A physical page is
+ * required for this. If it is a user bind, the page is allocated from the pool
+ * of pages reserved for user bind operations, with drm_suballoc managing this
+ * pool. If it is a kernel bind, the page reserved for kernel binds is used.
+ *
+ * The first batch is only required for devices without VRAM as when the device
+ * has VRAM the bind VM page table BOs are in VRAM and the identity mapping can
+ * be used.
+ *
+ * The second batch is used to program the page table updates in the bind VM.
+ * Why not just one batch? Well, the TLBs need to be invalidated between these
+ * two batches and that can only be done from the ring.
+ *
+ * When the bind job completes, the allocated page is returned to the pool of
+ * pages reserved for user bind operations if it is a user bind. There is no
+ * need to do this for kernel binds as the reserved kernel page is serially
+ * used by each job.
+ *
+ * Copy / clear jobs
+ * =================
+ *
+ * A copy or clear job consists of two batches and runs on the migrate engine.
+ *
+ * Like binds, the first batch is used to update the migration VM page
+ * structure. In copy jobs, we need to map the source and destination of the BO
+ * into the page structure. In clear jobs, we just need to add 1 mapping of the
+ * BO into the page structure. We use the 16 reserved pages in the migration VM
+ * for mappings; this gives us a maximum copy size of 16 MB and a maximum clear
+ * size of 32 MB.
+ *
+ * The second batch is used to do either the copy or the clear. Again, similar
+ * to binds, two batches are required as the TLBs need to be invalidated from
+ * the ring between the batches.
+ *
+ * More than one job will be generated if the BO is larger than maximum copy /
+ * clear size.
+ *
+ * Future work
+ * ===========
+ *
+ * Update copy and clear code to use identity mapped VRAM.
+ *
+ * Can we rework the use of the pages for async binds to use all the entries in
+ * each page?
+ *
+ * Using large pages for sysmem mappings.
+ *
+ * Is it possible to identity map the sysmem? We should explore this.
+ */
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
new file mode 100644
index 000000000000..02f7808f28ca
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -0,0 +1,524 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021-2023 Intel Corporation
+ */
+
+#include <linux/minmax.h>
+
+#include "xe_mmio.h"
+
+#include <drm/drm_managed.h>
+#include <drm/xe_drm.h>
+
+#include "regs/xe_engine_regs.h"
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_macros.h"
+#include "xe_module.h"
+#include "xe_tile.h"
+
+#define XEHP_MTCFG_ADDR		XE_REG(0x101800)
+#define TILE_COUNT		REG_GENMASK(15, 8)
+
+#define BAR_SIZE_SHIFT 20
+
+/*
+ * Release BAR @resno if it currently has a non-zero length, then ask the PCI
+ * core to resize it to @size bytes. The outcome is only logged; nothing is
+ * returned to the caller.
+ */
+static void
+_resize_bar(struct xe_device *xe, int resno, resource_size_t size)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	int bar_size = pci_rebar_bytes_to_size(size);
+	int ret;
+
+	if (pci_resource_len(pdev, resno))
+		pci_release_resource(pdev, resno);
+
+	ret = pci_resize_resource(pdev, resno, bar_size);
+	if (!ret) {
+		drm_info(&xe->drm, "BAR%d resized to %dM\n", resno, 1 << bar_size);
+		return;
+	}
+
+	drm_info(&xe->drm, "Failed to resize BAR%d to %dM (%pe). Consider enabling 'Resizable BAR' support in your BIOS\n",
+		 resno, 1 << bar_size, ERR_PTR(ret));
+}
+
+/*
+ * Try to resize the VRAM (LMEM) BAR.
+ *
+ * if force_vram_bar_size is set, attempt to set to the requested size
+ * else set to maximum possible size
+ *
+ * Failures are only logged; the caller re-reads the BAR afterwards.
+ */
+static void xe_resize_vram_bar(struct xe_device *xe)
+{
+	u64 force_vram_bar_size = xe_modparam.force_vram_bar_size;
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	struct pci_bus *root = pdev->bus;
+	resource_size_t current_size;
+	resource_size_t rebar_size;
+	struct resource *root_res;
+	u32 bar_size_mask;
+	u32 pci_cmd;
+	int i;
+
+	/* gather some relevant info */
+	current_size = pci_resource_len(pdev, LMEM_BAR);
+	bar_size_mask = pci_rebar_get_possible_sizes(pdev, LMEM_BAR);
+
+	/* No possible sizes reported: nothing to resize to. */
+	if (!bar_size_mask)
+		return;
+
+	/* set to a specific size? */
+	if (force_vram_bar_size) {
+		u32 bar_size_bit;
+
+		/* The module parameter is given in MiB. */
+		rebar_size = force_vram_bar_size * (resource_size_t)SZ_1M;
+
+		bar_size_bit = bar_size_mask & BIT(pci_rebar_bytes_to_size(rebar_size));
+
+		if (!bar_size_bit) {
+			drm_info(&xe->drm,
+				 "Requested size: %lluMiB is not supported by rebar sizes: 0x%x. Leaving default: %lluMiB\n",
+				 (u64)rebar_size >> 20, bar_size_mask, (u64)current_size >> 20);
+			return;
+		}
+
+		/* Convert the size bit back to bytes (bit 0 corresponds to 1 MiB). */
+		rebar_size = 1ULL << (__fls(bar_size_bit) + BAR_SIZE_SHIFT);
+
+		if (rebar_size == current_size)
+			return;
+	} else {
+		/* Largest size advertised in the mask. */
+		rebar_size = 1ULL << (__fls(bar_size_mask) + BAR_SIZE_SHIFT);
+
+		/* only resize if larger than current */
+		if (rebar_size <= current_size)
+			return;
+	}
+
+	drm_info(&xe->drm, "Attempting to resize bar from %lluMiB -> %lluMiB\n",
+		 (u64)current_size >> 20, (u64)rebar_size >> 20);
+
+	/* Walk up to the root bus. */
+	while (root->parent)
+		root = root->parent;
+
+	/* Look for a root bus memory resource that starts above 4 GiB. */
+	pci_bus_for_each_resource(root, root_res, i) {
+		if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
+		    (u64)root_res->start > 0x100000000ul)
+			break;
+	}
+
+	if (!root_res) {
+		drm_info(&xe->drm, "Can't resize VRAM BAR - platform support is missing. Consider enabling 'Resizable BAR' support in your BIOS\n");
+		return;
+	}
+
+	/* Disable memory decoding while the BAR is resized, then restore it. */
+	pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd);
+	pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY);
+
+	_resize_bar(xe, LMEM_BAR, rebar_size);
+
+	pci_assign_unassigned_bus_resources(pdev->bus);
+	pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd);
+}
+
+/*
+ * A BAR counts as valid when its resource flags are non-zero, it is not
+ * marked IORESOURCE_UNSET and it has a non-zero length.
+ */
+static bool xe_pci_resource_valid(struct pci_dev *pdev, int bar)
+{
+	unsigned long flags = pci_resource_flags(pdev, bar);
+
+	if (!flags || (flags & IORESOURCE_UNSET))
+		return false;
+
+	return pci_resource_len(pdev, bar) != 0;
+}
+
+/*
+ * Validate and (if possible) resize the LMEM BAR, then record its start and
+ * size in xe->mem.vram and map the whole BAR write-combined.
+ *
+ * Return: 0 on success, -ENXIO if the BAR is not valid, -EIO if the BAR has
+ * zero length after the resize attempt, -ENOMEM if it cannot be mapped.
+ */
+static int xe_determine_lmem_bar_size(struct xe_device *xe)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+
+	if (!xe_pci_resource_valid(pdev, LMEM_BAR)) {
+		drm_err(&xe->drm, "pci resource is not valid\n");
+		return -ENXIO;
+	}
+
+	xe_resize_vram_bar(xe);
+
+	/* Re-read the BAR: the resize attempt may have changed it. */
+	xe->mem.vram.io_start = pci_resource_start(pdev, LMEM_BAR);
+	xe->mem.vram.io_size = pci_resource_len(pdev, LMEM_BAR);
+	if (!xe->mem.vram.io_size)
+		return -EIO;
+
+	/* XXX: Need to change when xe link code is ready */
+	xe->mem.vram.dpa_base = 0;
+
+	/* set up a map to the total memory area. */
+	xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size);
+	if (!xe->mem.vram.mapping) {
+		/*
+		 * Per-tile mappings are computed as offsets from this pointer,
+		 * so a failed mapping must not be carried forward.
+		 */
+		drm_err(&xe->drm, "failed to map VRAM BAR\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * xe_mmio_tile_vram_size() - Collect vram size and offset information
+ * @tile: tile to get info for
+ * @vram_size: available vram (size - device reserved portions)
+ * @tile_size: actual vram size
+ * @tile_offset: physical start point in the vram address space
+ *
+ * There are 4 places for size information:
+ * - io size (from pci_resource_len of LMEM bar) (only used for small bar and DG1)
+ * - TILEx size (actual vram size)
+ * - GSMBASE offset (TILEx - "stolen")
+ * - CCSBASE offset (TILEx - CCS space necessary)
+ *
+ * CCSBASE is always a lower/smaller offset than GSMBASE.
+ *
+ * The actual available size of memory is to the CCS or GSM base.
+ * NOTE: multi-tile bases will include the tile offset.
+ *
+ * Return: 0 on success, or the error from taking or releasing the GT
+ * force wake.
+ */
+static int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size,
+				  u64 *tile_size, u64 *tile_offset)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_gt *gt = tile->primary_gt;
+	u64 offset;
+	int err;
+	u32 reg;
+
+	/* The register reads below are done with GT force wake held. */
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		return err;
+
+	/* actual size */
+	if (unlikely(xe->info.platform == XE_DG1)) {
+		/* DG1: the size is taken from the LMEM BAR length. */
+		*tile_size = pci_resource_len(to_pci_dev(xe->drm.dev), LMEM_BAR);
+		*tile_offset = 0;
+	} else {
+		/* Size and offset fields are scaled by 1 GiB here. */
+		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE_ADDR_RANGE(gt->info.id));
+		*tile_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg) * SZ_1G;
+		*tile_offset = (u64)REG_FIELD_GET(GENMASK(7, 1), reg) * SZ_1G;
+	}
+
+	/* minus device usage */
+	if (xe->info.has_flat_ccs) {
+		/* The flat CCS base field is scaled by 64 KiB here. */
+		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
+		offset = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K;
+	} else {
+		offset = xe_mmio_read64_2x32(gt, GSMBASE);
+	}
+
+	/* remove the tile offset so we have just the available size */
+	*vram_size = offset - *tile_offset;
+
+	return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
+/*
+ * Probe VRAM on discrete devices: set up the LMEM BAR mapping and fill in the
+ * per-tile and device-wide VRAM region information.
+ *
+ * Return: 0 on success (including on non-discrete devices, where nothing is
+ * done), negative error code otherwise.
+ */
+int xe_mmio_probe_vram(struct xe_device *xe)
+{
+	struct xe_tile *tile;
+	resource_size_t io_size;
+	u64 available_size = 0;
+	u64 total_size = 0;
+	u64 tile_offset;
+	u64 tile_size;
+	u64 vram_size;
+	int err;
+	u8 id;
+
+	if (!IS_DGFX(xe))
+		return 0;
+
+	/*
+	 * Get the size of the root tile's vram for later accessibility comparison
+	 * NOTE(review): the values read here are overwritten by the per-tile
+	 * loop below before being used; only the error check has an effect.
+	 */
+	tile = xe_device_get_root_tile(xe);
+	err = xe_mmio_tile_vram_size(tile, &vram_size, &tile_size, &tile_offset);
+	if (err)
+		return err;
+
+	err = xe_determine_lmem_bar_size(xe);
+	if (err)
+		return err;
+
+	drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &xe->mem.vram.io_start,
+		 &xe->mem.vram.io_size);
+
+	/* CPU-visible BAR space still to be handed out to tiles. */
+	io_size = xe->mem.vram.io_size;
+
+	/* tile specific ranges */
+	for_each_tile(tile, xe, id) {
+		err = xe_mmio_tile_vram_size(tile, &vram_size, &tile_size, &tile_offset);
+		if (err)
+			return err;
+
+		tile->mem.vram.actual_physical_size = tile_size;
+		tile->mem.vram.io_start = xe->mem.vram.io_start + tile_offset;
+		/* The CPU can see at most what is left of the BAR. */
+		tile->mem.vram.io_size = min_t(u64, vram_size, io_size);
+
+		if (!tile->mem.vram.io_size) {
+			drm_err(&xe->drm, "Tile without any CPU visible VRAM. Aborting.\n");
+			return -ENODEV;
+		}
+
+		tile->mem.vram.dpa_base = xe->mem.vram.dpa_base + tile_offset;
+		tile->mem.vram.usable_size = vram_size;
+		/* Per-tile view into the device-wide BAR mapping. */
+		tile->mem.vram.mapping = xe->mem.vram.mapping + tile_offset;
+
+		if (tile->mem.vram.io_size < tile->mem.vram.usable_size)
+			drm_info(&xe->drm, "Small BAR device\n");
+		drm_info(&xe->drm, "VRAM[%u, %u]: Actual physical size %pa, usable size exclude stolen %pa, CPU accessible size %pa\n", id,
+			 tile->id, &tile->mem.vram.actual_physical_size, &tile->mem.vram.usable_size, &tile->mem.vram.io_size);
+		drm_info(&xe->drm, "VRAM[%u, %u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", id, tile->id,
+			 &tile->mem.vram.dpa_base, tile->mem.vram.dpa_base + (u64)tile->mem.vram.actual_physical_size,
+			 &tile->mem.vram.io_start, tile->mem.vram.io_start + (u64)tile->mem.vram.io_size);
+
+		/* calculate total size using tile size to get the correct HW sizing */
+		total_size += tile_size;
+		available_size += vram_size;
+
+		if (total_size > xe->mem.vram.io_size) {
+			drm_info(&xe->drm, "VRAM: %pa is larger than resource %pa\n",
+				 &total_size, &xe->mem.vram.io_size);
+		}
+
+		/* Consume this tile's share of the BAR, saturating at zero. */
+		io_size -= min_t(u64, tile_size, io_size);
+	}
+
+	xe->mem.vram.actual_physical_size = total_size;
+
+	drm_info(&xe->drm, "Total VRAM: %pa, %pa\n", &xe->mem.vram.io_start,
+		 &xe->mem.vram.actual_physical_size);
+	drm_info(&xe->drm, "Available VRAM: %pa, %pa\n", &xe->mem.vram.io_start,
+		 &available_size);
+
+	return 0;
+}
+
+/*
+ * Determine the usable tile count and assign each tile its slice of the
+ * MMIO mapping (and of the MMIO extension space, when present).
+ */
+void xe_mmio_probe_tiles(struct xe_device *xe)
+{
+	size_t tile_mmio_size = SZ_16M, tile_mmio_ext_size = xe->info.tile_mmio_ext_size;
+	u8 id, tile_count = xe->info.tile_count;
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
+	struct xe_tile *tile;
+	void __iomem *regs;
+	u32 mtcfg;
+
+	/* Single tile: skip the per-tile register setup below. */
+	if (tile_count == 1)
+		goto add_mmio_ext;
+
+	if (!xe->info.skip_mtcfg) {
+		/* The register field holds the tile count minus one. */
+		mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR);
+		tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
+		if (tile_count < xe->info.tile_count) {
+			drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n",
+					xe->info.tile_count, tile_count);
+			xe->info.tile_count = tile_count;
+
+			/*
+			 * FIXME: Needs some work for standalone media, but should be impossible
+			 * with multi-tile for now.
+			 */
+			xe->info.gt_count = xe->info.tile_count;
+		}
+	}
+
+	/* Hand out consecutive 16MB windows of the mapping, one per tile. */
+	regs = xe->mmio.regs;
+	for_each_tile(tile, xe, id) {
+		tile->mmio.size = tile_mmio_size;
+		tile->mmio.regs = regs;
+		regs += tile_mmio_size;
+	}
+
+add_mmio_ext:
+	/*
+	 * By design, there's a contiguous multi-tile MMIO space (16MB hard coded per tile).
+	 * When supported, there could be an additional contiguous multi-tile MMIO extension
+	 * space ON TOP of it, and hence the necessity for distinguished MMIO spaces.
+	 */
+	if (xe->info.has_mmio_ext) {
+		/* The extension space starts right after the last tile's window. */
+		regs = xe->mmio.regs + tile_mmio_size * tile_count;
+
+		for_each_tile(tile, xe, id) {
+			tile->mmio_ext.size = tile_mmio_ext_size;
+			tile->mmio_ext.regs = regs;
+
+			regs += tile_mmio_ext_size;
+		}
+	}
+}
+
+/* Managed-release action: undo the register mapping and, if set, the VRAM mapping. */
+static void mmio_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_device *xe = arg;
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+
+	pci_iounmap(pdev, xe->mmio.regs);
+
+	if (!xe->mem.vram.mapping)
+		return;
+
+	iounmap(xe->mem.vram.mapping);
+}
+
+/*
+ * Check that local memory was brought up by firmware.
+ *
+ * Return: 0 on non-discrete devices or when LMEM_INIT is set in GU_CNTL,
+ * -ENODEV otherwise.
+ */
+static int xe_verify_lmem_ready(struct xe_device *xe)
+{
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
+
+	if (!IS_DGFX(xe))
+		return 0;
+
+	/*
+	 * The boot firmware initializes local memory and assesses its health.
+	 * If memory training fails, the punit will have been instructed to
+	 * keep the GT powered down; we won't be able to communicate with it
+	 * and we should not continue with driver initialization.
+	 */
+	if (xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT)
+		return 0;
+
+	drm_err(&xe->drm, "VRAM not initialized by firmware\n");
+	return -ENODEV;
+}
+
+/*
+ * Map the MMIO BAR and register mmio_fini() as the managed release action.
+ *
+ * Return: 0 on success, -EIO if the BAR cannot be mapped, or the error from
+ * drmm_add_action_or_reset().
+ */
+int xe_mmio_init(struct xe_device *xe)
+{
+	const int mmio_bar = 0;
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+
+	/*
+	 * Map the entire BAR.
+	 * The first 16MB of the BAR, belong to the root tile, and include:
+	 * registers (0-4MB), reserved space (4MB-8MB) and GGTT (8MB-16MB).
+	 */
+	xe->mmio.size = pci_resource_len(pdev, mmio_bar);
+	xe->mmio.regs = pci_iomap(pdev, mmio_bar, 0);
+	if (xe->mmio.regs)
+		return drmm_add_action_or_reset(&xe->drm, mmio_fini, xe);
+
+	drm_err(&xe->drm, "failed to map registers\n");
+	return -EIO;
+}
+
+/*
+ * Point the root tile at the start of the MMIO mapping and verify that local
+ * memory is ready.
+ *
+ * Return: 0 on success, or the error from xe_verify_lmem_ready().
+ */
+int xe_mmio_root_tile_init(struct xe_device *xe)
+{
+	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
+
+	/* Setup first tile; other tiles (if present) will be setup later. */
+	root_tile->mmio.regs = xe->mmio.regs;
+	root_tile->mmio.size = SZ_16M;
+
+	return xe_verify_lmem_ready(xe);
+}
+
+/**
+ * xe_mmio_read64_2x32() - Read a 64-bit register as two 32-bit reads
+ * @gt: MMIO target GT
+ * @reg: register to read value from
+ *
+ * Although Intel GPUs have some 64-bit registers, the hardware officially
+ * only supports GTTMMADR register reads of 32 bits or smaller.  Even if
+ * a readq operation may return a reasonable value, that violation of the
+ * spec shouldn't be relied upon and all 64-bit register reads should be
+ * performed as two 32-bit reads of the upper and lower dwords.
+ *
+ * When reading registers that may be changing (such as
+ * counters), a rollover of the lower dword between the two 32-bit reads
+ * can be problematic.  This function attempts to ensure the upper dword has
+ * stabilized before returning the 64-bit value.
+ *
+ * Note that because this function may re-read the register multiple times
+ * while waiting for the value to stabilize it should not be used to read
+ * any registers where read operations have side effects.
+ *
+ * Returns the value of the 64-bit register.
+ */
+u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg)
+{
+	/* NOTE(review): only .addr is carried over to the upper-dword register. */
+	struct xe_reg reg_udw = { .addr = reg.addr + 0x4 };
+	u32 ldw, udw, oldudw, retries;
+
+	/*
+	 * Apply the GT's address adjustment to both halves up front.
+	 * NOTE(review): xe_mmio_read32() performs the same check again on the
+	 * address passed in - presumably the adjusted address always ends up
+	 * at or above adj_limit; confirm.
+	 */
+	if (reg.addr < gt->mmio.adj_limit) {
+		reg.addr += gt->mmio.adj_offset;
+		reg_udw.addr += gt->mmio.adj_offset;
+	}
+
+	/* Re-read until the upper dword is the same before and after the lower read. */
+	oldudw = xe_mmio_read32(gt, reg_udw);
+	for (retries = 5; retries; --retries) {
+		ldw = xe_mmio_read32(gt, reg);
+		udw = xe_mmio_read32(gt, reg_udw);
+
+		if (udw == oldudw)
+			break;
+
+		oldudw = udw;
+	}
+
+	/* retries only reaches 0 when no pass saw a stable upper dword. */
+	xe_gt_WARN(gt, retries == 0,
+		   "64-bit read of %#x did not stabilize\n", reg.addr);
+
+	return (u64)udw << 32 | ldw;
+}
+
+/**
+ * xe_mmio_wait32() - Wait for a register to match the desired masked value
+ * @gt: MMIO target GT
+ * @reg: register to read value from
+ * @mask: mask to be applied to the value read from the register
+ * @val: desired value after applying the mask
+ * @timeout_us: time out after this period of time. Wait logic tries to be
+ * smart, applying an exponential backoff until @timeout_us is reached.
+ * @out_val: if not NULL, points where to store the last unmasked value
+ * @atomic: needs to be true if calling from an atomic context
+ *
+ * This function polls for the desired masked value and returns zero on success
+ * or -ETIMEDOUT if timed out.
+ *
+ * Note that @timeout_us represents the minimum amount of time to wait before
+ * giving up. The actual time taken by this function can be a little more than
+ * @timeout_us for different reasons, specially in non-atomic contexts. Thus,
+ * it is possible that this function succeeds even after @timeout_us has passed.
+ */
+int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
+		   u32 *out_val, bool atomic)
+{
+	ktime_t cur = ktime_get_raw();
+	const ktime_t end = ktime_add_us(cur, timeout_us);
+	int ret = -ETIMEDOUT;
+	/* Initial delay in microseconds; doubled after every poll. */
+	s64 wait = 10;
+	u32 read;
+
+	for (;;) {
+		read = xe_mmio_read32(gt, reg);
+		if ((read & mask) == val) {
+			ret = 0;
+			break;
+		}
+
+		cur = ktime_get_raw();
+		if (!ktime_before(cur, end))
+			break;
+
+		/* Don't sleep past the deadline: clamp to the time remaining. */
+		if (ktime_after(ktime_add_us(cur, wait), end))
+			wait = ktime_us_delta(end, cur);
+
+		if (atomic)
+			udelay(wait);
+		else
+			usleep_range(wait, wait << 1);
+		wait <<= 1;
+	}
+
+	/* One last check after the deadline, in case the final delay overshot. */
+	if (ret != 0) {
+		read = xe_mmio_read32(gt, reg);
+		if ((read & mask) == val)
+			ret = 0;
+	}
+
+	if (out_val)
+		*out_val = read;
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
new file mode 100644
index 000000000000..98de5c13c89b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021-2023 Intel Corporation
+ */
+
+#ifndef _XE_MMIO_H_
+#define _XE_MMIO_H_
+
+#include <linux/delay.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+
+#include "regs/xe_reg_defs.h"
+#include "xe_device_types.h"
+#include "xe_gt_printk.h"
+#include "xe_gt_types.h"
+
+struct drm_device;
+struct drm_file;
+struct xe_device;
+
+#define LMEM_BAR		2
+
+int xe_mmio_init(struct xe_device *xe);
+int xe_mmio_root_tile_init(struct xe_device *xe);
+void xe_mmio_probe_tiles(struct xe_device *xe);
+
+static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
+{
+	struct xe_tile *tile = gt_to_tile(gt);
+
+	if (reg.addr < gt->mmio.adj_limit)
+		reg.addr += gt->mmio.adj_offset;
+	/* Offsets below adj_limit were shifted above; reg.ext selects the mmio_ext mapping. */
+	return readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
+}
+
+static inline u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg)
+{
+	struct xe_tile *tile = gt_to_tile(gt);
+
+	if (reg.addr < gt->mmio.adj_limit)
+		reg.addr += gt->mmio.adj_offset;
+	/* Offsets below adj_limit were shifted above; reg.ext selects the mmio_ext mapping. */
+	return readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
+}
+
+static inline void xe_mmio_write32(struct xe_gt *gt,
+				   struct xe_reg reg, u32 val)
+{
+	struct xe_tile *tile = gt_to_tile(gt);
+
+	if (reg.addr < gt->mmio.adj_limit)
+		reg.addr += gt->mmio.adj_offset;
+	/* Offsets below adj_limit were shifted above; reg.ext selects the mmio_ext mapping. */
+	writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
+}
+
+static inline u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg)
+{
+	struct xe_tile *tile = gt_to_tile(gt);
+
+	if (reg.addr < gt->mmio.adj_limit)
+		reg.addr += gt->mmio.adj_offset;
+	/* Offsets below adj_limit were shifted above; reg.ext selects the mmio_ext mapping. */
+	return readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
+}
+
+static inline u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr,
+				u32 set)
+{
+	const u32 before = xe_mmio_read32(gt, reg);
+
+	/* Clear first, then set: bits present in both masks end up set. */
+	xe_mmio_write32(gt, reg, (before & ~clr) | set);
+
+	/* Callers get the pre-modification value back. */
+	return before;
+}
+
+static inline int xe_mmio_write32_and_verify(struct xe_gt *gt,
+					     struct xe_reg reg, u32 val,
+					     u32 mask, u32 eval)
+{
+	xe_mmio_write32(gt, reg, val);
+
+	/* Read the register back and compare only the bits under @mask. */
+	if ((xe_mmio_read32(gt, reg) & mask) == eval)
+		return 0;
+	return -EINVAL;
+}
+
+static inline bool xe_mmio_in_range(const struct xe_gt *gt,
+				    const struct xe_mmio_range *range,
+				    struct xe_reg reg)
+{
+	if (reg.addr < gt->mmio.adj_limit)
+		reg.addr += gt->mmio.adj_offset;
+	/* A NULL range never matches; the bounds are inclusive on both ends. */
+	return range && range->start <= reg.addr && reg.addr <= range->end;
+}
+
+int xe_mmio_probe_vram(struct xe_device *xe);
+u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg);
+int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
+		   u32 *out_val, bool atomic);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
new file mode 100644
index 000000000000..ef79552e4f2f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -0,0 +1,580 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_mocs.h"
+
+#include "regs/xe_gt_regs.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_exec_queue.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_mmio.h"
+#include "xe_platform_types.h"
+#include "xe_step_types.h"
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+#define mocs_dbg drm_dbg
+#else
+__printf(2, 3)
+static inline void mocs_dbg(const struct drm_device *dev,
+			    const char *format, ...)
+{ /* no-op stub: MOCS debug output is compiled out without CONFIG_DRM_XE_DEBUG */ }
+#endif
+
+enum {
+	HAS_GLOBAL_MOCS = BIT(0),	/* xe_mocs_init() programs the GLOBAL_MOCS registers */
+	HAS_LNCF_MOCS = BIT(1),		/* xe_mocs_init() programs the LNCFCMOCS registers */
+};
+
+struct xe_mocs_entry {
+	u32 control_value;	/* value written to a GLOBAL_MOCS register */
+	u16 l3cc_value;		/* 16-bit half of an LNCFCMOCS register */
+	u16 used;		/* set to 1 by MOCS_ENTRY(); 0 for rows the table omits */
+};
+
+struct xe_mocs_info {
+	unsigned int size;		/* ARRAY_SIZE() of @table */
+	unsigned int n_entries;		/* number of MOCS indices programmed */
+	const struct xe_mocs_entry *table;
+	u8 uc_index;			/* copied to gt->mocs.uc_index */
+	u8 wb_index;			/* copied to gt->mocs.wb_index */
+	u8 unused_entries_index;	/* row substituted for undefined entries */
+};
+
+/* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */
+#define _LE_CACHEABILITY(value)	((value) << 0)
+#define _LE_TGT_CACHE(value)	((value) << 2)
+#define LE_LRUM(value)		((value) << 4)
+#define LE_AOM(value)		((value) << 6)
+#define LE_RSC(value)		((value) << 7)
+#define LE_SCC(value)		((value) << 8)
+#define LE_PFM(value)		((value) << 11)
+#define LE_SCF(value)		((value) << 14)
+#define LE_COS(value)		((value) << 15)
+#define LE_SSE(value)		((value) << 17)
+
+/* Defines for the tables (LNCFCMOCS0 - LNCFCMOCS31) - two entries per word */
+#define L3_ESC(value)		((value) << 0)
+#define L3_SCC(value)		((value) << 1)
+#define _L3_CACHEABILITY(value)	((value) << 4)
+#define L3_GLBGO(value)		((value) << 6)
+#define L3_LKUP(value)		((value) << 7)
+
+/* Defines for the tables (GLOB_MOCS_0 - GLOB_MOCS_15) */
+#define IG_PAT				REG_BIT(8)
+#define L3_CACHE_POLICY_MASK		REG_GENMASK(5, 4)
+#define L4_CACHE_POLICY_MASK		REG_GENMASK(3, 2)
+
+/* Helper defines */
+#define XELP_NUM_MOCS_ENTRIES	64  /* 62-63 are reserved, but configured. */
+#define PVC_NUM_MOCS_ENTRIES	3
+#define MTL_NUM_MOCS_ENTRIES    16
+#define XE2_NUM_MOCS_ENTRIES	16
+
+/* (e)LLC caching options */
+/*
+ * Note: LE_0_PAGETABLE works only up to Gen11; for newer gens it means
+ * the same as LE_UC
+ */
+#define LE_0_PAGETABLE		_LE_CACHEABILITY(0)
+#define LE_1_UC			_LE_CACHEABILITY(1)
+#define LE_2_WT			_LE_CACHEABILITY(2)
+#define LE_3_WB			_LE_CACHEABILITY(3)
+
+/* Target cache */
+#define LE_TC_0_PAGETABLE	_LE_TGT_CACHE(0)
+#define LE_TC_1_LLC		_LE_TGT_CACHE(1)
+#define LE_TC_2_LLC_ELLC	_LE_TGT_CACHE(2)
+#define LE_TC_3_LLC_ELLC_ALT	_LE_TGT_CACHE(3)
+
+/* L3 caching options */
+#define L3_0_DIRECT		_L3_CACHEABILITY(0)
+#define L3_1_UC			_L3_CACHEABILITY(1)
+#define L3_2_RESERVED		_L3_CACHEABILITY(2)
+#define L3_3_WB			_L3_CACHEABILITY(3)
+
+/* L4 caching options */
+#define L4_0_WB                 REG_FIELD_PREP(L4_CACHE_POLICY_MASK, 0)
+#define L4_1_WT                 REG_FIELD_PREP(L4_CACHE_POLICY_MASK, 1)
+#define L4_3_UC                 REG_FIELD_PREP(L4_CACHE_POLICY_MASK, 3)
+
+#define XE2_L3_0_WB		REG_FIELD_PREP(L3_CACHE_POLICY_MASK, 0)
+/* XD: WB Transient Display */
+#define XE2_L3_1_XD		REG_FIELD_PREP(L3_CACHE_POLICY_MASK, 1)
+#define XE2_L3_3_UC		REG_FIELD_PREP(L3_CACHE_POLICY_MASK, 3)
+
+#define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \
+	[__idx] = { \
+		.control_value = __control_value, \
+		.l3cc_value = __l3cc_value, \
+		.used = 1, \
+	}
+
+/*
+ * MOCS tables
+ *
+ * These are the MOCS tables that are programmed across all the rings.
+ * The control value is programmed to all the rings that support the
+ * MOCS registers, while the l3cc_values are only programmed to the
+ * LNCFCMOCS0 - LNCFCMOCS31 registers.
+ *
+ * These tables are intended to be kept reasonably consistent across
+ * HW platforms, and for ICL+, be identical across OSes. To achieve
+ * that, the list of entries is published as part of bspec.
+ *
+ * Entries not part of the following tables are undefined as far as userspace is
+ * concerned and shouldn't be relied upon. The last few entries are reserved by
+ * the hardware. They should be initialized according to bspec and never used.
+ *
+ * NOTE1: These tables are part of bspec and defined as part of the hardware
+ * interface. It is expected that, for specific hardware platform, existing
+ * entries will remain constant and the table will only be updated by adding new
+ * entries, filling unused positions.
+ *
+ * NOTE2: Reserved and unspecified MOCS indices have been set to L3 WB. These
+ * reserved entries should never be used. They may be changed to low performant
+ * variants with better coherency in the future if more entries are needed.
+ */
+
+static const struct xe_mocs_entry gen12_mocs_desc[] = {
+	/* Base - L3 + LLC */
+	MOCS_ENTRY(2,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_3_WB),
+	/* Base - Uncached */
+	MOCS_ENTRY(3,
+		   LE_1_UC | LE_TC_1_LLC,
+		   L3_1_UC),
+	/* Base - L3 */
+	MOCS_ENTRY(4,
+		   LE_1_UC | LE_TC_1_LLC,
+		   L3_3_WB),
+	/* Base - LLC */
+	MOCS_ENTRY(5,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_1_UC),
+	/* Age 0 - LLC */
+	MOCS_ENTRY(6,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(1),
+		   L3_1_UC),
+	/* Age 0 - L3 + LLC */
+	MOCS_ENTRY(7,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(1),
+		   L3_3_WB),
+	/* Age: Don't Chg. - LLC */
+	MOCS_ENTRY(8,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(2),
+		   L3_1_UC),
+	/* Age: Don't Chg. - L3 + LLC */
+	MOCS_ENTRY(9,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(2),
+		   L3_3_WB),
+	/* No AOM - LLC */
+	MOCS_ENTRY(10,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1),
+		   L3_1_UC),
+	/* No AOM - L3 + LLC */
+	MOCS_ENTRY(11,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1),
+		   L3_3_WB),
+	/* No AOM; Age 0 - LLC */
+	MOCS_ENTRY(12,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1),
+		   L3_1_UC),
+	/* No AOM; Age 0 - L3 + LLC */
+	MOCS_ENTRY(13,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1),
+		   L3_3_WB),
+	/* No AOM; Age:DC - LLC */
+	MOCS_ENTRY(14,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1),
+		   L3_1_UC),
+	/* No AOM; Age:DC - L3 + LLC */
+	MOCS_ENTRY(15,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1),
+		   L3_3_WB),
+	/* Self-Snoop - L3 + LLC */
+	MOCS_ENTRY(18,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3),
+		   L3_3_WB),
+	/* Skip Caching - L3 + LLC(12.5%) */
+	MOCS_ENTRY(19,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7),
+		   L3_3_WB),
+	/* Skip Caching - L3 + LLC(25%) */
+	MOCS_ENTRY(20,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3),
+		   L3_3_WB),
+	/* Skip Caching - L3 + LLC(50%) */
+	MOCS_ENTRY(21,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1),
+		   L3_3_WB),
+	/* Skip Caching - L3 + LLC(75%) */
+	MOCS_ENTRY(22,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3),
+		   L3_3_WB),
+	/* Skip Caching - L3 + LLC(87.5%) */
+	MOCS_ENTRY(23,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7),
+		   L3_3_WB),
+	/* Implicitly enable L1 - HDC:L1 + L3 + LLC */
+	MOCS_ENTRY(48,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_3_WB),
+	/* Implicitly enable L1 - HDC:L1 + L3 */
+	MOCS_ENTRY(49,
+		   LE_1_UC | LE_TC_1_LLC,
+		   L3_3_WB),
+	/* Implicitly enable L1 - HDC:L1 + LLC */
+	MOCS_ENTRY(50,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_1_UC),
+	/* Implicitly enable L1 - HDC:L1 */
+	MOCS_ENTRY(51,
+		   LE_1_UC | LE_TC_1_LLC,
+		   L3_1_UC),
+	/* HW Special Case (CCS) */
+	MOCS_ENTRY(60,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_1_UC),
+	/* HW Special Case (Displayable) */
+	MOCS_ENTRY(61,
+		   LE_1_UC | LE_TC_1_LLC,
+		   L3_3_WB),
+	/* HW Reserved - SW program but never use */
+	MOCS_ENTRY(62,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_1_UC),
+	/* HW Reserved - SW program but never use */
+	MOCS_ENTRY(63,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_1_UC)
+};
+
+static const struct xe_mocs_entry dg1_mocs_desc[] = {
+	/* UC */
+	MOCS_ENTRY(1, 0, L3_1_UC),
+	/* WB - L3 */
+	MOCS_ENTRY(5, 0, L3_3_WB),
+	/* WB - L3 50% */
+	MOCS_ENTRY(6, 0, L3_ESC(1) | L3_SCC(1) | L3_3_WB),
+	/* WB - L3 25% */
+	MOCS_ENTRY(7, 0, L3_ESC(1) | L3_SCC(3) | L3_3_WB),
+	/* WB - L3 12.5% */
+	MOCS_ENTRY(8, 0, L3_ESC(1) | L3_SCC(7) | L3_3_WB),
+
+	/* HDC:L1 + L3 */
+	MOCS_ENTRY(48, 0, L3_3_WB),
+	/* HDC:L1 */
+	MOCS_ENTRY(49, 0, L3_1_UC),
+
+	/* HW Reserved */
+	MOCS_ENTRY(60, 0, L3_1_UC),
+	MOCS_ENTRY(61, 0, L3_1_UC),
+	MOCS_ENTRY(62, 0, L3_1_UC),
+	MOCS_ENTRY(63, 0, L3_1_UC),
+};
+
+static const struct xe_mocs_entry dg2_mocs_desc[] = {
+	/* UC - Coherent; GO:L3 */
+	MOCS_ENTRY(0, 0, L3_1_UC | L3_LKUP(1)),
+	/* UC - Coherent; GO:Memory */
+	MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
+	/* UC - Non-Coherent; GO:Memory */
+	MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)),
+
+	/* WB - LC */
+	MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
+};
+
+static const struct xe_mocs_entry dg2_mocs_desc_g10_ax[] = {
+	/* Wa_14011441408: Set Go to Memory for MOCS#0 */
+	MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
+	/* UC - Coherent; GO:Memory */
+	MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
+	/* UC - Non-Coherent; GO:Memory */
+	MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)),
+
+	/* WB - LC */
+	MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
+};
+
+static const struct xe_mocs_entry pvc_mocs_desc[] = {
+	/* Error */
+	MOCS_ENTRY(0, 0, L3_3_WB),
+
+	/* UC */
+	MOCS_ENTRY(1, 0, L3_1_UC),
+
+	/* WB */
+	MOCS_ENTRY(2, 0, L3_3_WB),
+};
+
+static const struct xe_mocs_entry mtl_mocs_desc[] = {
+	/* Error - Reserved for Non-Use */
+	MOCS_ENTRY(0,
+		   0,
+		   L3_LKUP(1) | L3_3_WB),
+	/* Cached - L3 + L4 */
+	MOCS_ENTRY(1,
+		   IG_PAT,
+		   L3_LKUP(1) | L3_3_WB),
+	/* L4 - GO:L3 */
+	MOCS_ENTRY(2,
+		   IG_PAT,
+		   L3_LKUP(1) | L3_1_UC),
+	/* Uncached - GO:L3 */
+	MOCS_ENTRY(3,
+		   IG_PAT | L4_3_UC,
+		   L3_LKUP(1) | L3_1_UC),
+	/* L4 - GO:Mem */
+	MOCS_ENTRY(4,
+		   IG_PAT,
+		   L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
+	/* Uncached - GO:Mem */
+	MOCS_ENTRY(5,
+		   IG_PAT | L4_3_UC,
+		   L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
+	/* L4 - L3:NoLKUP; GO:L3 */
+	MOCS_ENTRY(6,
+		   IG_PAT,
+		   L3_1_UC),
+	/* Uncached - L3:NoLKUP; GO:L3 */
+	MOCS_ENTRY(7,
+		   IG_PAT | L4_3_UC,
+		   L3_1_UC),
+	/* L4 - L3:NoLKUP; GO:Mem */
+	MOCS_ENTRY(8,
+		   IG_PAT,
+		   L3_GLBGO(1) | L3_1_UC),
+	/* Uncached - L3:NoLKUP; GO:Mem */
+	MOCS_ENTRY(9,
+		   IG_PAT | L4_3_UC,
+		   L3_GLBGO(1) | L3_1_UC),
+	/* Display - L3; L4:WT */
+	MOCS_ENTRY(14,
+		   IG_PAT | L4_1_WT,
+		   L3_LKUP(1) | L3_3_WB),
+	/* CCS - Non-Displayable */
+	MOCS_ENTRY(15,
+		   IG_PAT,
+		   L3_GLBGO(1) | L3_1_UC),
+};
+
+static const struct xe_mocs_entry xe2_mocs_table[] = {
+	/* Defer to PAT */
+	MOCS_ENTRY(0, XE2_L3_0_WB | L4_3_UC, 0),
+	/* Cached L3, Uncached L4 */
+	MOCS_ENTRY(1, IG_PAT | XE2_L3_0_WB | L4_3_UC, 0),
+	/* Uncached L3, Cached L4 */
+	MOCS_ENTRY(2, IG_PAT | XE2_L3_3_UC | L4_0_WB, 0),
+	/* Uncached L3 + L4 */
+	MOCS_ENTRY(3, IG_PAT | XE2_L3_3_UC | L4_3_UC, 0),
+	/* Cached L3 + L4 */
+	MOCS_ENTRY(4, IG_PAT | XE2_L3_0_WB | L4_0_WB, 0),
+};
+
+static unsigned int get_mocs_settings(struct xe_device *xe,
+				      struct xe_mocs_info *info)
+{
+	unsigned int flags = 0;
+
+	memset(info, 0, sizeof(struct xe_mocs_info));
+	/* Select the table, register count and special indices for this platform. */
+	switch (xe->info.platform) {
+	case XE_LUNARLAKE:
+		info->size = ARRAY_SIZE(xe2_mocs_table);
+		info->table = xe2_mocs_table;
+		info->n_entries = XE2_NUM_MOCS_ENTRIES;
+		info->uc_index = 3;
+		info->wb_index = 4;
+		info->unused_entries_index = 4;
+		break;
+	case XE_PVC:
+		info->size = ARRAY_SIZE(pvc_mocs_desc);
+		info->table = pvc_mocs_desc;
+		info->n_entries = PVC_NUM_MOCS_ENTRIES;
+		info->uc_index = 1;
+		info->wb_index = 2;
+		info->unused_entries_index = 2;
+		break;
+	case XE_METEORLAKE:
+		info->size = ARRAY_SIZE(mtl_mocs_desc);
+		info->table = mtl_mocs_desc;
+		info->n_entries = MTL_NUM_MOCS_ENTRIES;
+		info->uc_index = 9;
+		info->unused_entries_index = 1;
+		break;
+	case XE_DG2:
+		if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G10 &&
+		    xe->info.step.graphics >= STEP_A0 &&
+		    xe->info.step.graphics <= STEP_B0) {
+			info->size = ARRAY_SIZE(dg2_mocs_desc_g10_ax);
+			info->table = dg2_mocs_desc_g10_ax;
+		} else {
+			info->size = ARRAY_SIZE(dg2_mocs_desc);
+			info->table = dg2_mocs_desc;
+		}
+		info->uc_index = 1;
+		info->n_entries = XELP_NUM_MOCS_ENTRIES;
+		info->unused_entries_index = 3;
+		break;
+	case XE_DG1:
+		info->size = ARRAY_SIZE(dg1_mocs_desc);
+		info->table = dg1_mocs_desc;
+		info->uc_index = 1;
+		info->n_entries = XELP_NUM_MOCS_ENTRIES;
+		info->unused_entries_index = 5;
+		break;
+	case XE_TIGERLAKE:
+	case XE_ROCKETLAKE:
+	case XE_ALDERLAKE_S:
+	case XE_ALDERLAKE_P:
+	case XE_ALDERLAKE_N:
+		info->size  = ARRAY_SIZE(gen12_mocs_desc);
+		info->table = gen12_mocs_desc;
+		info->n_entries = XELP_NUM_MOCS_ENTRIES;
+		info->uc_index = 3;
+		info->unused_entries_index = 2;
+		break;
+	default:
+		drm_err(&xe->drm, "Platform that should have a MOCS table does not.\n");
+		return 0;
+	}
+
+	/*
+	 * Index 0 is a reserved/unused table entry on most platforms, but
+	 * even on those where it does represent a legitimate MOCS entry, it
+	 * never represents the "most cached, least coherent" behavior we want
+	 * to populate undefined table rows with.  So if unused_entries_index
+	 * is still 0 at this point, we'll assume that it was omitted by
+	 * mistake in the switch statement above.
+	 */
+	xe_assert(xe, info->unused_entries_index != 0);
+
+	if (XE_WARN_ON(info->size > info->n_entries)) {
+		info->table = NULL;
+		return 0;
+	}
+	/* GLOBAL_MOCS: non-DGFX parts and graphics ver 20+; LNCFCMOCS: graphics ver < 20. */
+	if (!IS_DGFX(xe) || GRAPHICS_VER(xe) >= 20)
+		flags |= HAS_GLOBAL_MOCS;
+	if (GRAPHICS_VER(xe) < 20)
+		flags |= HAS_LNCF_MOCS;
+
+	return flags;
+}
+
+/*
+ * Get control_value from MOCS entry.  If the table entry is not defined, the
+ * settings from unused_entries_index will be returned.
+ */
+static u32 get_entry_control(const struct xe_mocs_info *info,
+			     unsigned int index)
+{
+	bool defined = index < info->size && info->table[index].used;
+
+	return info->table[defined ? index : info->unused_entries_index].control_value;
+}
+
+static void __init_mocs_table(struct xe_gt *gt,
+			      const struct xe_mocs_info *info)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	unsigned int i;
+
+	mocs_dbg(&xe->drm, "entries:%d\n", info->n_entries);
+	drm_WARN_ONCE(&xe->drm, !info->unused_entries_index,
+		      "Unused entries index should have been defined\n");
+
+	/* Rows the table leaves undefined get the unused_entries_index value. */
+	for (i = 0; i < info->n_entries; i++) {
+		u32 mocs = get_entry_control(info, i);
+
+		mocs_dbg(&xe->drm, "GLOB_MOCS[%d] 0x%x 0x%x\n", i,
+			 XELP_GLOBAL_MOCS(i).addr, mocs);
+
+		if (GRAPHICS_VERx100(xe) > 1250)
+			xe_gt_mcr_multicast_write(gt, XEHP_GLOBAL_MOCS(i), mocs);
+		else
+			xe_mmio_write32(gt, XELP_GLOBAL_MOCS(i), mocs);
+	}
+}
+
+/*
+ * Get l3cc_value from MOCS entry.  If the table entry is not defined, the
+ * l3cc_value of the entry at unused_entries_index is returned instead; there
+ * is no other fallback.
+ */
+static u16 get_entry_l3cc(const struct xe_mocs_info *info,
+			  unsigned int index)
+{
+	bool defined = index < info->size && info->table[index].used;
+
+	return info->table[defined ? index : info->unused_entries_index].l3cc_value;
+}
+
+static u32 l3cc_combine(u16 low, u16 high)
+{
+	return ((u32)high << 16) | low;
+}
+
+static void init_l3cc_table(struct xe_gt *gt,
+			    const struct xe_mocs_info *info)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	unsigned int i;
+
+	mocs_dbg(&xe->drm, "entries:%d\n", info->n_entries);
+	for (i = 0; i < (info->n_entries + 1) / 2; i++) {
+		u16 lo = get_entry_l3cc(info, 2 * i);
+		u16 hi = get_entry_l3cc(info, 2 * i + 1);
+		u32 l3cc = l3cc_combine(lo, hi);
+
+		mocs_dbg(&xe->drm, "LNCFCMOCS[%d] 0x%x 0x%x\n", i, XELP_LNCFCMOCS(i).addr,
+			 l3cc);
+
+		if (GRAPHICS_VERx100(xe) >= 1250)
+			xe_gt_mcr_multicast_write(gt, XEHP_LNCFCMOCS(i), l3cc);
+		else
+			xe_mmio_write32(gt, XELP_LNCFCMOCS(i), l3cc);
+	}
+}
+
+void xe_mocs_init_early(struct xe_gt *gt)
+{
+	struct xe_mocs_info info;
+
+	get_mocs_settings(gt_to_xe(gt), &info);
+	gt->mocs.wb_index = info.wb_index;
+	gt->mocs.uc_index = info.uc_index;
+}
+
+void xe_mocs_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_mocs_info info;
+	unsigned int flags;
+
+	/*
+	 * Depending on the platform, MOCS settings live in the "GLOB_MOCS"
+	 * registers, the "LNCFCMOCS" registers, or both.
+	 *
+	 * Program them before GuC initialization: their values affect some of
+	 * the memory transactions the GuC performs.
+	 */
+	flags = get_mocs_settings(xe, &info);
+	mocs_dbg(&xe->drm, "flag:0x%x\n", flags);
+
+	if (flags & HAS_GLOBAL_MOCS)
+		__init_mocs_table(gt, &info);
+	if (flags & HAS_LNCF_MOCS)
+		init_l3cc_table(gt, &info);
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_mocs.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_mocs.h b/drivers/gpu/drm/xe/xe_mocs.h
new file mode 100644
index 000000000000..053754c5a94e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mocs.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_MOCS_H_
+#define _XE_MOCS_H_
+
+#include <linux/types.h>
+
+struct xe_exec_queue;
+struct xe_gt;
+
+void xe_mocs_init_early(struct xe_gt *gt);
+void xe_mocs_init(struct xe_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
new file mode 100644
index 000000000000..110b69864656
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_module.h"
+
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include "xe_drv.h"
+#include "xe_hw_fence.h"
+#include "xe_pci.h"
+#include "xe_sched_job.h"
+
+struct xe_modparam xe_modparam = {
+	.enable_display = true,
+	.guc_log_level = 5,
+	.force_probe = CONFIG_DRM_XE_FORCE_PROBE,
+	/* the rest are 0 by default */
+};
+
+module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444);
+MODULE_PARM_DESC(force_execlist, "Force Execlist submission");
+
+module_param_named(enable_display, xe_modparam.enable_display, bool, 0444);
+MODULE_PARM_DESC(enable_display, "Enable display");
+
+module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, uint, 0600);
+MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size(in MiB)");
+
+module_param_named(guc_log_level, xe_modparam.guc_log_level, int, 0600);
+MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1..5=enable with verbosity min..max)");
+
+module_param_named_unsafe(guc_firmware_path, xe_modparam.guc_firmware_path, charp, 0400);
+MODULE_PARM_DESC(guc_firmware_path,
+		 "GuC firmware path to use instead of the default one");
+
+module_param_named_unsafe(huc_firmware_path, xe_modparam.huc_firmware_path, charp, 0400);
+MODULE_PARM_DESC(huc_firmware_path,
+		 "HuC firmware path to use instead of the default one - empty string disables");
+
+module_param_named_unsafe(gsc_firmware_path, xe_modparam.gsc_firmware_path, charp, 0400);
+MODULE_PARM_DESC(gsc_firmware_path,
+		 "GSC firmware path to use instead of the default one - empty string disables");
+
+module_param_named_unsafe(force_probe, xe_modparam.force_probe, charp, 0400);
+MODULE_PARM_DESC(force_probe,
+		 "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details.");
+
+struct init_funcs {
+	int (*init)(void);
+	void (*exit)(void);
+};
+
+static const struct init_funcs init_funcs[] = {
+	{
+		.init = xe_hw_fence_module_init,
+		.exit = xe_hw_fence_module_exit,
+	},
+	{
+		.init = xe_sched_job_module_init,
+		.exit = xe_sched_job_module_exit,
+	},
+	{
+		.init = xe_register_pci_driver,
+		.exit = xe_unregister_pci_driver,
+	},
+};
+
+static int __init xe_init(void)
+{
+	int idx, ret = 0;
+
+	for (idx = 0; idx < ARRAY_SIZE(init_funcs); idx++) {
+		ret = init_funcs[idx].init();
+		if (ret)
+			break;
+	}
+
+	/* On failure, unwind the already-initialised entries in reverse. */
+	while (ret && idx--)
+		init_funcs[idx].exit();
+	return ret;
+}
+
+static void __exit xe_exit(void)
+{
+	int i = ARRAY_SIZE(init_funcs);
+
+	while (i--)
+		init_funcs[i].exit();
+}
+
+module_init(xe_init);
+module_exit(xe_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
new file mode 100644
index 000000000000..88ef0e8b2bfd
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_module.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_MODULE_H_
+#define _XE_MODULE_H_
+
+#include <linux/types.h>
+
+/* Module modprobe variables */
+struct xe_modparam {
+	bool force_execlist;		/* "force_execlist": force Execlist submission */
+	bool enable_display;		/* "enable_display": enable display */
+	u32 force_vram_bar_size;	/* "vram_bar_size": VRAM BAR size, in MiB */
+	int guc_log_level;		/* "guc_log_level": 0=disable, 1..5=verbosity */
+	char *guc_firmware_path;	/* overrides the default GuC firmware path */
+	char *huc_firmware_path;	/* overrides the default HuC path; "" disables */
+	char *gsc_firmware_path;	/* overrides the default GSC path; "" disables */
+	char *force_probe;		/* see CONFIG_DRM_XE_FORCE_PROBE */
+};
+
+extern struct xe_modparam xe_modparam;
+
+#endif
+
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
new file mode 100644
index 000000000000..1ff6bc79e7d4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -0,0 +1,459 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_pat.h"
+
+#include <drm/xe_drm.h>
+
+#include "regs/xe_reg_defs.h"
+#include "xe_assert.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_mmio.h"
+
+#define _PAT_ATS				0x47fc
+#define _PAT_INDEX(index)			_PICK_EVEN_2RANGES(index, 8, \
+								   0x4800, 0x4804, \
+								   0x4848, 0x484c)
+#define _PAT_PTA				0x4820
+
+#define XE2_NO_PROMOTE				REG_BIT(10)
+#define XE2_COMP_EN				REG_BIT(9)
+#define XE2_L3_CLOS				REG_GENMASK(7, 6)
+#define XE2_L3_POLICY				REG_GENMASK(5, 4)
+#define XE2_L4_POLICY				REG_GENMASK(3, 2)
+#define XE2_COH_MODE				REG_GENMASK(1, 0)
+
+#define XELPG_L4_POLICY_MASK			REG_GENMASK(3, 2)
+#define XELPG_PAT_3_UC				REG_FIELD_PREP(XELPG_L4_POLICY_MASK, 3)
+#define XELPG_PAT_1_WT				REG_FIELD_PREP(XELPG_L4_POLICY_MASK, 1)
+#define XELPG_PAT_0_WB				REG_FIELD_PREP(XELPG_L4_POLICY_MASK, 0)
+#define XELPG_INDEX_COH_MODE_MASK		REG_GENMASK(1, 0)
+#define XELPG_3_COH_2W				REG_FIELD_PREP(XELPG_INDEX_COH_MODE_MASK, 3)
+#define XELPG_2_COH_1W				REG_FIELD_PREP(XELPG_INDEX_COH_MODE_MASK, 2)
+#define XELPG_0_COH_NON				REG_FIELD_PREP(XELPG_INDEX_COH_MODE_MASK, 0)
+
+#define XEHPC_CLOS_LEVEL_MASK			REG_GENMASK(3, 2)
+#define XEHPC_PAT_CLOS(x)			REG_FIELD_PREP(XEHPC_CLOS_LEVEL_MASK, x)
+
+#define XELP_MEM_TYPE_MASK			REG_GENMASK(1, 0)
+#define XELP_PAT_WB				REG_FIELD_PREP(XELP_MEM_TYPE_MASK, 3)
+#define XELP_PAT_WT				REG_FIELD_PREP(XELP_MEM_TYPE_MASK, 2)
+#define XELP_PAT_WC				REG_FIELD_PREP(XELP_MEM_TYPE_MASK, 1)
+#define XELP_PAT_UC				REG_FIELD_PREP(XELP_MEM_TYPE_MASK, 0)
+
+static const char *XELP_MEM_TYPE_STR_MAP[] = { "UC", "WC", "WT", "WB" };
+
+struct xe_pat_ops {
+	void (*program_graphics)(struct xe_gt *gt, const struct xe_pat_table_entry table[],
+				 int n_entries);
+	void (*program_media)(struct xe_gt *gt, const struct xe_pat_table_entry table[],
+			      int n_entries);
+	void (*dump)(struct xe_gt *gt, struct drm_printer *p);
+};
+
+static const struct xe_pat_table_entry xelp_pat_table[] = {
+	[0] = { XELP_PAT_WB, XE_COH_AT_LEAST_1WAY },
+	[1] = { XELP_PAT_WC, XE_COH_NONE },
+	[2] = { XELP_PAT_WT, XE_COH_NONE },
+	[3] = { XELP_PAT_UC, XE_COH_NONE },
+};
+
+static const struct xe_pat_table_entry xehpc_pat_table[] = {
+	[0] = { XELP_PAT_UC, XE_COH_NONE },
+	[1] = { XELP_PAT_WC, XE_COH_NONE },
+	[2] = { XELP_PAT_WT, XE_COH_NONE },
+	[3] = { XELP_PAT_WB, XE_COH_AT_LEAST_1WAY },
+	[4] = { XEHPC_PAT_CLOS(1) | XELP_PAT_WT, XE_COH_NONE },
+	[5] = { XEHPC_PAT_CLOS(1) | XELP_PAT_WB, XE_COH_AT_LEAST_1WAY },
+	[6] = { XEHPC_PAT_CLOS(2) | XELP_PAT_WT, XE_COH_NONE },
+	[7] = { XEHPC_PAT_CLOS(2) | XELP_PAT_WB, XE_COH_AT_LEAST_1WAY },
+};
+
+static const struct xe_pat_table_entry xelpg_pat_table[] = {
+	[0] = { XELPG_PAT_0_WB, XE_COH_NONE },
+	[1] = { XELPG_PAT_1_WT, XE_COH_NONE },
+	[2] = { XELPG_PAT_3_UC, XE_COH_NONE },
+	[3] = { XELPG_PAT_0_WB | XELPG_2_COH_1W, XE_COH_AT_LEAST_1WAY },
+	[4] = { XELPG_PAT_0_WB | XELPG_3_COH_2W, XE_COH_AT_LEAST_1WAY },
+};
+
+/*
+ * The Xe2 table is getting large/complicated so it's easier to review if
+ * provided in a form that exactly matches the bspec's formatting.  The meanings
+ * of the fields here are:
+ *   - no_promote:  0=promotable, 1=no promote
+ *   - comp_en:     0=disable, 1=enable
+ *   - l3clos:      L3 class of service (0-3)
+ *   - l3_policy:   0=WB, 1=XD ("WB - Transient Display"), 3=UC
+ *   - l4_policy:   0=WB, 1=WT, 3=UC
+ *   - coh_mode:    0=no snoop, 2=1-way coherent, 3=2-way coherent
+ *
+ * Reserved entries should be programmed with the maximum caching, minimum
+ * coherency (which matches an all-0's encoding), so we can just omit them
+ * in the table.
+ */
+#define XE2_PAT(no_promote, comp_en, l3clos, l3_policy, l4_policy, __coh_mode) \
+	{ \
+		.value = (no_promote ? XE2_NO_PROMOTE : 0) | \
+			(comp_en ? XE2_COMP_EN : 0) | \
+			REG_FIELD_PREP(XE2_L3_CLOS, l3clos) | \
+			REG_FIELD_PREP(XE2_L3_POLICY, l3_policy) | \
+			REG_FIELD_PREP(XE2_L4_POLICY, l4_policy) | \
+			REG_FIELD_PREP(XE2_COH_MODE, __coh_mode), \
+		.coh_mode = __coh_mode ? XE_COH_AT_LEAST_1WAY : XE_COH_NONE \
+	}
+
+static const struct xe_pat_table_entry xe2_pat_table[] = {
+	[ 0] = XE2_PAT( 0, 0, 0, 0, 3, 0 ),
+	[ 1] = XE2_PAT( 0, 0, 0, 0, 3, 2 ),
+	[ 2] = XE2_PAT( 0, 0, 0, 0, 3, 3 ),
+	[ 3] = XE2_PAT( 0, 0, 0, 3, 3, 0 ),
+	[ 4] = XE2_PAT( 0, 0, 0, 3, 0, 2 ),
+	[ 5] = XE2_PAT( 0, 0, 0, 3, 3, 2 ),
+	[ 6] = XE2_PAT( 1, 0, 0, 1, 3, 0 ),
+	[ 7] = XE2_PAT( 0, 0, 0, 3, 0, 3 ),
+	[ 8] = XE2_PAT( 0, 0, 0, 3, 0, 0 ),
+	[ 9] = XE2_PAT( 0, 1, 0, 0, 3, 0 ),
+	[10] = XE2_PAT( 0, 1, 0, 3, 0, 0 ),
+	[11] = XE2_PAT( 1, 1, 0, 1, 3, 0 ),
+	[12] = XE2_PAT( 0, 1, 0, 3, 3, 0 ),
+	[13] = XE2_PAT( 0, 0, 0, 0, 0, 0 ),
+	[14] = XE2_PAT( 0, 1, 0, 0, 0, 0 ),
+	[15] = XE2_PAT( 1, 1, 0, 1, 1, 0 ),
+	/* 16..19 are reserved; leave set to all 0's */
+	[20] = XE2_PAT( 0, 0, 1, 0, 3, 0 ),
+	[21] = XE2_PAT( 0, 1, 1, 0, 3, 0 ),
+	[22] = XE2_PAT( 0, 0, 1, 0, 3, 2 ),
+	[23] = XE2_PAT( 0, 0, 1, 0, 3, 3 ),
+	[24] = XE2_PAT( 0, 0, 2, 0, 3, 0 ),
+	[25] = XE2_PAT( 0, 1, 2, 0, 3, 0 ),
+	[26] = XE2_PAT( 0, 0, 2, 0, 3, 2 ),
+	[27] = XE2_PAT( 0, 0, 2, 0, 3, 3 ),
+	[28] = XE2_PAT( 0, 0, 3, 0, 3, 0 ),
+	[29] = XE2_PAT( 0, 1, 3, 0, 3, 0 ),
+	[30] = XE2_PAT( 0, 0, 3, 0, 3, 2 ),
+	[31] = XE2_PAT( 0, 0, 3, 0, 3, 3 ),
+};
+
+/* Special PAT values programmed outside the main table */
+static const struct xe_pat_table_entry xe2_pat_ats = XE2_PAT( 0, 0, 0, 0, 3, 3 );
+
+u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index)
+{
+	WARN_ON(pat_index >= xe->pat.n_entries);
+	return xe->pat.table[pat_index].coh_mode;
+}
+
+static void program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[],
+			int n_entries)
+{
+	int idx;
+
+	/* Entry idx of the table goes to the non-MCR register _PAT_INDEX(idx). */
+	for (idx = 0; idx < n_entries; idx++)
+		xe_mmio_write32(gt, XE_REG(_PAT_INDEX(idx)), table[idx].value);
+}
+
+static void program_pat_mcr(struct xe_gt *gt, const struct xe_pat_table_entry table[],
+			    int n_entries)
+{
+	int idx;
+
+	/* Entry idx of the table is multicast to the MCR register _PAT_INDEX(idx). */
+	for (idx = 0; idx < n_entries; idx++)
+		xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_INDEX(idx)), table[idx].value);
+}
+
+static void xelp_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int i, err;
+
+	xe_device_mem_access_get(xe);
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		goto err_fw;
+
+	drm_printf(p, "PAT table:\n");
+	/* Read each PAT register back and print its 2-bit memory type by name. */
+	for (i = 0; i < xe->pat.n_entries; i++) {
+		u32 pat = xe_mmio_read32(gt, XE_REG(_PAT_INDEX(i)));
+		u8 mem_type = REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat);
+
+		drm_printf(p, "PAT[%2d] = %s (%#8x)\n", i,
+			   XELP_MEM_TYPE_STR_MAP[mem_type], pat);
+	}
+	/* A failed force-wake get lands on err_fw with err set, tripping the assert. */
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+err_fw:
+	xe_assert(xe, !err);
+	xe_device_mem_access_put(xe);
+}
+
+static const struct xe_pat_ops xelp_pat_ops = {
+	.program_graphics = program_pat,
+	.dump = xelp_dump,
+};
+
+static void xehp_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int i, err;
+
+	xe_device_mem_access_get(xe);
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		goto err_fw;
+
+	drm_printf(p, "PAT table:\n");
+	/* Read each PAT register via an MCR unicast read and print its memory type. */
+	for (i = 0; i < xe->pat.n_entries; i++) {
+		u32 pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i)));
+		u8 mem_type;
+
+		mem_type = REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat);
+
+		drm_printf(p, "PAT[%2d] = %s (%#8x)\n", i,
+			   XELP_MEM_TYPE_STR_MAP[mem_type], pat);
+	}
+	/* A failed force-wake get lands on err_fw with err set, tripping the assert. */
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+err_fw:
+	xe_assert(xe, !err);
+	xe_device_mem_access_put(xe);
+}
+
+static const struct xe_pat_ops xehp_pat_ops = {
+	.program_graphics = program_pat_mcr,
+	.dump = xehp_dump,
+};
+
+static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int i, err;
+
+	xe_device_mem_access_get(xe);
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		goto err_fw;
+
+	drm_printf(p, "PAT table:\n");
+	/* Each row prints [ memory type field, CLOS level field ] and the raw value. */
+	for (i = 0; i < xe->pat.n_entries; i++) {
+		u32 pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i)));
+
+		drm_printf(p, "PAT[%2d] = [ %u, %u ] (%#8x)\n", i,
+			   REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat),
+			   REG_FIELD_GET(XEHPC_CLOS_LEVEL_MASK, pat), pat);
+	}
+	/* A failed force-wake get lands on err_fw with err set, tripping the assert. */
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+err_fw:
+	xe_assert(xe, !err);
+	xe_device_mem_access_put(xe);
+}
+
+static const struct xe_pat_ops xehpc_pat_ops = {	/* no .program_media: member stays NULL */
+	.program_graphics = program_pat_mcr,
+	.dump = xehpc_dump,
+};
+
+static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int idx, ret;
+
+	xe_device_mem_access_get(xe);
+	ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (ret)
+		goto out;
+
+	drm_printf(p, "PAT table:\n");
+	for (idx = 0; idx < xe->pat.n_entries; idx++) {
+		u32 val;
+
+		/* Primary GT: MCR read of one instance; media GT: plain MMIO read. */
+		if (!xe_gt_is_media_type(gt))
+			val = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(idx)));
+		else
+			val = xe_mmio_read32(gt, XE_REG(_PAT_INDEX(idx)));
+
+		drm_printf(p, "PAT[%2d] = [ %u, %u ] (%#8x)\n", idx,
+			   REG_FIELD_GET(XELPG_L4_POLICY_MASK, val),
+			   REG_FIELD_GET(XELPG_INDEX_COH_MODE_MASK, val), val);
+	}
+
+	ret = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+out:
+	xe_assert(xe, !ret);
+	xe_device_mem_access_put(xe);
+}
+
+/*
+ * SAMedia register offsets are adjusted by the write methods and they target
+ * registers that are not MCR, while for normal GT they are MCR (see xelpg_dump()).
+ */
+static const struct xe_pat_ops xelpg_pat_ops = {
+	.program_graphics = program_pat_mcr,
+	.program_media = program_pat,
+	.dump = xelpg_dump,
+};
+
+static void xe2lpg_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[],
+			       int n_entries)
+{
+	program_pat_mcr(gt, table, n_entries);	/* indexed PAT entries, multicast */
+	xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_ATS), xe2_pat_ats.value);
+}
+
+static void xe2lpm_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[],
+			       int n_entries)
+{
+	program_pat(gt, table, n_entries);	/* indexed PAT entries, then PAT_ATS below */
+	xe_mmio_write32(gt, XE_REG(_PAT_ATS), xe2_pat_ats.value);
+}
+
+static void xe2_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int idx, ret;
+	u32 val;
+
+	xe_device_mem_access_get(xe);
+	ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (ret)
+		goto out;
+
+	drm_printf(p, "PAT table:\n");
+
+	for (idx = 0; idx < xe->pat.n_entries; idx++) {
+		if (!xe_gt_is_media_type(gt))
+			val = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(idx)));
+		else
+			val = xe_mmio_read32(gt, XE_REG(_PAT_INDEX(idx)));
+
+		drm_printf(p, "PAT[%2d] = [ %u, %u, %u, %u, %u, %u ]  (%#8x)\n", idx,
+			   !!(val & XE2_NO_PROMOTE),
+			   !!(val & XE2_COMP_EN),
+			   REG_FIELD_GET(XE2_L3_CLOS, val),
+			   REG_FIELD_GET(XE2_L3_POLICY, val),
+			   REG_FIELD_GET(XE2_L4_POLICY, val),
+			   REG_FIELD_GET(XE2_COH_MODE, val),
+			   val);
+	}
+
+	/*
+	 * PTA_MODE is dumped as well, decoded with the same field layout as a
+	 * PAT entry; it describes how the hardware accesses PPGTT entries.
+	 */
+	if (!xe_gt_is_media_type(gt))
+		val = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_PTA));
+	else
+		val = xe_mmio_read32(gt, XE_REG(_PAT_PTA));
+
+	drm_printf(p, "Page Table Access:\n");
+	drm_printf(p, "PTA_MODE= [ %u, %u, %u, %u, %u, %u ]  (%#8x)\n",
+		   !!(val & XE2_NO_PROMOTE),
+		   !!(val & XE2_COMP_EN),
+		   REG_FIELD_GET(XE2_L3_CLOS, val),
+		   REG_FIELD_GET(XE2_L3_POLICY, val),
+		   REG_FIELD_GET(XE2_L4_POLICY, val),
+		   REG_FIELD_GET(XE2_COH_MODE, val),
+		   val);
+
+	ret = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+out:
+	xe_assert(xe, !ret);
+	xe_device_mem_access_put(xe);
+}
+
+static const struct xe_pat_ops xe2_pat_ops = {
+	.program_graphics = xe2lpg_program_pat,	/* MCR multicast writes */
+	.program_media = xe2lpm_program_pat,	/* plain MMIO writes */
+	.dump = xe2_dump,
+};
+
+void xe_pat_init_early(struct xe_device *xe)
+{
+	if (GRAPHICS_VER(xe) == 20) {
+		xe->pat.ops = &xe2_pat_ops;
+		xe->pat.table = xe2_pat_table;
+		xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table);
+		xe->pat.idx[XE_CACHE_NONE] = 3;
+		xe->pat.idx[XE_CACHE_WT] = 15;
+		xe->pat.idx[XE_CACHE_WB] = 2;
+		xe->pat.idx[XE_CACHE_NONE_COMPRESSION] = 12; /* Applicable on xe2 and beyond */
+	} else if (xe->info.platform == XE_METEORLAKE) {
+		xe->pat.ops = &xelpg_pat_ops;
+		xe->pat.table = xelpg_pat_table;
+		xe->pat.n_entries = ARRAY_SIZE(xelpg_pat_table);
+		xe->pat.idx[XE_CACHE_NONE] = 2;
+		xe->pat.idx[XE_CACHE_WT] = 1;
+		xe->pat.idx[XE_CACHE_WB] = 3;
+	} else if (xe->info.platform == XE_PVC) {
+		xe->pat.ops = &xehpc_pat_ops;
+		xe->pat.table = xehpc_pat_table;
+		xe->pat.n_entries = ARRAY_SIZE(xehpc_pat_table);
+		xe->pat.idx[XE_CACHE_NONE] = 0;
+		xe->pat.idx[XE_CACHE_WT] = 2;
+		xe->pat.idx[XE_CACHE_WB] = 3;
+	} else if (xe->info.platform == XE_DG2) {
+		/*
+		 * Table is the same as previous platforms, but programming
+		 * method has changed.
+		 */
+		xe->pat.ops = &xehp_pat_ops;
+		xe->pat.table = xelp_pat_table;
+		xe->pat.n_entries = ARRAY_SIZE(xelp_pat_table);
+		xe->pat.idx[XE_CACHE_NONE] = 3;
+		xe->pat.idx[XE_CACHE_WT] = 2;
+		xe->pat.idx[XE_CACHE_WB] = 0;
+	} else if (GRAPHICS_VERx100(xe) <= 1210) {
+		WARN_ON_ONCE(!IS_DGFX(xe) && !xe->info.has_llc);
+		xe->pat.ops = &xelp_pat_ops;
+		xe->pat.table = xelp_pat_table;
+		xe->pat.n_entries = ARRAY_SIZE(xelp_pat_table);
+		xe->pat.idx[XE_CACHE_NONE] = 3;
+		xe->pat.idx[XE_CACHE_WT] = 2;
+		xe->pat.idx[XE_CACHE_WB] = 0;
+	} else {
+		/*
+		 * Going forward we expect to need new PAT settings for most
+		 * new platforms; failure to provide a new table can easily
+		 * lead to subtle, hard-to-debug problems.  If none of the
+		 * conditions above match the platform we're running on we'll
+		 * raise an error rather than trying to silently inherit the
+		 * most recent platform's behavior.  xe->pat is left untouched.
+		 */
+		drm_err(&xe->drm, "Missing PAT table for platform with graphics version %d.%02d!\n",
+			GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
+	}
+}
+
+void xe_pat_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	/* xe_pat_init_early() sets no ops for unknown platforms: nothing to program */
+	if (!xe->pat.ops)
+		return;
+	if (!xe_gt_is_media_type(gt))
+		xe->pat.ops->program_graphics(gt, xe->pat.table, xe->pat.n_entries);
+	else
+		xe->pat.ops->program_media(gt, xe->pat.table, xe->pat.n_entries);
+}
+
+void xe_pat_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	/* xe_pat_init_early() sets no ops for unknown platforms; mirror xe_pat_init() */
+	if (!xe->pat.ops || !xe->pat.ops->dump)
+		return;
+	xe->pat.ops->dump(gt, p);
+}
diff --git a/drivers/gpu/drm/xe/xe_pat.h b/drivers/gpu/drm/xe/xe_pat.h
new file mode 100644
index 000000000000..fa0dfbe525cd
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pat.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_PAT_H_
+#define _XE_PAT_H_
+
+#include <linux/types.h>
+
+struct drm_printer;
+struct xe_device;
+struct xe_gt;
+
+/**
+ * struct xe_pat_table_entry - The pat_index encoding and other meta information.
+ */
+struct xe_pat_table_entry {
+	/**
+	 * @value: The platform specific value encoding the various memory
+	 * attributes (this maps to some fixed pat_index). So things like
+	 * caching, coherency, compression, etc. can be encoded here.
+	 */
+	u32 value;
+
+	/**
+	 * @coh_mode: The GPU coherency mode that @value maps to (an XE_COH_* value).
+	 */
+#define XE_COH_NONE          1
+#define XE_COH_AT_LEAST_1WAY 2
+	u16 coh_mode;
+};
+
+/**
+ * xe_pat_init_early - SW initialization, setting up data based on device
+ * @xe: xe device
+ */
+void xe_pat_init_early(struct xe_device *xe);
+
+/**
+ * xe_pat_init - Program HW PAT table
+ * @gt: GT structure
+ */
+void xe_pat_init(struct xe_gt *gt);
+
+/**
+ * xe_pat_dump - Dump PAT table
+ * @gt: GT structure
+ * @p: Printer to dump info to
+ */
+void xe_pat_dump(struct xe_gt *gt, struct drm_printer *p);
+
+/**
+ * xe_pat_index_get_coh_mode - Extract the coherency mode for the given
+ * pat_index.
+ * @xe: xe device
+ * @pat_index: The pat_index to query
+ */
+u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
new file mode 100644
index 000000000000..dcc5ded1558e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -0,0 +1,951 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_pci.h"
+
+#include <kunit/static_stub.h>
+#include <linux/device/driver.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+
+#include <drm/drm_color_mgmt.h>
+#include <drm/drm_drv.h>
+#include <drm/xe_pciids.h>
+
+#include "regs/xe_gt_regs.h"
+#include "xe_device.h"
+#include "xe_display.h"
+#include "xe_drv.h"
+#include "xe_gt.h"
+#include "xe_macros.h"
+#include "xe_mmio.h"
+#include "xe_module.h"
+#include "xe_pci_types.h"
+#include "xe_pm.h"
+#include "xe_sriov.h"
+#include "xe_step.h"
+#include "xe_tile.h"
+
+enum toggle_d3cold {
+	D3COLD_DISABLE,	/* d3cold_toggle() calls pci_d3cold_disable() on the root port */
+	D3COLD_ENABLE,	/* d3cold_toggle() calls pci_d3cold_enable() on the root port */
+};
+
+struct xe_subplatform_desc {
+	enum xe_subplatform subplatform;	/* a zero value ends a subplatform array */
+	const char *name;
+	const u16 *pciidlist;	/* PCI device IDs, terminated by a 0 entry */
+};
+
+struct xe_gt_desc {
+	enum xe_gt_type type;
+	u32 mmio_adj_limit;
+	u32 mmio_adj_offset;
+};
+
+struct xe_device_desc {
+	/* Should only ever be set for platforms without GMD_ID */
+	const struct xe_graphics_desc *graphics;
+	/* Should only ever be set for platforms without GMD_ID */
+	const struct xe_media_desc *media;
+
+	const char *platform_name;
+	const struct xe_subplatform_desc *subplatforms;	/* optional; ends with an all-zero entry */
+
+	enum xe_platform platform;
+
+	u8 require_force_probe:1;	/* probe is refused unless the ID is in xe.force_probe */
+	u8 is_dgfx:1;
+
+	u8 has_display:1;	/* one of the conditions for xe->info.enable_display */
+	u8 has_heci_gscfi:1;
+	u8 has_llc:1;
+	u8 has_mmio_ext:1;
+	u8 has_sriov:1;
+	u8 skip_guc_pc:1;
+	u8 skip_mtcfg:1;
+	u8 skip_pcode:1;
+};
+
+__diag_push();
+__diag_ignore_all("-Woverride-init", "Allow field overrides in table");
+
+#define PLATFORM(x)		\
+	.platform = (x),	\
+	.platform_name = #x
+
+#define NOP(x)	x
+
+static const struct xe_graphics_desc graphics_xelp = {
+	.name = "Xe_LP",
+	.ver = 12,
+	.rel = 0,
+
+	.hw_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0),
+
+	.dma_mask_size = 39,
+	.va_bits = 48,
+	.vm_max_level = 3,
+};
+
+static const struct xe_graphics_desc graphics_xelpp = {
+	.name = "Xe_LP+",
+	.ver = 12,
+	.rel = 10,
+
+	.hw_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0),
+
+	.dma_mask_size = 39,
+	.va_bits = 48,
+	.vm_max_level = 3,
+};
+
+#define XE_HP_FEATURES \
+	.has_range_tlb_invalidation = true, \
+	.has_flat_ccs = true, \
+	.dma_mask_size = 46, \
+	.va_bits = 48, \
+	.vm_max_level = 3
+
+static const struct xe_graphics_desc graphics_xehpg = {
+	.name = "Xe_HPG",
+	.ver = 12,
+	.rel = 55,
+
+	.hw_engine_mask =
+		BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
+		BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) |
+		BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3),
+
+	XE_HP_FEATURES,
+	.vram_flags = XE_VRAM_FLAGS_NEED64K,
+};
+
+static const struct xe_graphics_desc graphics_xehpc = {
+	.name = "Xe_HPC",
+	.ver = 12,
+	.rel = 60,
+
+	.hw_engine_mask =
+		BIT(XE_HW_ENGINE_BCS0) | BIT(XE_HW_ENGINE_BCS1) |
+		BIT(XE_HW_ENGINE_BCS2) | BIT(XE_HW_ENGINE_BCS3) |
+		BIT(XE_HW_ENGINE_BCS4) | BIT(XE_HW_ENGINE_BCS5) |
+		BIT(XE_HW_ENGINE_BCS6) | BIT(XE_HW_ENGINE_BCS7) |
+		BIT(XE_HW_ENGINE_BCS8) |
+		BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) |
+		BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3),
+
+	XE_HP_FEATURES,
+	.dma_mask_size = 52,	/* overrides the XE_HP_FEATURES value */
+	.max_remote_tiles = 1,
+	.va_bits = 57,		/* overrides the XE_HP_FEATURES value */
+	.vm_max_level = 4,	/* overrides the XE_HP_FEATURES value */
+	.vram_flags = XE_VRAM_FLAGS_NEED64K,
+
+	.has_asid = 1,
+	.has_flat_ccs = 0,	/* overrides the XE_HP_FEATURES value */
+	.has_usm = 1,
+};
+
+static const struct xe_graphics_desc graphics_xelpg = {
+	.name = "Xe_LPG",	/* no .ver/.rel: selected through graphics_ip_map[] */
+	.hw_engine_mask =
+		BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
+		BIT(XE_HW_ENGINE_CCS0),
+
+	XE_HP_FEATURES,
+	.has_flat_ccs = 0,	/* overrides the XE_HP_FEATURES value */
+};
+
+#define XE2_GFX_FEATURES \
+	.dma_mask_size = 46, \
+	.has_asid = 1, \
+	.has_flat_ccs = 1, \
+	.has_range_tlb_invalidation = 1, \
+	.has_usm = 0 /* FIXME: implementation missing */, \
+	.va_bits = 48, \
+	.vm_max_level = 4, \
+	.hw_engine_mask = \
+		BIT(XE_HW_ENGINE_RCS0) | \
+		BIT(XE_HW_ENGINE_BCS8) | BIT(XE_HW_ENGINE_BCS0) | \
+		GENMASK(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)
+
+static const struct xe_graphics_desc graphics_xe2 = {
+	.name = "Xe2_LPG",
+
+	XE2_GFX_FEATURES,
+};
+
+static const struct xe_media_desc media_xem = {
+	.name = "Xe_M",
+	.ver = 12,
+	.rel = 0,
+
+	.hw_engine_mask =
+		BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) |
+		BIT(XE_HW_ENGINE_VECS0),
+};
+
+static const struct xe_media_desc media_xehpm = {
+	.name = "Xe_HPM",
+	.ver = 12,
+	.rel = 55,
+
+	.hw_engine_mask =
+		BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) |
+		BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VECS1),
+};
+
+static const struct xe_media_desc media_xelpmp = {
+	.name = "Xe_LPM+",
+	.hw_engine_mask =
+		BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) |
+		BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_GSCCS0)
+};
+
+static const struct xe_media_desc media_xe2 = {
+	.name = "Xe2_LPM",
+	.hw_engine_mask =
+		BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VECS0), /* TODO: GSC0 */
+};
+
+static const struct xe_device_desc tgl_desc = {
+	.graphics = &graphics_xelp,
+	.media = &media_xem,
+	PLATFORM(XE_TIGERLAKE),
+	.has_display = true,
+	.has_llc = true,
+	.require_force_probe = true,
+};
+
+static const struct xe_device_desc rkl_desc = {
+	.graphics = &graphics_xelp,
+	.media = &media_xem,
+	PLATFORM(XE_ROCKETLAKE),
+	.has_display = true,
+	.has_llc = true,
+	.require_force_probe = true,
+};
+
+static const u16 adls_rpls_ids[] = { XE_RPLS_IDS(NOP), 0 };
+
+static const struct xe_device_desc adl_s_desc = {
+	.graphics = &graphics_xelp,
+	.media = &media_xem,
+	PLATFORM(XE_ALDERLAKE_S),
+	.has_display = true,
+	.has_llc = true,
+	.require_force_probe = true,
+	.subplatforms = (const struct xe_subplatform_desc[]) {
+		{ XE_SUBPLATFORM_ALDERLAKE_S_RPLS, "RPLS", adls_rpls_ids },
+		{},
+	},
+};
+
+static const u16 adlp_rplu_ids[] = { XE_RPLU_IDS(NOP), 0 };
+
+static const struct xe_device_desc adl_p_desc = {
+	.graphics = &graphics_xelp,
+	.media = &media_xem,
+	PLATFORM(XE_ALDERLAKE_P),
+	.has_display = true,
+	.has_llc = true,
+	.require_force_probe = true,
+	.subplatforms = (const struct xe_subplatform_desc[]) {
+		{ XE_SUBPLATFORM_ALDERLAKE_P_RPLU, "RPLU", adlp_rplu_ids },
+		{},
+	},
+};
+
+static const struct xe_device_desc adl_n_desc = {
+	.graphics = &graphics_xelp,
+	.media = &media_xem,
+	PLATFORM(XE_ALDERLAKE_N),
+	.has_display = true,
+	.has_llc = true,
+	.require_force_probe = true,
+};
+
+#define DGFX_FEATURES \
+	.is_dgfx = 1
+
+static const struct xe_device_desc dg1_desc = {
+	.graphics = &graphics_xelpp,
+	.media = &media_xem,
+	DGFX_FEATURES,
+	PLATFORM(XE_DG1),
+	.has_display = true,
+	.has_heci_gscfi = 1,
+	.require_force_probe = true,
+};
+
+static const u16 dg2_g10_ids[] = { XE_DG2_G10_IDS(NOP), XE_ATS_M150_IDS(NOP), 0 };
+static const u16 dg2_g11_ids[] = { XE_DG2_G11_IDS(NOP), XE_ATS_M75_IDS(NOP), 0 };
+static const u16 dg2_g12_ids[] = { XE_DG2_G12_IDS(NOP), 0 };
+
+#define DG2_FEATURES \
+	DGFX_FEATURES, \
+	PLATFORM(XE_DG2), \
+	.has_heci_gscfi = 1, \
+	.subplatforms = (const struct xe_subplatform_desc[]) { \
+		{ XE_SUBPLATFORM_DG2_G10, "G10", dg2_g10_ids }, \
+		{ XE_SUBPLATFORM_DG2_G11, "G11", dg2_g11_ids }, \
+		{ XE_SUBPLATFORM_DG2_G12, "G12", dg2_g12_ids }, \
+		{ } \
+	}
+
+static const struct xe_device_desc ats_m_desc = {
+	.graphics = &graphics_xehpg,
+	.media = &media_xehpm,
+	.require_force_probe = true,
+
+	DG2_FEATURES,
+	.has_display = false,
+};
+
+static const struct xe_device_desc dg2_desc = {
+	.graphics = &graphics_xehpg,
+	.media = &media_xehpm,
+	.require_force_probe = true,
+
+	DG2_FEATURES,
+	.has_display = true,
+};
+
+static const __maybe_unused struct xe_device_desc pvc_desc = {	/* not listed in pciidlist[] */
+	.graphics = &graphics_xehpc,
+	DGFX_FEATURES,
+	PLATFORM(XE_PVC),
+	.has_display = false,
+	.has_heci_gscfi = 1,
+	.require_force_probe = true,
+};
+
+static const struct xe_device_desc mtl_desc = {
+	/* .graphics and .media determined via GMD_ID */
+	.require_force_probe = true,
+	PLATFORM(XE_METEORLAKE),
+	.has_display = true,
+};
+
+static const struct xe_device_desc lnl_desc = {	/* .graphics and .media determined via GMD_ID */
+	PLATFORM(XE_LUNARLAKE),
+	.require_force_probe = true,
+};
+
+#undef PLATFORM
+__diag_pop();
+
+/* Map of GMD_ID versions (arch * 100 + release, see read_gmdid()) to graphics IP */
+static struct gmdid_map graphics_ip_map[] = {
+	{ 1270, &graphics_xelpg },
+	{ 1271, &graphics_xelpg },
+	{ 2004, &graphics_xe2 },
+};
+
+/* Map of GMD_ID versions (arch * 100 + release, see read_gmdid()) to media IP */
+static struct gmdid_map media_ip_map[] = {
+	{ 1300, &media_xelpmp },
+	{ 2000, &media_xe2 },
+};
+
+#define INTEL_VGA_DEVICE(id, info) {			\
+	PCI_DEVICE(PCI_VENDOR_ID_INTEL, id),		\
+	PCI_BASE_CLASS_DISPLAY << 16, 0xff << 16,	\
+	(unsigned long) info }
+
+/*
+ * Make sure any device matches here are from most specific to most
+ * general.  For example, a match that is also based on subsystem and
+ * subvendor IDs would need to come before the more general device ID
+ * match for the same device, otherwise we'd use the wrong info struct above.
+ */
+static const struct pci_device_id pciidlist[] = {
+	XE_TGL_IDS(INTEL_VGA_DEVICE, &tgl_desc),
+	XE_RKL_IDS(INTEL_VGA_DEVICE, &rkl_desc),
+	XE_ADLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc),
+	XE_ADLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
+	XE_ADLN_IDS(INTEL_VGA_DEVICE, &adl_n_desc),
+	XE_RPLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
+	XE_RPLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc),
+	XE_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc),
+	XE_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc),
+	XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc),
+	XE_MTL_IDS(INTEL_VGA_DEVICE, &mtl_desc),
+	XE_LNL_IDS(INTEL_VGA_DEVICE, &lnl_desc),
+	{ }
+};
+MODULE_DEVICE_TABLE(pci, pciidlist);
+
+#undef INTEL_VGA_DEVICE
+
+/* is device_id present in comma separated list of ids */
+static bool device_id_in_list(u16 device_id, const char *devices, bool negative)
+{
+	const char *wildcard = negative ? "!*" : "*";
+	char *buf, *cursor, *tok;
+	bool found = false;
+
+	if (!devices || !*devices)
+		return false;
+
+	/* a lone wildcard of the requested polarity matches every device */
+	if (!strcmp(devices, wildcard))
+		return true;
+
+	/* strsep() modifies the string it walks, so parse a private copy */
+	buf = kstrdup(devices, GFP_KERNEL);
+	if (!buf)
+		return false;
+
+	cursor = buf;
+	while (!found && (tok = strsep(&cursor, ",")) != NULL) {
+		bool negated = tok[0] == '!';
+		u16 val;
+
+		/* only entries whose polarity matches the query are considered */
+		if (negated != negative)
+			continue;
+		if (negated)
+			tok++;
+
+		/* IDs are parsed as hexadecimal; tokens that fail to parse never match */
+		found = !kstrtou16(tok, 16, &val) && val == device_id;
+	}
+
+	kfree(buf);
+
+	return found;
+}
+
+static bool id_forced(u16 device_id)
+{
+	return device_id_in_list(device_id, xe_modparam.force_probe, false);	/* plain entries */
+}
+
+static bool id_blocked(u16 device_id)
+{
+	return device_id_in_list(device_id, xe_modparam.force_probe, true);	/* '!' entries */
+}
+
+static const struct xe_subplatform_desc *
+find_subplatform(const struct xe_device *xe, const struct xe_device_desc *desc)
+{
+	const struct xe_subplatform_desc *entry = desc->subplatforms;
+	const u16 devid = xe->info.devid;
+
+	/* the array (if any) and each per-entry ID list are zero-terminated */
+	for (; entry && entry->subplatform; entry++)
+		for (const u16 *id = entry->pciidlist; *id; id++)
+			if (*id == devid)
+				return entry;
+	return NULL;
+}
+
+enum xe_gmdid_type {
+	GMDID_GRAPHICS,	/* read_gmdid() reads GMD_ID at its base address */
+	GMDID_MEDIA	/* read_gmdid() reads GMD_ID at base + MEDIA_GT_GSI_OFFSET */
+};
+
+static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, u32 *revid)
+{
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
+	struct xe_reg gmdid_reg = GMD_ID;
+	u32 val;
+
+	KUNIT_STATIC_STUB_REDIRECT(read_gmdid, xe, type, ver, revid);
+
+	if (type == GMDID_MEDIA)
+		gmdid_reg.addr += MEDIA_GT_GSI_OFFSET;
+	/* *ver is encoded as arch * 100 + release, the form the *_ip_map[] tables use */
+	val = xe_mmio_read32(gt, gmdid_reg);
+	*ver = REG_FIELD_GET(GMD_ID_ARCH_MASK, val) * 100 + REG_FIELD_GET(GMD_ID_RELEASE_MASK, val);
+	*revid = REG_FIELD_GET(GMD_ID_REVID, val);
+}
+
+/*
+ * Pre-GMD_ID platform: device descriptor already points to the appropriate
+ * graphics descriptor. Simply forward the description and calculate the version
+ * appropriately. "graphics" should be present in all such platforms, while
+ * media is optional.
+ */
+static void handle_pre_gmdid(struct xe_device *xe,
+			     const struct xe_graphics_desc *graphics,
+			     const struct xe_media_desc *media)
+{
+	xe->info.graphics_verx100 = graphics->ver * 100 + graphics->rel;
+
+	/* media is optional here; media_verx100 is not written when it is absent */
+	if (media)
+		xe->info.media_verx100 = media->ver * 100 + media->rel;
+}
+
+/*
+ * GMD_ID platform: read IP version from hardware and select graphics descriptor
+ * based on the result.
+ */
+static void handle_gmdid(struct xe_device *xe,
+			 const struct xe_graphics_desc **graphics,
+			 const struct xe_media_desc **media,
+			 u32 *graphics_revid,
+			 u32 *media_revid)
+{
+	u32 ver;
+
+	read_gmdid(xe, GMDID_GRAPHICS, &ver, graphics_revid);
+
+	/* *graphics is only written on a match; xe_info_init() fails if it stays NULL */
+	for (int i = 0; i < ARRAY_SIZE(graphics_ip_map); i++) {
+		if (ver == graphics_ip_map[i].ver) {
+			xe->info.graphics_verx100 = ver;
+			*graphics = graphics_ip_map[i].ip;
+			break;
+		}
+	}
+
+	if (!xe->info.graphics_verx100) {
+		drm_err(&xe->drm, "Hardware reports unknown graphics version %u.%02u\n",
+			ver / 100, ver % 100);
+	}
+
+	read_gmdid(xe, GMDID_MEDIA, &ver, media_revid);
+
+	/* Media may legitimately be fused off / not present */
+	if (ver == 0)
+		return;
+
+	/* *media is only written on a match; an unknown version is only reported below */
+	for (int i = 0; i < ARRAY_SIZE(media_ip_map); i++) {
+		if (ver == media_ip_map[i].ver) {
+			xe->info.media_verx100 = ver;
+			*media = media_ip_map[i].ip;
+			break;
+		}
+	}
+
+	if (!xe->info.media_verx100) {
+		drm_err(&xe->drm, "Hardware reports unknown media version %u.%02u\n",
+			ver / 100, ver % 100);
+	}
+}
+
+/*
+ * Initialize device info content that only depends on static driver_data
+ * passed to the driver at probe time from PCI ID table.
+ */
+static int xe_info_init_early(struct xe_device *xe,
+			      const struct xe_device_desc *desc,
+			      const struct xe_subplatform_desc *subplatform_desc)
+{
+	xe->info.platform = desc->platform;
+
+	if (subplatform_desc)
+		xe->info.subplatform = subplatform_desc->subplatform;
+	else
+		xe->info.subplatform = XE_SUBPLATFORM_NONE;
+
+	/* feature flags are copied verbatim from the static descriptor */
+	xe->info.is_dgfx = desc->is_dgfx;
+	xe->info.has_heci_gscfi = desc->has_heci_gscfi;
+	xe->info.has_llc = desc->has_llc;
+	xe->info.has_mmio_ext = desc->has_mmio_ext;
+	xe->info.has_sriov = desc->has_sriov;
+	xe->info.skip_guc_pc = desc->skip_guc_pc;
+	xe->info.skip_mtcfg = desc->skip_mtcfg;
+	xe->info.skip_pcode = desc->skip_pcode;
+
+	/* display needs build-time support, the module parameter and the platform flag */
+	xe->info.enable_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) &&
+				  xe_modparam.enable_display &&
+				  desc->has_display;
+
+	/* finally set up the root tile (id 0) and propagate its status */
+	return xe_tile_init_early(xe_device_get_root_tile(xe), xe, 0);
+}
+
+/*
+ * Initialize device info content that does require knowledge about
+ * graphics / media IP version.
+ * Make sure that GT / tile structures allocated by the driver match the data
+ * present in device info.
+ */
+static int xe_info_init(struct xe_device *xe,
+			const struct xe_graphics_desc *graphics_desc,
+			const struct xe_media_desc *media_desc)
+{
+	u32 graphics_gmdid_revid = 0, media_gmdid_revid = 0;
+	struct xe_tile *tile;
+	struct xe_gt *gt;
+	u8 id;
+
+	/*
+	 * If this platform supports GMD_ID, we'll detect the proper IP
+	 * descriptor to use from hardware registers. desc->graphics will only
+	 * ever be set at this point for platforms before GMD_ID. In that case
+	 * the IP descriptions and versions are simply derived from that.
+	 */
+	if (graphics_desc) {
+		handle_pre_gmdid(xe, graphics_desc, media_desc);
+		xe->info.step = xe_step_pre_gmdid_get(xe);
+	} else {
+		xe_assert(xe, !media_desc);
+		handle_gmdid(xe, &graphics_desc, &media_desc,
+			     &graphics_gmdid_revid, &media_gmdid_revid);
+		xe->info.step = xe_step_gmdid_get(xe,
+						  graphics_gmdid_revid,
+						  media_gmdid_revid);
+	}
+
+	/*
+	 * If we couldn't detect the graphics IP, that's considered a fatal
+	 * error and we should abort driver load.  Failing to detect media
+	 * IP is non-fatal; we'll just proceed without enabling media support.
+	 */
+	if (!graphics_desc)
+		return -ENODEV;
+
+	xe->info.graphics_name = graphics_desc->name;
+	xe->info.media_name = media_desc ? media_desc->name : "none";
+	xe->info.tile_mmio_ext_size = graphics_desc->tile_mmio_ext_size;
+
+	xe->info.dma_mask_size = graphics_desc->dma_mask_size;
+	xe->info.vram_flags = graphics_desc->vram_flags;
+	xe->info.va_bits = graphics_desc->va_bits;
+	xe->info.vm_max_level = graphics_desc->vm_max_level;
+	xe->info.has_asid = graphics_desc->has_asid;
+	xe->info.has_flat_ccs = graphics_desc->has_flat_ccs;
+	xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation;
+	xe->info.has_usm = graphics_desc->has_usm;
+
+	/*
+	 * All platforms have at least one primary GT.  Any platform with media
+	 * version 13 or higher has an additional dedicated media GT.  And
+	 * depending on the graphics IP there may be additional "remote tiles."
+	 * All of these together determine the overall GT count.
+	 *
+	 * FIXME: 'tile_count' here is misnamed since the rest of the driver
+	 * treats it as the number of GTs rather than just the number of tiles.
+	 */
+	xe->info.tile_count = 1 + graphics_desc->max_remote_tiles;
+
+	for_each_remote_tile(tile, xe, id) {
+		int err;
+
+		err = xe_tile_init_early(tile, xe, id);
+		if (err)
+			return err;
+	}
+
+	for_each_tile(tile, xe, id) {
+		gt = tile->primary_gt;
+		gt->info.id = xe->info.gt_count++;
+		gt->info.type = XE_GT_TYPE_MAIN;
+		gt->info.__engine_mask = graphics_desc->hw_engine_mask;
+		if (MEDIA_VER(xe) < 13 && media_desc)	/* media engines sit on the primary GT */
+			gt->info.__engine_mask |= media_desc->hw_engine_mask;
+
+		if (MEDIA_VER(xe) < 13 || !media_desc)
+			continue;	/* no standalone media GT to set up */
+
+		/*
+		 * Allocate and setup media GT for platforms with standalone
+		 * media.
+		 */
+		tile->media_gt = xe_gt_alloc(tile);
+		if (IS_ERR(tile->media_gt))
+			return PTR_ERR(tile->media_gt);
+
+		gt = tile->media_gt;
+		gt->info.type = XE_GT_TYPE_MEDIA;
+		gt->info.__engine_mask = media_desc->hw_engine_mask;
+		gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET;
+		gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH;
+
+		/*
+		 * FIXME: At the moment multi-tile and standalone media are
+		 * mutually exclusive on current platforms.  We'll need to
+		 * come up with a better way to number GTs if we ever wind
+		 * up with platforms that support both together.
+		 */
+		drm_WARN_ON(&xe->drm, id != 0);
+		gt->info.id = xe->info.gt_count++;
+	}
+
+	return 0;
+}
+
+static void xe_pci_remove(struct pci_dev *pdev)
+{
+	struct xe_device *xe = pci_get_drvdata(pdev);
+
+	/* driver load aborted, nothing to clean up */
+	if (!xe)
+		return;
+
+	xe_device_remove(xe);
+	xe_pm_runtime_fini(xe);
+	pci_set_drvdata(pdev, NULL);
+}
+
+static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	const struct xe_device_desc *desc = (const void *)ent->driver_data;
+	const struct xe_subplatform_desc *subplatform_desc;
+	struct xe_device *xe;
+	int err;
+
+	if (desc->require_force_probe && !id_forced(pdev->device)) {
+		dev_info(&pdev->dev,
+			 "Your graphics device %04x is not officially supported\n"
+			 "by xe driver in this kernel version. To force Xe probe,\n"
+			 "use xe.force_probe='%04x' and i915.force_probe='!%04x'\n"
+			 "module parameters or CONFIG_DRM_XE_FORCE_PROBE='%04x' and\n"
+			 "CONFIG_DRM_I915_FORCE_PROBE='!%04x' configuration options.\n",
+			 pdev->device, pdev->device, pdev->device,
+			 pdev->device, pdev->device);
+		return -ENODEV;
+	}
+	/* a '!'-prefixed entry in xe.force_probe blocks the device outright */
+	if (id_blocked(pdev->device)) {
+		dev_info(&pdev->dev, "Probe blocked for device [%04x:%04x].\n",
+			 pdev->vendor, pdev->device);
+		return -ENODEV;
+	}
+
+	if (xe_display_driver_probe_defer(pdev))
+		return -EPROBE_DEFER;
+
+	err = pcim_enable_device(pdev);
+	if (err)
+		return err;
+
+	xe = xe_device_create(pdev, ent);
+	if (IS_ERR(xe))
+		return PTR_ERR(xe);
+
+	pci_set_drvdata(pdev, xe);	/* xe_pci_remove() looks the device up through this */
+
+	xe_pm_assert_unbounded_bridge(xe);
+	subplatform_desc = find_subplatform(xe, desc);
+
+	pci_set_master(pdev);
+
+	err = xe_info_init_early(xe, desc, subplatform_desc);
+	if (err)
+		return err;
+
+	xe_sriov_probe_early(xe, desc->has_sriov);
+
+	err = xe_device_probe_early(xe);
+	if (err)
+		return err;
+
+	err = xe_info_init(xe, desc->graphics, desc->media);
+	if (err)
+		return err;
+
+	xe_display_probe(xe);
+
+	drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx:%s (%d.%02d) media:%s (%d.%02d) display:%s dma_m_s:%d tc:%d gscfi:%d\n",
+		desc->platform_name,
+		subplatform_desc ? subplatform_desc->name : "",
+		xe->info.devid, xe->info.revid,
+		xe->info.is_dgfx,
+		xe->info.graphics_name,
+		xe->info.graphics_verx100 / 100,
+		xe->info.graphics_verx100 % 100,
+		xe->info.media_name,
+		xe->info.media_verx100 / 100,
+		xe->info.media_verx100 % 100,
+		str_yes_no(xe->info.enable_display),
+		xe->info.dma_mask_size, xe->info.tile_count,
+		xe->info.has_heci_gscfi);
+
+	drm_dbg(&xe->drm, "Stepping = (G:%s, M:%s, D:%s, B:%s)\n",
+		xe_step_name(xe->info.step.graphics),
+		xe_step_name(xe->info.step.media),
+		xe_step_name(xe->info.step.display),
+		xe_step_name(xe->info.step.basedie));
+
+	drm_dbg(&xe->drm, "SR-IOV support: %s (mode: %s)\n",
+		str_yes_no(xe_device_has_sriov(xe)),
+		xe_sriov_mode_to_string(xe_device_sriov_mode(xe)));
+
+	err = xe_device_probe(xe);
+	if (err)
+		return err;
+
+	xe_pm_init(xe);
+
+	drm_dbg(&xe->drm, "d3cold: capable=%s\n",
+		str_yes_no(xe->d3cold.capable));
+
+	return 0;
+}
+
+static void xe_pci_shutdown(struct pci_dev *pdev)
+{
+	xe_device_shutdown(pdev_to_xe_device(pdev));
+}
+
+#ifdef CONFIG_PM_SLEEP
+static void d3cold_toggle(struct pci_dev *pdev, enum toggle_d3cold toggle)
+{
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+	struct pci_dev *root_port = NULL;
+
+	/* D3cold is toggled on the root port above the device, if one is found */
+	if (xe->d3cold.capable)
+		root_port = pcie_find_root_port(pdev);
+
+	if (!root_port)
+		return;
+
+	switch (toggle) {
+	case D3COLD_ENABLE:
+		pci_d3cold_enable(root_port);
+		break;
+	case D3COLD_DISABLE:
+		pci_d3cold_disable(root_port);
+		break;
+	}
+}
+
+static int xe_pci_suspend(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int err;
+
+	err = xe_pm_suspend(pdev_to_xe_device(pdev));
+	if (err)
+		return err;
+
+	/*
+	 * Enabling D3Cold is needed for S2Idle/S0ix.
+	 * It is safe to allow it here since xe_pm_suspend has evicted
+	 * the local memory and the direct complete optimization is disabled.
+	 */
+	d3cold_toggle(pdev, D3COLD_ENABLE);
+
+	pci_save_state(pdev);
+	pci_disable_device(pdev);
+
+	return 0;
+}
+
+static int xe_pci_resume(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int err;
+
+	/* Give back the D3Cold decision to the runtime PM */
+	d3cold_toggle(pdev, D3COLD_DISABLE);
+
+	err = pci_set_power_state(pdev, PCI_D0);
+	if (err)
+		return err;
+
+	err = pci_enable_device(pdev);
+	if (err)
+		return err;
+
+	pci_set_master(pdev);
+
+	err = xe_pm_resume(pdev_to_xe_device(pdev));
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int xe_pci_runtime_suspend(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+	int err;
+
+	err = xe_pm_runtime_suspend(xe);
+	if (err)
+		return err;
+
+	pci_save_state(pdev);
+	/* NOTE(review): pci_set_power_state() results are not checked on either path below */
+	if (xe->d3cold.allowed) {
+		d3cold_toggle(pdev, D3COLD_ENABLE);
+		pci_disable_device(pdev);
+		pci_ignore_hotplug(pdev);
+		pci_set_power_state(pdev, PCI_D3cold);
+	} else {
+		d3cold_toggle(pdev, D3COLD_DISABLE);
+		pci_set_power_state(pdev, PCI_D3hot);
+	}
+
+	return 0;
+}
+
+static int xe_pci_runtime_resume(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+	int err;
+
+	err = pci_set_power_state(pdev, PCI_D0);
+	if (err)
+		return err;
+
+	pci_restore_state(pdev);
+	/* runtime suspend only disables the device on its d3cold.allowed path */
+	if (xe->d3cold.allowed) {
+		err = pci_enable_device(pdev);
+		if (err)
+			return err;
+
+		pci_set_master(pdev);
+	}
+
+	return xe_pm_runtime_resume(xe);
+}
+
+static int xe_pci_runtime_idle(struct device *dev)
+{
+	struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev));
+
+	/* update the D3cold decision that xe_pci_runtime_suspend() acts on */
+	xe_pm_d3cold_allowed_toggle(xe);
+
+	return 0;
+}
+
+static const struct dev_pm_ops xe_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(xe_pci_suspend, xe_pci_resume)
+	SET_RUNTIME_PM_OPS(xe_pci_runtime_suspend, xe_pci_runtime_resume, xe_pci_runtime_idle)
+};
+#endif
+
+static struct pci_driver xe_pci_driver = {
+	.name = DRIVER_NAME,
+	.id_table = pciidlist,
+	.probe = xe_pci_probe,
+	.remove = xe_pci_remove,
+	.shutdown = xe_pci_shutdown,
+#ifdef CONFIG_PM_SLEEP
+	.driver.pm = &xe_pm_ops,
+#endif
+};
+
+int xe_register_pci_driver(void)
+{
+	return pci_register_driver(&xe_pci_driver);
+}
+
+void xe_unregister_pci_driver(void)
+{
+	pci_unregister_driver(&xe_pci_driver);
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_pci.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pci.h b/drivers/gpu/drm/xe/xe_pci.h
new file mode 100644
index 000000000000..611c1209b14c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pci.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_PCI_H_
+#define _XE_PCI_H_
+
+int xe_register_pci_driver(void);
+void xe_unregister_pci_driver(void);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
new file mode 100644
index 000000000000..b1ad12fa22d6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_PCI_TYPES_H_
+#define _XE_PCI_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_graphics_desc {
+	const char *name;
+	u8 ver;
+	u8 rel;
+
+	u8 dma_mask_size;	/* available DMA address bits */
+	u8 va_bits;
+	u8 vm_max_level;
+	u8 vram_flags;
+
+	u64 hw_engine_mask;	/* hardware engines provided by graphics IP */
+
+	u32 tile_mmio_ext_size; /* size of MMIO extension space, per-tile */
+
+	u8 max_remote_tiles:2;
+
+	u8 has_asid:1;
+	u8 has_flat_ccs:1;
+	u8 has_range_tlb_invalidation:1;
+	u8 has_usm:1;
+};
+
+struct xe_media_desc {
+	const char *name;
+	u8 ver;
+	u8 rel;
+
+	u64 hw_engine_mask;	/* hardware engines provided by media IP */
+};
+
+struct gmdid_map {
+	unsigned int ver;
+	const void *ip;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
new file mode 100644
index 000000000000..b324dc2a5deb
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -0,0 +1,296 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_pcode.h"
+
+#include <linux/delay.h>
+#include <linux/errno.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_gt.h"
+#include "xe_mmio.h"
+#include "xe_pcode_api.h"
+
+/**
+ * DOC: PCODE
+ *
+ * Xe PCODE is the component responsible for interfacing with the PCODE
+ * firmware.
+ * It shall provide a very simple ABI to other Xe components, but be the
+ * single and consolidated place that will communicate with PCODE. All read
+ * and write operations to PCODE will be internal and private to this component.
+ *
+ * What's next:
+ * - PCODE hw metrics
+ * - PCODE for display operations
+ */
+
+static int pcode_mailbox_status(struct xe_gt *gt)
+{
+	u32 err;
+	static const struct pcode_err_decode err_decode[] = {
+		[PCODE_ILLEGAL_CMD] = {-ENXIO, "Illegal Command"},
+		[PCODE_TIMEOUT] = {-ETIMEDOUT, "Timed out"},
+		[PCODE_ILLEGAL_DATA] = {-EINVAL, "Illegal Data"},
+		[PCODE_ILLEGAL_SUBCOMMAND] = {-ENXIO, "Illegal Subcommand"},
+		[PCODE_LOCKED] = {-EBUSY, "PCODE Locked"},
+		[PCODE_GT_RATIO_OUT_OF_RANGE] = {-EOVERFLOW,
+			"GT ratio out of range"},
+		[PCODE_REJECTED] = {-EACCES, "PCODE Rejected"},
+		[PCODE_ERROR_MASK] = {-EPROTO, "Unknown"},
+	};
+	/* The [PCODE_ERROR_MASK] entry sizes the table for every masked value. */
+	lockdep_assert_held(&gt->pcode.lock);
+	/* Non-zero error field: codes without a table entry become -EPROTO. */
+	err = xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_ERROR_MASK;
+	if (err) {
+		drm_err(&gt_to_xe(gt)->drm, "PCODE Mailbox failed: %u %s\n", err,
+			err_decode[err].str ?: "Unknown");
+		return err_decode[err].errno ?: -EPROTO;
+	}
+
+	return 0;
+}
+
+static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
+			    unsigned int timeout_ms, bool return_data,
+			    bool atomic)
+{
+	int err;
+
+	if (gt_to_xe(gt)->info.skip_pcode)
+		return 0;
+
+	lockdep_assert_held(&gt->pcode.lock);
+	/* PCODE_READY already set: leave the mailbox alone and return -EAGAIN. */
+	if ((xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_READY) != 0)
+		return -EAGAIN;
+	/* @data0 is mandatory; a NULL @data1 is sent as 0. */
+	xe_mmio_write32(gt, PCODE_DATA0, *data0);
+	xe_mmio_write32(gt, PCODE_DATA1, data1 ? *data1 : 0);
+	xe_mmio_write32(gt, PCODE_MAILBOX, PCODE_READY | mbox);
+	/* NOTE(review): presumably waits until PCODE clears PCODE_READY. */
+	err = xe_mmio_wait32(gt, PCODE_MAILBOX, PCODE_READY, 0,
+			     timeout_ms * 1000, NULL, atomic);
+	if (err)
+		return err;
+
+	if (return_data) {
+		*data0 = xe_mmio_read32(gt, PCODE_DATA0);
+		if (data1)
+			*data1 = xe_mmio_read32(gt, PCODE_DATA1);
+	}
+	/* The result is whatever error field is now left in the mailbox. */
+	return pcode_mailbox_status(gt);
+}
+
+int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 data, int timeout_ms)
+{
+	int err;
+
+	mutex_lock(&gt->pcode.lock);
+	/* Write-only request: @timeout_ms bounds the wait, no data is read back. */
+	err = pcode_mailbox_rw(gt, mbox, &data, NULL, timeout_ms, false, false);
+	mutex_unlock(&gt->pcode.lock);
+	return err;
+}
+
+int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1)
+{
+	int err;
+
+	mutex_lock(&gt->pcode.lock);
+	err = pcode_mailbox_rw(gt, mbox, val, val1, 1, true, false);
+	mutex_unlock(&gt->pcode.lock);
+
+	return err;
+}
+
+static int xe_pcode_try_request(struct xe_gt *gt, u32 mbox,
+				u32 request, u32 reply_mask, u32 reply,
+				u32 *status, bool atomic, int timeout_us)
+{
+	int slept, wait = 10;
+
+	for (slept = 0; slept < timeout_us; slept += wait) {
+		*status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true,
+					   atomic);
+		if ((*status == 0) && ((request & reply_mask) == reply))
+			return 0;
+		/* NOTE(review): a completed round leaves the reply in request; confirm. */
+		if (atomic)
+			udelay(wait);
+		else
+			usleep_range(wait, wait << 1);
+		wait <<= 1;	/* double the delay for the next round */
+	}
+	/* *status keeps the last mailbox result; unset if no round was run. */
+	return -ETIMEDOUT;
+}
+
+/**
+ * xe_pcode_request - send PCODE request until acknowledgment
+ * @gt: gt
+ * @mbox: PCODE mailbox ID the request is targeted for
+ * @request: request ID
+ * @reply_mask: mask used to check for request acknowledgment
+ * @reply: value used to check for request acknowledgment
+ * @timeout_base_ms: timeout for polling with preemption enabled
+ *
+ * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
+ * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
+ * The request is acknowledged once the PCODE reply dword equals @reply after
+ * applying @reply_mask. Polling is first attempted with preemption enabled
+ * for @timeout_base_ms and if this times out for another 50 ms with
+ * preemption disabled.
+ *
+ * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
+ * other error as reported by PCODE.
+ */
+int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request,
+		      u32 reply_mask, u32 reply, int timeout_base_ms)
+{
+	u32 status = 0;	/* stays defined even if no request round is run */
+	int ret;
+
+	mutex_lock(&gt->pcode.lock);
+
+	ret = xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status,
+				   false, timeout_base_ms * 1000);
+	if (!ret)
+		goto out;
+
+	/*
+	 * The above can time out if the number of requests was low (2 in the
+	 * worst case) _and_ PCODE was busy for some reason even after a
+	 * (queued) request and @timeout_base_ms delay. As a workaround retry
+	 * the poll with preemption disabled to maximize the number of
+	 * requests. Increase the timeout from @timeout_base_ms to 50ms to
+	 * account for interrupts that could reduce the number of these
+	 * requests, and for any quirks of the PCODE firmware that delays
+	 * the request completion.
+	 */
+	drm_err(&gt_to_xe(gt)->drm,
+		"PCODE timeout, retrying with preemption disabled\n");
+	drm_WARN_ON_ONCE(&gt_to_xe(gt)->drm, timeout_base_ms > 1);
+	preempt_disable();
+	ret = xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status,
+				   true, 50 * 1000);
+	preempt_enable();
+
+out:
+	mutex_unlock(&gt->pcode.lock);
+	return status ? status : ret;
+}
+/**
+ * xe_pcode_init_min_freq_table - Initialize PCODE's QOS frequency table
+ * @gt: gt instance
+ * @min_gt_freq: Minimal (RPn) GT frequency in units of 50MHz.
+ * @max_gt_freq: Maximal (RP0) GT frequency in units of 50MHz.
+ *
+ * This function initializes PCODE's QOS frequency table for a proper minimal
+ * frequency/power steering decision, depending on the current requested GT
+ * frequency. For older platforms this was a more complete table including
+ * the IA freq. However for the latest platforms this table became a simple
+ * 1-1 Ring vs GT frequency. Even so, without setting it, PCODE might not
+ * take the right decisions for some memory frequencies and affect latency.
+ *
+ * It returns 0 on success, and -ERROR number on failure, -EINVAL if max
+ * frequency is not higher than the minimal, and other errors directly
+ * translated from the PCODE error returns:
+ * - -ENXIO: "Illegal Command"
+ * - -ETIMEDOUT: "Timed out"
+ * - -EINVAL: "Illegal Data"
+ * - -ENXIO, "Illegal Subcommand"
+ * - -EBUSY: "PCODE Locked"
+ * - -EOVERFLOW, "GT ratio out of range"
+ * - -EACCES, "PCODE Rejected"
+ * - -EPROTO, "Unknown"
+ */
+int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq,
+				 u32 max_gt_freq)
+{
+	int ret;
+	u32 freq;
+
+	if (!gt_to_xe(gt)->info.has_llc)
+		return 0;
+
+	if (max_gt_freq <= min_gt_freq)
+		return -EINVAL;
+
+	mutex_lock(&gt->pcode.lock);
+	for (freq = min_gt_freq; freq <= max_gt_freq; freq++) {
+		u32 data = freq << PCODE_FREQ_RING_RATIO_SHIFT | freq;
+
+		ret = pcode_mailbox_rw(gt, PCODE_WRITE_MIN_FREQ_TABLE,
+				       &data, NULL, 1, false, false);
+		if (ret)
+			goto unlock;
+	}
+
+unlock:
+	mutex_unlock(&gt->pcode.lock);
+	return ret;
+}
+
+/**
+ * xe_pcode_init - Ensure PCODE is initialized
+ * @gt: gt instance
+ *
+ * This function ensures that PCODE is properly initialized. To be called during
+ * probe and resume paths.
+ *
+ * It returns 0 on success, and -error number on failure.
+ */
+int xe_pcode_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	const int timeout_us = 180000000; /* 3 min */
+	u32 status;
+	int ret;
+
+	/* Nothing to wait for when pcode is skipped or the device is not DGFX. */
+	if (xe->info.skip_pcode || !IS_DGFX(xe))
+		return 0;
+
+	/* Poll DGFX_PCODE_STATUS until the init-complete bit is reported. */
+	mutex_lock(&gt->pcode.lock);
+	ret = xe_pcode_try_request(gt, DGFX_PCODE_STATUS, DGFX_GET_INIT_STATUS,
+				   DGFX_INIT_STATUS_COMPLETE,
+				   DGFX_INIT_STATUS_COMPLETE,
+				   &status, false, timeout_us);
+	mutex_unlock(&gt->pcode.lock);
+
+	if (ret)
+		drm_err(&xe->drm,
+			"PCODE initialization timedout after: 3 min\n");
+
+	return ret;
+}
+
+/**
+ * xe_pcode_probe - Prepare xe_pcode and also ensure PCODE is initialized.
+ * @gt: gt instance
+ *
+ * This function initializes the xe_pcode component, and when needed, it ensures
+ * that PCODE has properly performed its initialization and it is really ready
+ * to go. To be called once only during probe.
+ *
+ * It returns 0 on success, and -error number on failure.
+ */
+int xe_pcode_probe(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int err;
+
+	/* Every pcode entry point takes this lock, so it must exist first. */
+	err = drmm_mutex_init(&xe->drm, &gt->pcode.lock);
+	if (err)
+		return err;
+
+	return xe_pcode_init(gt); /* returns 0 early for skip_pcode or !DGFX */
+}
diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h
new file mode 100644
index 000000000000..08cb1d047cba
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pcode.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PCODE_H_
+#define _XE_PCODE_H_
+
+#include <linux/types.h>
+struct xe_gt;
+
+int xe_pcode_probe(struct xe_gt *gt);
+int xe_pcode_init(struct xe_gt *gt);
+int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq,
+				 u32 max_gt_freq);
+int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1);
+int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 val,
+			   int timeout_ms);
+#define xe_pcode_write(gt, mbox, val) \
+	xe_pcode_write_timeout(gt, mbox, val, 1)
+
+int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request,
+		     u32 reply_mask, u32 reply, int timeout_ms);
+
+#define PCODE_MBOX(mbcmd, param1, param2)\
+	(FIELD_PREP(PCODE_MB_COMMAND, mbcmd)\
+	| FIELD_PREP(PCODE_MB_PARAM1, param1)\
+	| FIELD_PREP(PCODE_MB_PARAM2, param2))
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h
new file mode 100644
index 000000000000..5935cfe30204
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pcode_api.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+/* Internal to xe_pcode */
+
+#include "regs/xe_reg_defs.h"
+
+#define PCODE_MAILBOX			XE_REG(0x138124)
+#define   PCODE_READY			REG_BIT(31)
+#define   PCODE_MB_PARAM2		REG_GENMASK(23, 16)
+#define   PCODE_MB_PARAM1		REG_GENMASK(15, 8)
+#define   PCODE_MB_COMMAND		REG_GENMASK(7, 0)
+#define   PCODE_ERROR_MASK		0xFF
+#define     PCODE_SUCCESS		0x0
+#define     PCODE_ILLEGAL_CMD		0x1
+#define     PCODE_TIMEOUT		0x2
+#define     PCODE_ILLEGAL_DATA		0x3
+#define     PCODE_ILLEGAL_SUBCOMMAND	0x4
+#define     PCODE_LOCKED		0x6
+#define     PCODE_GT_RATIO_OUT_OF_RANGE	0x10
+#define     PCODE_REJECTED		0x11
+
+#define PCODE_DATA0			XE_REG(0x138128)
+#define PCODE_DATA1			XE_REG(0x13812C)
+
+/* Min Freq QOS Table */
+#define   PCODE_WRITE_MIN_FREQ_TABLE	0x8
+#define   PCODE_READ_MIN_FREQ_TABLE	0x9
+#define   PCODE_FREQ_RING_RATIO_SHIFT	16
+
+/* PCODE Init */
+#define   DGFX_PCODE_STATUS		0x7E
+#define     DGFX_GET_INIT_STATUS	0x0
+#define     DGFX_INIT_STATUS_COMPLETE	0x1
+
+#define   PCODE_POWER_SETUP			0x7C
+#define     POWER_SETUP_SUBCOMMAND_READ_I1	0x4
+#define     POWER_SETUP_SUBCOMMAND_WRITE_I1	0x5
+#define	    POWER_SETUP_I1_WATTS		REG_BIT(31)
+#define	    POWER_SETUP_I1_SHIFT		6	/* 10.6 fixed point format */
+#define	    POWER_SETUP_I1_DATA_MASK		REG_GENMASK(15, 0)
+
+struct pcode_err_decode {
+	int errno;
+	const char *str;
+};
+
diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h
new file mode 100644
index 000000000000..553f53dbd093
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_platform_types.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PLATFORM_INFO_TYPES_H_
+#define _XE_PLATFORM_INFO_TYPES_H_
+
+/*
+ * Keep this in graphics version based order and chronological order within a
+ * version
+ */
+enum xe_platform {
+	XE_PLATFORM_UNINITIALIZED = 0,
+	XE_TIGERLAKE,
+	XE_ROCKETLAKE,
+	XE_ALDERLAKE_S,
+	XE_ALDERLAKE_P,
+	XE_ALDERLAKE_N,
+	XE_DG1,
+	XE_DG2,
+	XE_PVC,
+	XE_METEORLAKE,
+	XE_LUNARLAKE,
+};
+
+enum xe_subplatform {
+	XE_SUBPLATFORM_UNINITIALIZED = 0,
+	XE_SUBPLATFORM_NONE,
+	XE_SUBPLATFORM_ALDERLAKE_P_RPLU,
+	XE_SUBPLATFORM_ALDERLAKE_S_RPLS,
+	XE_SUBPLATFORM_DG2_G10,
+	XE_SUBPLATFORM_DG2_G11,
+	XE_SUBPLATFORM_DG2_G12,
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
new file mode 100644
index 000000000000..b429c2876a76
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -0,0 +1,405 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_pm.h"
+
+#include <linux/pm_runtime.h>
+
+#include <drm/drm_managed.h>
+#include <drm/ttm/ttm_placement.h>
+
+#include "xe_bo.h"
+#include "xe_bo_evict.h"
+#include "xe_device.h"
+#include "xe_device_sysfs.h"
+#include "xe_display.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_irq.h"
+#include "xe_pcode.h"
+#include "xe_wa.h"
+
+/**
+ * DOC: Xe Power Management
+ *
+ * Xe PM shall be guided by the simplicity.
+ * Use the simplest hook options whenever possible.
+ * Let's not reinvent the runtime_pm references and hooks.
+ * Shall have a clear separation of display and gt underneath this component.
+ *
+ * What's next:
+ *
+ * For now s2idle and s3 are only working in integrated devices. The next step
+ * is to iterate through all VRAM's BO backing them up into the system memory
+ * before allowing the system suspend.
+ *
+ * Also runtime_pm needs to be here from the beginning.
+ *
+ * RC6/RPS are also critical PM features. Let's start with GuCRC and GuC SLPC
+ * and no wait boost. Frequency optimizations should come on a next stage.
+ */
+
+/**
+ * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle
+ * @xe: xe device instance
+ *
+ * Return: 0 on success, or the error from xe_bo_evict_all()/xe_gt_suspend()
+ */
+int xe_pm_suspend(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	u8 id;
+	int err;
+
+	for_each_gt(gt, xe, id)
+		xe_gt_suspend_prepare(gt);
+
+	/* FIXME: Super racey... */
+	err = xe_bo_evict_all(xe);
+	if (err)
+		return err;
+
+	xe_display_pm_suspend(xe);
+
+	for_each_gt(gt, xe, id) {
+		err = xe_gt_suspend(gt);
+		if (err) {
+			xe_display_pm_resume(xe);
+			return err;
+		}
+	}
+
+	xe_irq_suspend(xe);
+
+	xe_display_pm_suspend_late(xe);
+
+	return 0;
+}
+
+/**
+ * xe_pm_resume - Helper for System resume S3->S0 / S2idle->S0
+ * @xe: xe device instance
+ *
+ * Return: 0 on success, or the first error from pcode init or BO restore
+ */
+int xe_pm_resume(struct xe_device *xe)
+{
+	struct xe_tile *tile;
+	struct xe_gt *gt;
+	u8 id;
+	int err;
+
+	for_each_tile(tile, xe, id)
+		xe_wa_apply_tile_workarounds(tile);
+
+	for_each_gt(gt, xe, id) {
+		err = xe_pcode_init(gt);
+		if (err)
+			return err;
+	}
+
+	xe_display_pm_resume_early(xe);
+
+	/*
+	 * This only restores pinned memory which is the memory required for the
+	 * GT(s) to resume.
+	 */
+	err = xe_bo_restore_kernel(xe);
+	if (err)
+		return err;
+
+	xe_irq_resume(xe);
+
+	xe_display_pm_resume(xe);
+
+	for_each_gt(gt, xe, id)
+		xe_gt_resume(gt);
+
+	err = xe_bo_restore_user(xe);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static bool xe_pm_pci_d3cold_capable(struct pci_dev *pdev)
+{
+	struct pci_dev *root = pcie_find_root_port(pdev);
+
+	/* Without a root port there is nothing to query for D3cold support. */
+	if (!root)
+		return false;
+
+	/*
+	 * D3cold needs both PME capability from D3cold and a _PR3 power
+	 * resource, each checked on the root port.
+	 */
+	return pci_pme_capable(root, PCI_D3cold) && pci_pr3_present(root);
+}
+
+static void xe_pm_runtime_init(struct xe_device *xe)
+{
+	struct device *dev = xe->drm.dev;
+
+	/*
+	 * Disable the system suspend direct complete optimization.
+	 * We need to ensure that the regular device suspend/resume functions
+	 * are called since our runtime_pm cannot guarantee local memory
+	 * eviction for d3cold.
+	 * TODO: Check HDA audio dependencies claimed by i915, and then enforce
+	 *       this option to integrated graphics as well.
+	 */
+	if (IS_DGFX(xe))
+		dev_pm_set_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE);
+
+	pm_runtime_use_autosuspend(dev);
+	pm_runtime_set_autosuspend_delay(dev, 1000);
+	pm_runtime_set_active(dev);
+	pm_runtime_allow(dev);
+	pm_runtime_mark_last_busy(dev);
+	pm_runtime_put(dev);
+}
+
+void xe_pm_init(struct xe_device *xe)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+
+	/* For now suspend/resume is only allowed with GuC */
+	if (!xe_device_uc_enabled(xe))
+		return;
+
+	drmm_mutex_init(&xe->drm, &xe->d3cold.lock);
+
+	xe->d3cold.capable = xe_pm_pci_d3cold_capable(pdev);
+
+	if (xe->d3cold.capable) {
+		xe_device_sysfs_init(xe);
+		xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD);
+	}
+
+	xe_pm_runtime_init(xe);
+}
+
+void xe_pm_runtime_fini(struct xe_device *xe)
+{
+	struct device *dev = xe->drm.dev;
+
+	pm_runtime_get_sync(dev);
+	pm_runtime_forbid(dev);
+}
+
+static void xe_pm_write_callback_task(struct xe_device *xe,
+				      struct task_struct *task)
+{
+	WRITE_ONCE(xe->pm_callback_task, task);
+
+	/*
+	 * Just in case it's somehow possible for our writes to be reordered to
+	 * the extent that something else re-uses the task written in
+	 * pm_callback_task. For example after returning from the callback, but
+	 * before the reordered write that resets pm_callback_task back to NULL.
+	 */
+	smp_mb(); /* pairs with xe_pm_read_callback_task */
+}
+
+struct task_struct *xe_pm_read_callback_task(struct xe_device *xe)
+{
+	smp_mb(); /* pairs with xe_pm_write_callback_task */
+
+	return READ_ONCE(xe->pm_callback_task);
+}
+
+int xe_pm_runtime_suspend(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	u8 id;
+	int err = 0;
+
+	if (xe->d3cold.allowed && xe_device_mem_access_ongoing(xe))
+		return -EBUSY;
+
+	/* Disable access_ongoing asserts and prevent recursive pm calls */
+	xe_pm_write_callback_task(xe, current);
+
+	/*
+	 * The actual xe_device_mem_access_put() is always async underneath, so
+	 * exactly where that is called should makes no difference to us. However
+	 * we still need to be very careful with the locks that this callback
+	 * acquires and the locks that are acquired and held by any callers of
+	 * xe_device_mem_access_get(). We already have the matching annotation
+	 * on that side, but we also need it here. For example lockdep should be
+	 * able to tell us if the following scenario is in theory possible:
+	 *
+	 * CPU0                          | CPU1 (kworker)
+	 * lock(A)                       |
+	 *                               | xe_pm_runtime_suspend()
+	 *                               |      lock(A)
+	 * xe_device_mem_access_get()    |
+	 *
+	 * This will clearly deadlock since rpm core needs to wait for
+	 * xe_pm_runtime_suspend() to complete, but here we are holding lock(A)
+	 * on CPU0 which prevents CPU1 making forward progress.  With the
+	 * annotation here and in xe_device_mem_access_get() lockdep will see
+	 * the potential lock inversion and give us a nice splat.
+	 */
+	lock_map_acquire(&xe_device_mem_access_lockdep_map);
+
+	if (xe->d3cold.allowed) {
+		err = xe_bo_evict_all(xe);
+		if (err)
+			goto out;
+	}
+
+	for_each_gt(gt, xe, id) {
+		err = xe_gt_suspend(gt);
+		if (err)
+			goto out;
+	}
+
+	xe_irq_suspend(xe);
+out:
+	lock_map_release(&xe_device_mem_access_lockdep_map);
+	xe_pm_write_callback_task(xe, NULL);
+	return err;
+}
+
+int xe_pm_runtime_resume(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	u8 id;
+	int err = 0;
+
+	/* Disable access_ongoing asserts and prevent recursive pm calls */
+	xe_pm_write_callback_task(xe, current);
+
+	lock_map_acquire(&xe_device_mem_access_lockdep_map);
+
+	/*
+	 * It can be possible that xe has allowed d3cold but other pcie devices
+	 * in gfx card soc would have blocked d3cold, therefore card has not
+	 * really lost power. Detecting primary Gt power is sufficient.
+	 */
+	gt = xe_device_get_gt(xe, 0);
+	xe->d3cold.power_lost = xe_guc_in_reset(&gt->uc.guc);
+
+	if (xe->d3cold.allowed && xe->d3cold.power_lost) {
+		for_each_gt(gt, xe, id) {
+			err = xe_pcode_init(gt);
+			if (err)
+				goto out;
+		}
+
+		/*
+		 * This only restores pinned memory which is the memory
+		 * required for the GT(s) to resume.
+		 */
+		err = xe_bo_restore_kernel(xe);
+		if (err)
+			goto out;
+	}
+
+	xe_irq_resume(xe);
+
+	for_each_gt(gt, xe, id)
+		xe_gt_resume(gt);
+
+	if (xe->d3cold.allowed && xe->d3cold.power_lost) {
+		err = xe_bo_restore_user(xe);
+		if (err)
+			goto out;
+	}
+out:
+	lock_map_release(&xe_device_mem_access_lockdep_map);
+	xe_pm_write_callback_task(xe, NULL);
+	return err;
+}
+
+int xe_pm_runtime_get(struct xe_device *xe)
+{
+	return pm_runtime_get_sync(xe->drm.dev);
+}
+
+int xe_pm_runtime_put(struct xe_device *xe)
+{
+	pm_runtime_mark_last_busy(xe->drm.dev);
+	return pm_runtime_put(xe->drm.dev);
+}
+
+int xe_pm_runtime_get_if_active(struct xe_device *xe)
+{
+	return pm_runtime_get_if_active(xe->drm.dev, true);
+}
+
+void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	struct pci_dev *bridge = pci_upstream_bridge(pdev);
+
+	if (!bridge)
+		return;
+
+	if (!bridge->driver) {
+		drm_warn(&xe->drm, "unbounded parent pci bridge, device won't support any PM support.\n");
+		device_set_pm_not_required(&pdev->dev);
+	}
+}
+
+int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
+{
+	struct ttm_resource_manager *man;
+	u32 vram_total_mb = 0;
+	int i;
+	/* Total size of all present VRAM managers, each rounded up to whole MB. */
+	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
+		man = ttm_manager_type(&xe->ttm, i);
+		if (man)
+			vram_total_mb += DIV_ROUND_UP_ULL(man->size, 1024 * 1024);
+	}
+
+	drm_dbg(&xe->drm, "Total vram %u mb\n", vram_total_mb);
+	/* The threshold is in MB and may not exceed the total VRAM size. */
+	if (threshold > vram_total_mb)
+		return -EINVAL;
+
+	mutex_lock(&xe->d3cold.lock);
+	xe->d3cold.vram_threshold = threshold;
+	mutex_unlock(&xe->d3cold.lock);
+
+	return 0;
+}
+
+void xe_pm_d3cold_allowed_toggle(struct xe_device *xe)
+{
+	u32 used_mb = 0;
+	int pl;
+
+	/* A device that is not D3cold capable never has it allowed. */
+	if (!xe->d3cold.capable) {
+		xe->d3cold.allowed = false;
+		return;
+	}
+
+	/* Add up the usage of each present VRAM manager, rounded up to MB. */
+	for (pl = XE_PL_VRAM0; pl <= XE_PL_VRAM1; ++pl) {
+		struct ttm_resource_manager *mgr;
+		u64 bytes;
+
+		mgr = ttm_manager_type(&xe->ttm, pl);
+		if (!mgr)
+			continue;
+
+		bytes = ttm_resource_manager_usage(mgr);
+		used_mb += DIV_ROUND_UP_ULL(bytes, 1024 * 1024);
+	}
+
+	/* Allowed only while usage stays strictly below the VRAM threshold. */
+	mutex_lock(&xe->d3cold.lock);
+	xe->d3cold.allowed = used_mb < xe->d3cold.vram_threshold;
+	mutex_unlock(&xe->d3cold.lock);
+
+	drm_dbg(&xe->drm,
+		"d3cold: allowed=%s\n", str_yes_no(xe->d3cold.allowed));
+}
diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h
new file mode 100644
index 000000000000..6b9031f7af24
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pm.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PM_H_
+#define _XE_PM_H_
+
+#include <linux/pm_runtime.h>
+
+/*
+ * TODO: Threshold = 0 will block D3Cold.
+ *       Before we can move this to a higher value (like 300), we need to:
+ *           1. rewrite the VRAM save / restore to avoid buffer object locks
+ */
+#define DEFAULT_VRAM_THRESHOLD 0 /* in MB */
+
+struct xe_device;
+
+int xe_pm_suspend(struct xe_device *xe);
+int xe_pm_resume(struct xe_device *xe);
+
+void xe_pm_init(struct xe_device *xe);
+void xe_pm_runtime_fini(struct xe_device *xe);
+int xe_pm_runtime_suspend(struct xe_device *xe);
+int xe_pm_runtime_resume(struct xe_device *xe);
+int xe_pm_runtime_get(struct xe_device *xe);
+int xe_pm_runtime_put(struct xe_device *xe);
+int xe_pm_runtime_get_if_active(struct xe_device *xe);
+void xe_pm_assert_unbounded_bridge(struct xe_device *xe);
+int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold);
+void xe_pm_d3cold_allowed_toggle(struct xe_device *xe);
+struct task_struct *xe_pm_read_callback_task(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c
new file mode 100644
index 000000000000..7bce2a332603
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_preempt_fence.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_preempt_fence.h"
+
+#include <linux/slab.h>
+
+#include "xe_exec_queue.h"
+#include "xe_vm.h"
+
+static void preempt_fence_work_func(struct work_struct *w)
+{
+	bool cookie = dma_fence_begin_signalling();
+	struct xe_preempt_fence *pfence =
+		container_of(w, typeof(*pfence), preempt_work);
+	struct xe_exec_queue *q = pfence->q;
+	/* pfence->error is the result of q->ops->suspend() at enable-signaling. */
+	if (pfence->error)
+		dma_fence_set_error(&pfence->base, pfence->error);
+	else
+		q->ops->suspend_wait(q);
+
+	dma_fence_signal(&pfence->base);
+	dma_fence_end_signalling(cookie);
+
+	xe_vm_queue_rebind_worker(q->vm);
+	/* Drop the queue reference taken in xe_preempt_fence_arm(). */
+	xe_exec_queue_put(q);
+}
+
+static const char *
+preempt_fence_get_driver_name(struct dma_fence *fence)
+{
+	return "xe";
+}
+
+static const char *
+preempt_fence_get_timeline_name(struct dma_fence *fence)
+{
+	return "preempt";
+}
+
+static bool preempt_fence_enable_signaling(struct dma_fence *fence)
+{
+	struct xe_preempt_fence *pfence =
+		container_of(fence, typeof(*pfence), base);
+	struct xe_exec_queue *q = pfence->q;
+
+	pfence->error = q->ops->suspend(q);
+	queue_work(system_unbound_wq, &pfence->preempt_work);
+	return true;
+}
+
+static const struct dma_fence_ops preempt_fence_ops = {
+	.get_driver_name = preempt_fence_get_driver_name,
+	.get_timeline_name = preempt_fence_get_timeline_name,
+	.enable_signaling = preempt_fence_enable_signaling,
+};
+
+/**
+ * xe_preempt_fence_alloc() - Allocate a preempt fence with minimal
+ * initialization
+ *
+ * Allocate a preempt fence, and initialize its list head.
+ * If the preempt_fence allocated has been armed with
+ * xe_preempt_fence_arm(), it must be freed using dma_fence_put(). If not,
+ * it must be freed using xe_preempt_fence_free().
+ *
+ * Return: A struct xe_preempt_fence pointer used for calling into
+ * xe_preempt_fence_arm() or xe_preempt_fence_free().
+ * An error pointer on error.
+ */
+struct xe_preempt_fence *xe_preempt_fence_alloc(void)
+{
+	struct xe_preempt_fence *pfence;
+
+	pfence = kmalloc(sizeof(*pfence), GFP_KERNEL);
+	if (!pfence)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&pfence->link);
+	INIT_WORK(&pfence->preempt_work, preempt_fence_work_func);
+
+	return pfence;
+}
+
+/**
+ * xe_preempt_fence_free() - Free a preempt fence allocated using
+ * xe_preempt_fence_alloc().
+ * @pfence: pointer obtained from xe_preempt_fence_alloc();
+ *
+ * Free a preempt fence that has not yet been armed.
+ */
+void xe_preempt_fence_free(struct xe_preempt_fence *pfence)
+{
+	list_del(&pfence->link);
+	kfree(pfence);
+}
+
+/**
+ * xe_preempt_fence_arm() - Arm a preempt fence allocated using
+ * xe_preempt_fence_alloc().
+ * @pfence: The struct xe_preempt_fence pointer returned from
+ *          xe_preempt_fence_alloc().
+ * @q: The struct xe_exec_queue used for arming.
+ * @context: The dma-fence context used for arming.
+ * @seqno: The dma-fence seqno used for arming.
+ *
+ * Inserts the preempt fence into @context's timeline, takes its link off any
+ * list, and registers @q as the struct xe_exec_queue to be preempted.
+ *
+ * Return: A pointer to a struct dma_fence embedded into the preempt fence.
+ * This function doesn't error.
+ */
+struct dma_fence *
+xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_exec_queue *q,
+		     u64 context, u32 seqno)
+{
+	list_del_init(&pfence->link);
+	pfence->q = xe_exec_queue_get(q);
+	dma_fence_init(&pfence->base, &preempt_fence_ops,
+		      &q->compute.lock, context, seqno);
+
+	return &pfence->base;
+}
+
+/**
+ * xe_preempt_fence_create() - Helper to create and arm a preempt fence.
+ * @q: The struct xe_exec_queue used for arming.
+ * @context: The dma-fence context used for arming.
+ * @seqno: The dma-fence seqno used for arming.
+ *
+ * Allocates and inserts the preempt fence into @context's timeline,
+ * and registers @q as the struct xe_exec_queue to be preempted.
+ *
+ * Return: A pointer to the resulting struct dma_fence on success. An error
+ * pointer on error. In particular if allocation fails it returns
+ * ERR_PTR(-ENOMEM);
+ */
+struct dma_fence *
+xe_preempt_fence_create(struct xe_exec_queue *q,
+			u64 context, u32 seqno)
+{
+	struct xe_preempt_fence *pfence;
+
+	pfence = xe_preempt_fence_alloc();
+	if (IS_ERR(pfence))
+		return ERR_CAST(pfence);
+
+	return xe_preempt_fence_arm(pfence, q, context, seqno);
+}
+
+bool xe_fence_is_xe_preempt(const struct dma_fence *fence)
+{
+	return fence->ops == &preempt_fence_ops;
+}
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.h b/drivers/gpu/drm/xe/xe_preempt_fence.h
new file mode 100644
index 000000000000..9406c6fea525
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_preempt_fence.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PREEMPT_FENCE_H_
+#define _XE_PREEMPT_FENCE_H_
+
+#include "xe_preempt_fence_types.h"
+
+struct list_head;
+
+struct dma_fence *
+xe_preempt_fence_create(struct xe_exec_queue *q,
+			u64 context, u32 seqno);
+
+struct xe_preempt_fence *xe_preempt_fence_alloc(void);
+
+void xe_preempt_fence_free(struct xe_preempt_fence *pfence);
+
+struct dma_fence *
+xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_exec_queue *q,
+		     u64 context, u32 seqno);
+
+static inline struct xe_preempt_fence *
+to_preempt_fence(struct dma_fence *fence)
+{
+	return container_of(fence, struct xe_preempt_fence, base);
+}
+
+/**
+ * xe_preempt_fence_link() - Return a link used to keep unarmed preempt
+ * fences on a list.
+ * @pfence: Pointer to the preempt fence.
+ *
+ * The link is embedded in the struct xe_preempt_fence. Use
+ * to_preempt_fence_from_link() to convert back to the preempt fence.
+ *
+ * Return: A pointer to an embedded struct list_head.
+ */
+static inline struct list_head *
+xe_preempt_fence_link(struct xe_preempt_fence *pfence)
+{
+	return &pfence->link;
+}
+
+/**
+ * to_preempt_fence_from_link() - Convert back to a preempt fence pointer
+ * from a link obtained with xe_preempt_fence_link().
+ * @link: The struct list_head obtained from xe_preempt_fence_link().
+ *
+ * Return: A pointer to the embedding struct xe_preempt_fence.
+ */
+static inline struct xe_preempt_fence *
+to_preempt_fence_from_link(struct list_head *link)
+{
+	return container_of(link, struct xe_preempt_fence, link);
+}
+
+bool xe_fence_is_xe_preempt(const struct dma_fence *fence);
+#endif
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence_types.h b/drivers/gpu/drm/xe/xe_preempt_fence_types.h
new file mode 100644
index 000000000000..b54b5c29b533
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_preempt_fence_types.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PREEMPT_FENCE_TYPES_H_
+#define _XE_PREEMPT_FENCE_TYPES_H_
+
+#include <linux/dma-fence.h>
+#include <linux/workqueue.h>
+
+struct xe_exec_queue;
+
+/**
+ * struct xe_preempt_fence - XE preempt fence
+ *
+ * Suspends an exec queue on the hardware and triggers a callback once complete.
+ */
+struct xe_preempt_fence {
+	/** @base: dma fence base */
+	struct dma_fence base;
+	/** @link: link into list of pending preempt fences */
+	struct list_head link;
+	/** @q: exec queue for this preempt fence */
+	struct xe_exec_queue *q;
+	/** @preempt_work: work struct which issues preemption */
+	struct work_struct preempt_work;
+	/** @error: preempt fence is in error state */
+	int error;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
new file mode 100644
index 000000000000..6653c045f3c9
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -0,0 +1,1669 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_pt.h"
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_drm_client.h"
+#include "xe_gt.h"
+#include "xe_gt_tlb_invalidation.h"
+#include "xe_migrate.h"
+#include "xe_pt_types.h"
+#include "xe_pt_walk.h"
+#include "xe_res_cursor.h"
+#include "xe_trace.h"
+#include "xe_ttm_stolen_mgr.h"
+#include "xe_vm.h"
+
+struct xe_pt_dir {
+	struct xe_pt pt;
+	/** @children: Array of page-table child nodes */
+	struct xe_ptw *children[XE_PDES];
+};
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
+#define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr = (__addr))
+#define xe_pt_addr(__xe_pt) ((__xe_pt)->addr)
+#else
+#define xe_pt_set_addr(__xe_pt, __addr)
+#define xe_pt_addr(__xe_pt) 0ull
+#endif
+
+static const u64 xe_normal_pt_shifts[] = {12, 21, 30, 39, 48};
+static const u64 xe_compact_pt_shifts[] = {16, 21, 30, 39, 48};
+
+#define XE_PT_HIGHEST_LEVEL (ARRAY_SIZE(xe_normal_pt_shifts) - 1)
+
+static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt)
+{
+	return container_of(pt, struct xe_pt_dir, pt);
+}
+
+static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index)
+{
+	return container_of(pt_dir->children[index], struct xe_pt, base);
+}
+
+static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
+			     unsigned int level)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	u16 pat_index = xe->pat.idx[XE_CACHE_WB];
+	u8 id = tile->id;
+
+	if (!xe_vm_has_scratch(vm))
+		return 0;
+
+	if (level > MAX_HUGEPTE_LEVEL)
+		return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo,
+						 0, pat_index);
+
+	return vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) |
+		XE_PTE_NULL;
+}
+
+static void xe_pt_free(struct xe_pt *pt)
+{
+	/* Directory levels were allocated as the enclosing struct xe_pt_dir. */
+	void *leaf = pt;
+
+	kfree(pt->level ? as_xe_pt_dir(pt) : leaf);
+}
+
+/**
+ * xe_pt_create() - Create a page-table.
+ * @vm: The vm to create for.
+ * @tile: The tile to create for.
+ * @level: The page-table level.
+ *
+ * Allocate and initialize a single struct xe_pt metadata structure. Also
+ * create the corresponding page-table bo, but don't initialize it. If the
+ * level is greater than zero, then it's assumed to be a directory page-
+ * table and the directory structure is also allocated and initialized to
+ * NULL pointers.
+ *
+ * Return: A valid struct xe_pt pointer on success, Pointer error code on
+ * error.
+ */
+struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
+			   unsigned int level)
+{
+	struct xe_pt *pt;
+	struct xe_bo *bo;
+	int err;
+
+	if (level) {
+		struct xe_pt_dir *dir = kzalloc(sizeof(*dir), GFP_KERNEL);
+
+		pt = (dir) ? &dir->pt : NULL;
+	} else {
+		pt = kzalloc(sizeof(*pt), GFP_KERNEL);
+	}
+	if (!pt)
+		return ERR_PTR(-ENOMEM);
+
+	pt->level = level;
+	bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K,
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+				  XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT |
+				  XE_BO_CREATE_PINNED_BIT |
+				  XE_BO_CREATE_NO_RESV_EVICT |
+				  XE_BO_PAGETABLE);
+	if (IS_ERR(bo)) {
+		err = PTR_ERR(bo);
+		goto err_kfree;
+	}
+	pt->bo = bo;
+	pt->base.children = level ? as_xe_pt_dir(pt)->children : NULL;
+
+	if (vm->xef)
+		xe_drm_client_add_bo(vm->xef->client, pt->bo);
+	xe_tile_assert(tile, level <= XE_VM_MAX_LEVEL);
+
+	return pt;
+
+err_kfree:
+	xe_pt_free(pt);
+	return ERR_PTR(err);
+}
+
+/**
+ * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero
+ * entries.
+ * @tile: The tile the scratch pagetable of which to use.
+ * @vm: The vm we populate for.
+ * @pt: The pagetable the bo of which to initialize.
+ *
+ * Populate the page-table bo of @pt with entries pointing into the tile's
+ * scratch page-table tree if any. Otherwise populate with zeros.
+ */
+void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm,
+			  struct xe_pt *pt)
+{
+	struct iosys_map *map = &pt->bo->vmap;
+	u64 empty;
+	int i;
+
+	if (!xe_vm_has_scratch(vm)) {
+		/*
+		 * FIXME: Some memory is already zeroed when allocated?
+		 * Find out which memory that is and avoid this memset...
+		 */
+		xe_map_memset(vm->xe, map, 0, 0, SZ_4K);
+	} else {
+		empty = __xe_pt_empty_pte(tile, vm, pt->level);
+		for (i = 0; i < XE_PDES; i++)
+			xe_pt_write(vm->xe, map, i, empty);
+	}
+}
+
+/**
+ * xe_pt_shift() - Return the ilog2 value of the size of the address range of
+ * a page-table at a certain level.
+ * @level: The level.
+ *
+ * Return: The ilog2 value of the size of the address range of a page-table
+ * at level @level.
+ */
+unsigned int xe_pt_shift(unsigned int level)
+{
+	return XE_PTE_SHIFT + XE_PDE_SHIFT * level;
+}
+
+/**
+ * xe_pt_destroy() - Destroy a page-table tree.
+ * @pt: The root of the page-table tree to destroy.
+ * @flags: vm flags. Currently unused.
+ * @deferred: List head of lockless list for deferred putting. NULL for
+ *            immediate putting.
+ *
+ * Puts the page-table bo, recursively calls xe_pt_destroy on all children
+ * and finally frees @pt. TODO: Can we remove the @flags argument?
+ */
+void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred)
+{
+	int i;
+
+	if (!pt)
+		return;
+
+	XE_WARN_ON(!list_empty(&pt->bo->ttm.base.gpuva.list));
+	xe_bo_unpin(pt->bo);
+	xe_bo_put_deferred(pt->bo, deferred);
+
+	if (pt->level > 0 && pt->num_live) {
+		struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt);
+
+		for (i = 0; i < XE_PDES; i++) {
+			if (xe_pt_entry(pt_dir, i))
+				xe_pt_destroy(xe_pt_entry(pt_dir, i), flags,
+					      deferred);
+		}
+	}
+	xe_pt_free(pt);
+}
+
+/**
+ * DOC: Pagetable building
+ *
+ * Below we use the term "page-table" for both page-directories, containing
+ * pointers to lower level page-directories or page-tables, and level 0
+ * page-tables that contain only page-table-entries pointing to memory pages.
+ *
+ * When inserting an address range in an already existing page-table tree
+ * there will typically be a set of page-tables that are shared with other
+ * address ranges, and a set that are private to this address range.
+ * The set of shared page-tables can be at most two per level,
+ * and those can't be updated immediately because the entries of those
+ * page-tables may still be in use by the gpu for other mappings. Therefore
+ * when inserting entries into those, we instead stage those insertions by
+ * adding insertion data into struct xe_vm_pgtable_update structures. This
+ * data, (subtrees for the cpu and page-table-entries for the gpu) is then
+ * added in a separate commit step. CPU-data is committed while still under the
+ * vm lock, the object lock and for userptr, the notifier lock in read mode.
+ * The GPU async data is committed either by the GPU or CPU after fulfilling
+ * relevant dependencies.
+ * For non-shared page-tables (and, in fact, for shared ones that aren't
+ * existing at the time of staging), we add the data in-place without the
+ * special update structures. This private part of the page-table tree will
+ * remain disconnected from the vm page-table tree until data is committed to
+ * the shared page tables of the vm tree in the commit phase.
+ */
+
+struct xe_pt_update {
+	/** @update: The update structure we're building for this parent. */
+	struct xe_vm_pgtable_update *update;
+	/** @parent: The parent. Used to detect a parent change. */
+	struct xe_pt *parent;
+	/** @preexisting: Whether the parent was pre-existing or allocated */
+	bool preexisting;
+};
+
+struct xe_pt_stage_bind_walk {
+	/** @base: The base class. */
+	struct xe_pt_walk base;
+
+	/* Input parameters for the walk */
+	/** @vm: The vm we're building for. */
+	struct xe_vm *vm;
+	/** @tile: The tile we're building for. */
+	struct xe_tile *tile;
+	/** @default_pte: PTE flag only template. No address is associated */
+	u64 default_pte;
+	/** @dma_offset: DMA offset to add to the PTE. */
+	u64 dma_offset;
+	/**
+	 * @needs_64K: This address range enforces 64K alignment and
+	 * granularity.
+	 */
+	bool needs_64K;
+	/**
+	 * @vma: VMA being mapped
+	 */
+	struct xe_vma *vma;
+
+	/* Also input, but is updated during the walk */
+	/** @curs: The DMA address cursor. */
+	struct xe_res_cursor *curs;
+	/** @va_curs_start: The virtual address corresponding to @curs->start */
+	u64 va_curs_start;
+
+	/* Output */
+	struct xe_walk_update {
+		/** @wupd.entries: Caller provided storage. */
+		struct xe_vm_pgtable_update *entries;
+		/** @wupd.num_used_entries: Number of update @entries used. */
+		unsigned int num_used_entries;
+		/** @wupd.updates: Tracks the update entry at a given level */
+		struct xe_pt_update updates[XE_VM_MAX_LEVEL + 1];
+	} wupd;
+
+	/* Walk state */
+	/**
+	 * @l0_end_addr: The end address of the current l0 leaf. Used for
+	 * 64K granularity detection.
+	 */
+	u64 l0_end_addr;
+	/** @addr_64K: The start address of the current 64K chunk. */
+	u64 addr_64K;
+	/** @found_64K: Whether @addr_64K actually points to a 64K chunk. */
+	bool found_64K;
+};
+
+static int
+xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent,
+		 pgoff_t offset, bool alloc_entries)
+{
+	struct xe_pt_update *upd = &wupd->updates[parent->level];
+	struct xe_vm_pgtable_update *entry;
+
+	/*
+	 * For *each level*, we could only have one active
+	 * struct xe_pt_update at any one time. Once we move on to a
+	 * new parent and page-directory, the old one is complete, and
+	 * updates are either already stored in the build tree or in
+	 * @wupd->entries
+	 */
+	if (likely(upd->parent == parent))
+		return 0;
+
+	upd->parent = parent;
+	upd->preexisting = true;
+
+	if (wupd->num_used_entries == XE_VM_MAX_LEVEL * 2 + 1)
+		return -EINVAL;
+
+	entry = wupd->entries + wupd->num_used_entries++;
+	upd->update = entry;
+	entry->ofs = offset;
+	entry->pt_bo = parent->bo;
+	entry->pt = parent;
+	entry->flags = 0;
+	entry->qwords = 0;
+
+	if (alloc_entries) {
+		entry->pt_entries = kmalloc_array(XE_PDES,
+						  sizeof(*entry->pt_entries),
+						  GFP_KERNEL);
+		if (!entry->pt_entries)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * NOTE: This is a very frequently called function so we allow ourselves
+ * to annotate (using branch prediction hints) the fastpath of updating a
+ * non-pre-existing pagetable with leaf ptes.
+ */
+static int
+xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent,
+		   pgoff_t offset, struct xe_pt *xe_child, u64 pte)
+{
+	struct xe_pt_update *upd = &xe_walk->wupd.updates[parent->level];
+	struct xe_pt_update *child_upd = xe_child ?
+		&xe_walk->wupd.updates[xe_child->level] : NULL;
+	int ret;
+
+	ret = xe_pt_new_shared(&xe_walk->wupd, parent, offset, true);
+	if (unlikely(ret))
+		return ret;
+
+	/*
+	 * Register this new pagetable so that it won't be recognized as
+	 * a shared pagetable by a subsequent insertion.
+	 */
+	if (unlikely(child_upd)) {
+		child_upd->update = NULL;
+		child_upd->parent = xe_child;
+		child_upd->preexisting = false;
+	}
+
+	if (likely(!upd->preexisting)) {
+		/* Continue building a non-connected subtree. */
+		struct iosys_map *map = &parent->bo->vmap;
+
+		if (unlikely(xe_child))
+			parent->base.children[offset] = &xe_child->base;
+
+		xe_pt_write(xe_walk->vm->xe, map, offset, pte);
+		parent->num_live++;
+	} else {
+		/* Shared pt. Stage update. */
+		unsigned int idx;
+		struct xe_vm_pgtable_update *entry = upd->update;
+
+		idx = offset - entry->ofs;
+		entry->pt_entries[idx].pt = xe_child;
+		entry->pt_entries[idx].pte = pte;
+		entry->qwords++;
+	}
+
+	return 0;
+}
+
+static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level,
+				   struct xe_pt_stage_bind_walk *xe_walk)
+{
+	u64 size, dma;
+
+	if (level > MAX_HUGEPTE_LEVEL)
+		return false;
+
+	/* Does the virtual range requested cover a huge pte? */
+	if (!xe_pt_covers(addr, next, level, &xe_walk->base))
+		return false;
+
+	/* Does the DMA segment cover the whole pte? */
+	if (next - xe_walk->va_curs_start > xe_walk->curs->size)
+		return false;
+
+	/* null VMA's do not have dma addresses */
+	if (xe_vma_is_null(xe_walk->vma))
+		return true;
+
+	/* Is the DMA address huge PTE size aligned? */
+	size = next - addr;
+	dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs);
+
+	return IS_ALIGNED(dma, size);
+}
+
+/*
+ * Scan the requested mapping to check whether it can be done entirely
+ * with 64K PTEs.
+ */
+static bool
+xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk)
+{
+	struct xe_res_cursor curs = *xe_walk->curs;
+
+	if (!IS_ALIGNED(addr, SZ_64K))
+		return false;
+
+	if (next > xe_walk->l0_end_addr)
+		return false;
+
+	/* null VMA's do not have dma addresses */
+	if (xe_vma_is_null(xe_walk->vma))
+		return true;
+
+	xe_res_next(&curs, addr - xe_walk->va_curs_start);
+	for (; addr < next; addr += SZ_64K) {
+		if (!IS_ALIGNED(xe_res_dma(&curs), SZ_64K) || curs.size < SZ_64K)
+			return false;
+
+		xe_res_next(&curs, SZ_64K);
+	}
+
+	return addr == next;
+}
+
+/*
+ * For non-compact "normal" 4K level-0 pagetables, we want to try to group
+ * addresses together in 64K-contiguous regions to add a 64K TLB hint for the
+ * device to the PTE.
+ * This function determines whether the address is part of such a
+ * segment. For VRAM in normal pagetables, this is strictly necessary on
+ * some devices.
+ */
+static bool
+xe_pt_is_pte_ps64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk)
+{
+	/* Address is within an already found 64k region */
+	if (xe_walk->found_64K && addr - xe_walk->addr_64K < SZ_64K)
+		return true;
+
+	xe_walk->found_64K = xe_pt_scan_64K(addr, addr + SZ_64K, xe_walk);
+	xe_walk->addr_64K = addr;
+
+	return xe_walk->found_64K;
+}
+
+static int
+xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
+		       unsigned int level, u64 addr, u64 next,
+		       struct xe_ptw **child,
+		       enum page_walk_action *action,
+		       struct xe_pt_walk *walk)
+{
+	struct xe_pt_stage_bind_walk *xe_walk =
+		container_of(walk, typeof(*xe_walk), base);
+	u16 pat_index = xe_walk->vma->pat_index;
+	struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base);
+	struct xe_vm *vm = xe_walk->vm;
+	struct xe_pt *xe_child;
+	bool covers;
+	int ret = 0;
+	u64 pte;
+
+	/* Is this a leaf entry? */
+	if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) {
+		struct xe_res_cursor *curs = xe_walk->curs;
+		bool is_null = xe_vma_is_null(xe_walk->vma);
+
+		XE_WARN_ON(xe_walk->va_curs_start != addr);
+
+		pte = vm->pt_ops->pte_encode_vma(is_null ? 0 :
+						 xe_res_dma(curs) + xe_walk->dma_offset,
+						 xe_walk->vma, pat_index, level);
+		pte |= xe_walk->default_pte;
+
+		/*
+		 * Set the XE_PTE_PS64 hint if possible, otherwise if
+		 * this device *requires* 64K PTE size for VRAM, fail.
+		 */
+		if (level == 0 && !xe_parent->is_compact) {
+			if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) {
+				xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K;
+				pte |= XE_PTE_PS64;
+			} else if (XE_WARN_ON(xe_walk->needs_64K)) {
+				return -EINVAL;
+			}
+		}
+
+		ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte);
+		if (unlikely(ret))
+			return ret;
+
+		if (!is_null)
+			xe_res_next(curs, next - addr);
+		xe_walk->va_curs_start = next;
+		xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level);
+		*action = ACTION_CONTINUE;
+
+		return ret;
+	}
+
+	/*
+	 * Descending to lower level. Determine if we need to allocate a
+	 * new page table or -directory, which we do if there is no
+	 * previous one or there is one we can completely replace.
+	 */
+	if (level == 1) {
+		walk->shifts = xe_normal_pt_shifts;
+		xe_walk->l0_end_addr = next;
+	}
+
+	covers = xe_pt_covers(addr, next, level, &xe_walk->base);
+	if (covers || !*child) {
+		u64 flags = 0;
+
+		xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1);
+		if (IS_ERR(xe_child))
+			return PTR_ERR(xe_child);
+
+		xe_pt_set_addr(xe_child,
+			       round_down(addr, 1ull << walk->shifts[level]));
+
+		if (!covers)
+			xe_pt_populate_empty(xe_walk->tile, xe_walk->vm, xe_child);
+
+		*child = &xe_child->base;
+
+		/*
+		 * Prefer the compact pagetable layout for L0 if possible. Only
+		 * possible if VMA covers entire 2MB region as compact 64k and
+		 * 4k pages cannot be mixed within a 2MB region.
+		 * TODO: Suballocate the pt bo to avoid wasting a lot of
+		 * memory.
+		 */
+		if (GRAPHICS_VERx100(tile_to_xe(xe_walk->tile)) >= 1250 && level == 1 &&
+		    covers && xe_pt_scan_64K(addr, next, xe_walk)) {
+			walk->shifts = xe_compact_pt_shifts;
+			xe_walk->vma->gpuva.flags |= XE_VMA_PTE_COMPACT;
+			flags |= XE_PDE_64K;
+			xe_child->is_compact = true;
+		}
+
+		pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, pat_index) | flags;
+		ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child,
+					 pte);
+	}
+
+	*action = ACTION_SUBTREE;
+	return ret;
+}
+
+static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = {
+	.pt_entry = xe_pt_stage_bind_entry,
+};
+
+/**
+ * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address
+ * range.
+ * @tile: The tile we're building for.
+ * @vma: The vma indicating the address range.
+ * @entries: Storage for the update entries used for connecting the tree to
+ * the main tree at commit time.
+ * @num_entries: On output contains the number of @entries used.
+ *
+ * This function builds a disconnected page-table tree for a given address
+ * range. The tree is connected to the main vm tree for the gpu using
+ * xe_migrate_update_pgtables() and for the cpu using xe_pt_commit_bind().
+ * The function builds xe_vm_pgtable_update structures for already existing
+ * shared page-tables, and non-existing shared and non-shared page-tables
+ * are built and populated directly.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+static int
+xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
+		 struct xe_vm_pgtable_update *entries, u32 *num_entries)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_bo *bo = xe_vma_bo(vma);
+	bool is_devmem = !xe_vma_is_userptr(vma) && bo &&
+		(xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo));
+	struct xe_res_cursor curs;
+	struct xe_pt_stage_bind_walk xe_walk = {
+		.base = {
+			.ops = &xe_pt_stage_bind_ops,
+			.shifts = xe_normal_pt_shifts,
+			.max_level = XE_PT_HIGHEST_LEVEL,
+		},
+		.vm = xe_vma_vm(vma),
+		.tile = tile,
+		.curs = &curs,
+		.va_curs_start = xe_vma_start(vma),
+		.vma = vma,
+		.wupd.entries = entries,
+		.needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem,
+	};
+	struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
+	int ret;
+
+	if (vma && (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) &&
+	    (is_devmem || !IS_DGFX(xe)))
+		xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
+
+	if (is_devmem) {
+		xe_walk.default_pte |= XE_PPGTT_PTE_DM;
+		xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource);
+	}
+
+	if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo))
+		xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo));
+
+	xe_bo_assert_held(bo);
+
+	if (!xe_vma_is_null(vma)) {
+		if (xe_vma_is_userptr(vma))
+			xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0,
+					xe_vma_size(vma), &curs);
+		else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo))
+			xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma),
+				     xe_vma_size(vma), &curs);
+		else
+			xe_res_first_sg(xe_bo_sg(bo), xe_vma_bo_offset(vma),
+					xe_vma_size(vma), &curs);
+	} else {
+		curs.size = xe_vma_size(vma);
+	}
+
+	ret = xe_pt_walk_range(&pt->base, pt->level, xe_vma_start(vma),
+			       xe_vma_end(vma), &xe_walk.base);
+
+	*num_entries = xe_walk.wupd.num_used_entries;
+	return ret;
+}
+
+/**
+ * xe_pt_nonshared_offsets() - Determine the non-shared entry offsets of a
+ * shared pagetable.
+ * @addr: The start address within the shared pagetable.
+ * @end: The end address within the shared pagetable.
+ * @level: The level of the shared pagetable.
+ * @walk: Walk info. The function adjusts the walk action.
+ * @action: next action to perform (see enum page_walk_action)
+ * @offset: Ignored on input, First non-shared entry on output.
+ * @end_offset: Ignored on input, Last non-shared entry + 1 on output.
+ *
+ * A shared page-table has some entries that belong to the address range
+ * and others that don't. This function determines the entries that belong
+ * fully to the address range. Depending on level, some entries may
+ * partially belong to the address range (that can't happen at level 0).
+ * The function detects that and adjust those offsets to not include those
+ * partial entries. Iff it does detect partial entries, we know that there must
+ * be shared page tables also at lower levels, so it adjusts the walk action
+ * accordingly.
+ *
+ * Return: true if there were non-shared entries, false otherwise.
+ */
+static bool xe_pt_nonshared_offsets(u64 addr, u64 end, unsigned int level,
+				    struct xe_pt_walk *walk,
+				    enum page_walk_action *action,
+				    pgoff_t *offset, pgoff_t *end_offset)
+{
+	u64 size = 1ull << walk->shifts[level];
+
+	*offset = xe_pt_offset(addr, level, walk);
+	*end_offset = xe_pt_num_entries(addr, end, level, walk) + *offset;
+
+	if (!level)
+		return true;
+
+	/*
+	 * If addr or end are not size aligned, there are shared pts at lower
+	 * level, so in that case traverse down the subtree
+	 */
+	*action = ACTION_CONTINUE;
+	if (!IS_ALIGNED(addr, size)) {
+		*action = ACTION_SUBTREE;
+		(*offset)++;
+	}
+
+	if (!IS_ALIGNED(end, size)) {
+		*action = ACTION_SUBTREE;
+		(*end_offset)--;
+	}
+
+	return *end_offset > *offset;
+}
+
+struct xe_pt_zap_ptes_walk {
+	/** @base: The walk base-class */
+	struct xe_pt_walk base;
+
+	/* Input parameters for the walk */
+	/** @tile: The tile we're building for */
+	struct xe_tile *tile;
+
+	/* Output */
+	/** @needs_invalidate: Whether we need to invalidate TLB */
+	bool needs_invalidate;
+};
+
+static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset,
+				unsigned int level, u64 addr, u64 next,
+				struct xe_ptw **child,
+				enum page_walk_action *action,
+				struct xe_pt_walk *walk)
+{
+	struct xe_pt_zap_ptes_walk *xe_walk =
+		container_of(walk, typeof(*xe_walk), base);
+	struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
+	pgoff_t end_offset;
+
+	XE_WARN_ON(!*child);
+	XE_WARN_ON(!level && xe_child->is_compact);
+
+	/*
+	 * Note that we're called from an entry callback, and we're dealing
+	 * with the child of that entry rather than the parent, so need to
+	 * adjust level down.
+	 */
+	if (xe_pt_nonshared_offsets(addr, next, --level, walk, action, &offset,
+				    &end_offset)) {
+		xe_map_memset(tile_to_xe(xe_walk->tile), &xe_child->bo->vmap,
+			      offset * sizeof(u64), 0,
+			      (end_offset - offset) * sizeof(u64));
+		xe_walk->needs_invalidate = true;
+	}
+
+	return 0;
+}
+
+static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = {
+	.pt_entry = xe_pt_zap_ptes_entry,
+};
+
+/**
+ * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range
+ * @tile: The tile we're zapping for.
+ * @vma: GPU VMA detailing address range.
+ *
+ * Eviction and Userptr invalidation needs to be able to zap the
+ * gpu ptes of a given address range in pagefaulting mode.
+ * In order to be able to do that, that function needs access to the shared
+ * page-table entries so it can either clear the leaf PTEs or
+ * clear the pointers to lower-level page-tables. The caller is required
+ * to hold the necessary locks to ensure neither the page-table connectivity
+ * nor the page-table entries of the range is updated from under us.
+ *
+ * Return: Whether ptes were actually updated and a TLB invalidation is
+ * required.
+ */
+bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma)
+{
+	struct xe_pt_zap_ptes_walk xe_walk = {
+		.base = {
+			.ops = &xe_pt_zap_ptes_ops,
+			.shifts = xe_normal_pt_shifts,
+			.max_level = XE_PT_HIGHEST_LEVEL,
+		},
+		.tile = tile,
+	};
+	struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
+
+	if (!(vma->tile_present & BIT(tile->id)))
+		return false;
+
+	(void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma),
+				xe_vma_end(vma), &xe_walk.base);
+
+	return xe_walk.needs_invalidate;
+}
+
+static void
+xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *tile,
+		       struct iosys_map *map, void *data,
+		       u32 qword_ofs, u32 num_qwords,
+		       const struct xe_vm_pgtable_update *update)
+{
+	const struct xe_pt_entry *src = update->pt_entries;
+	u64 *dst = data;	/* only written when @map is NULL */
+	u32 n;
+
+	for (n = 0; n < num_qwords; n++) {
+		if (!map)
+			dst[n] = src[n].pte;
+		else
+			xe_map_wr(tile_to_xe(tile), map,
+				  (qword_ofs + n) * sizeof(u64), u64, src[n].pte);
+	}
+}
+
+static void xe_pt_abort_bind(struct xe_vma *vma,
+			     struct xe_vm_pgtable_update *entries,
+			     u32 num_entries)
+{
+	struct xe_vm_pgtable_update *upd = entries;
+	u32 n;
+
+	/* Drop every staged child page-table, then the staging array itself. */
+	for (; upd < entries + num_entries; upd++) {
+		for (n = 0; upd->pt_entries && n < upd->qwords; n++)
+			xe_pt_destroy(upd->pt_entries[n].pt,
+				      xe_vma_vm(vma)->flags, NULL);
+		kfree(upd->pt_entries);
+	}
+}
+
+static void xe_pt_commit_locks_assert(struct xe_vma *vma)
+{
+	struct xe_vm *vm = xe_vma_vm(vma);
+
+	lockdep_assert_held(&vm->lock);
+
+	if (xe_vma_is_userptr(vma))
+		lockdep_assert_held_read(&vm->userptr.notifier_lock);
+	else if (!xe_vma_is_null(vma))
+		dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv);
+
+	xe_vm_assert_held(vm);
+}
+
+static void xe_pt_commit_bind(struct xe_vma *vma,
+			      struct xe_vm_pgtable_update *entries,
+			      u32 num_entries, bool rebind,
+			      struct llist_head *deferred)
+{
+	u32 i, j;
+
+	xe_pt_commit_locks_assert(vma);
+
+	for (i = 0; i < num_entries; i++) {
+		struct xe_pt *pt = entries[i].pt;
+		struct xe_pt_dir *pt_dir;
+
+		if (!rebind)
+			pt->num_live += entries[i].qwords;
+
+		if (!pt->level) {
+			kfree(entries[i].pt_entries);
+			continue;
+		}
+
+		pt_dir = as_xe_pt_dir(pt);
+		for (j = 0; j < entries[i].qwords; j++) {
+			u32 j_ = j + entries[i].ofs;
+			struct xe_pt *newpte = entries[i].pt_entries[j].pt;
+
+			if (xe_pt_entry(pt_dir, j_))
+				xe_pt_destroy(xe_pt_entry(pt_dir, j_),
+					      xe_vma_vm(vma)->flags, deferred);
+
+			pt_dir->children[j_] = &newpte->base;
+		}
+		kfree(entries[i].pt_entries);
+	}
+}
+
+static int
+xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma,
+		   struct xe_vm_pgtable_update *entries, u32 *num_entries,
+		   bool rebind)
+{
+	int ret;
+
+	*num_entries = 0;
+	ret = xe_pt_stage_bind(tile, vma, entries, num_entries);
+	if (ret) /* staging failed: free what was staged so far */
+		xe_pt_abort_bind(vma, entries, *num_entries);
+	else
+		xe_tile_assert(tile, *num_entries);
+
+	return ret;
+}
+
+static void xe_vm_dbg_print_entries(struct xe_device *xe,
+				    const struct xe_vm_pgtable_update *entries,
+				    unsigned int num_entries)
+#if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM))
+{
+	unsigned int i;
+
+	vm_dbg(&xe->drm, "%u entries to update\n", num_entries);
+	for (i = 0; i < num_entries; i++) {
+		const struct xe_vm_pgtable_update *entry = &entries[i];
+		struct xe_pt *xe_pt = entry->pt;
+		u64 page_size = 1ull << xe_pt_shift(xe_pt->level);
+		u64 end;
+		u64 start;
+
+		xe_assert(xe, !entry->pt->is_compact);
+		start = entry->ofs * page_size;
+		end = start + page_size * entry->qwords;
+		vm_dbg(&xe->drm,
+		       "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n",
+		       i, xe_pt->level, entry->ofs, entry->qwords,
+		       xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt) + end, 0);
+	}
+}
+#else
+{}
+#endif
+
+#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT
+
+static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma)
+{
+	u32 divisor = uvma->userptr.divisor ? uvma->userptr.divisor : 2;
+	static u32 count;
+
+	if (count++ % divisor == divisor - 1) {
+		struct xe_vm *vm = xe_vma_vm(&uvma->vma);
+
+		/* Double the period and queue the vma on the vm's invalidated list. */
+		uvma->userptr.divisor = divisor << 1;
+		spin_lock(&vm->userptr.invalidated_lock);
+		list_move_tail(&uvma->userptr.invalidate_link, &vm->userptr.invalidated);
+		spin_unlock(&vm->userptr.invalidated_lock);
+		return true;
+	}
+
+	return false;
+}
+
+#else
+
+static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma)
+{
+	return false;
+}
+
+#endif
+
+/**
+ * struct xe_pt_migrate_pt_update - Callback argument for pre-commit callbacks
+ * @base: Base we derive from.
+ * @bind: Whether this is a bind or an unbind operation. A bind operation
+ *        makes the pre-commit callback error with -EAGAIN if it detects a
+ *        pending invalidation.
+ * @locked: Whether the pre-commit callback locked the userptr notifier lock
+ *          and it needs unlocking.
+ */
+struct xe_pt_migrate_pt_update {
+	struct xe_migrate_pt_update base;
+	bool bind;
+	bool locked;
+};
+
+/*
+ * This function adds the needed dependencies to a page-table update job
+ * to make sure racing jobs for separate bind engines don't race writing
+ * to the same page-table range, wreaking havoc. Initially use a single
+ * fence for the entire VM. An optimization would use smaller granularity.
+ */
+static int xe_pt_vm_dependencies(struct xe_sched_job *job,
+				 struct xe_range_fence_tree *rftree,
+				 u64 start, u64 last)
+{
+	struct xe_range_fence *rtfence;
+	struct dma_fence *fence;
+	int err;
+
+	rtfence = xe_range_fence_tree_first(rftree, start, last);
+	while (rtfence) {
+		fence = rtfence->fence;
+
+		if (!dma_fence_is_signaled(fence)) {
+			/*
+			 * Is this a CPU update? GPU is busy updating, so return
+			 * an error
+			 */
+			if (!job)
+				return -ETIME;
+
+			dma_fence_get(fence);
+			err = drm_sched_job_add_dependency(&job->drm, fence);
+			if (err)
+				return err;
+		}
+
+		rtfence = xe_range_fence_tree_next(rtfence, start, last);
+	}
+
+	return 0;
+}
+
+static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update)
+{
+	struct xe_vm *vm = xe_vma_vm(pt_update->vma);
+
+	/* Depend on, or bail out for, unsignaled fences overlapping the range. */
+	return xe_pt_vm_dependencies(pt_update->job, &vm->rftree[pt_update->tile_id],
+				     pt_update->start, pt_update->last);
+}
+
+/*
+ * pre_commit hook for userptr vmas.
+ *
+ * First adds the range-fence dependencies via xe_pt_vm_dependencies(). Then
+ * takes vm->userptr.notifier_lock for read and checks the notifier sequence
+ * number with mmu_interval_read_retry(). If a retry is required the lock is
+ * dropped; a bind gives up with -EAGAIN, anything else re-reads the sequence
+ * number and tries again.
+ *
+ * On success (0) the notifier lock is still held for read and
+ * userptr_update->locked is set; on any error return the lock is not held.
+ */
+static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
+{
+	struct xe_pt_migrate_pt_update *userptr_update =
+		container_of(pt_update, typeof(*userptr_update), base);
+	struct xe_userptr_vma *uvma = to_userptr_vma(pt_update->vma);
+	unsigned long notifier_seq = uvma->userptr.notifier_seq;
+	struct xe_vm *vm = xe_vma_vm(&uvma->vma);
+	int err = xe_pt_vm_dependencies(pt_update->job,
+					&vm->rftree[pt_update->tile_id],
+					pt_update->start,
+					pt_update->last);
+
+	if (err)
+		return err;
+
+	userptr_update->locked = false;
+
+	/*
+	 * Wait until nobody is running the invalidation notifier, and
+	 * since we're exiting the loop holding the notifier lock,
+	 * nobody can proceed invalidating either.
+	 *
+	 * Note that we don't update the vma->userptr.notifier_seq since
+	 * we don't update the userptr pages.
+	 */
+	do {
+		down_read(&vm->userptr.notifier_lock);
+		if (!mmu_interval_read_retry(&uvma->userptr.notifier,
+					     notifier_seq))
+			break;
+
+		up_read(&vm->userptr.notifier_lock);
+
+		if (userptr_update->bind)
+			return -EAGAIN;
+
+		notifier_seq = mmu_interval_read_begin(&uvma->userptr.notifier);
+	} while (true);
+
+	/* Inject errors to test whether they are handled correctly */
+	if (userptr_update->bind && xe_pt_userptr_inject_eagain(uvma)) {
+		up_read(&vm->userptr.notifier_lock);
+		return -EAGAIN;
+	}
+
+	userptr_update->locked = true;
+
+	return 0;
+}
+
+/* Page-table update ops used by __xe_pt_bind_vma() for non-userptr vmas. */
+static const struct xe_migrate_pt_update_ops bind_ops = {
+	.populate = xe_vm_populate_pgtable,
+	.pre_commit = xe_pt_pre_commit,
+};
+
+/*
+ * Page-table update ops used by __xe_pt_bind_vma() for userptr vmas; differs
+ * from bind_ops only in the pre_commit hook.
+ */
+static const struct xe_migrate_pt_update_ops userptr_bind_ops = {
+	.populate = xe_vm_populate_pgtable,
+	.pre_commit = xe_pt_userptr_pre_commit,
+};
+
+/*
+ * A TLB invalidation fence chained behind another dma-fence: once @fence
+ * signals without error, @work issues the TLB invalidation for @vma on @gt.
+ */
+struct invalidation_fence {
+	/** @base: The TLB invalidation fence handed to xe_gt_tlb_invalidation_vma() */
+	struct xe_gt_tlb_invalidation_fence base;
+	/** @gt: The gt on which the invalidation is issued */
+	struct xe_gt *gt;
+	/** @vma: The vma passed to xe_gt_tlb_invalidation_vma() */
+	struct xe_vma *vma;
+	/** @fence: The fence to wait for before invalidating */
+	struct dma_fence *fence;
+	/** @cb: Callback installed on @fence */
+	struct dma_fence_cb cb;
+	/** @work: Work item running invalidation_fence_work_func() */
+	struct work_struct work;
+};
+
+/* dma_fence_ops.get_driver_name: constant driver name for these fences. */
+static const char *
+invalidation_fence_get_driver_name(struct dma_fence *dma_fence)
+{
+	return "xe";
+}
+
+/* dma_fence_ops.get_timeline_name: constant timeline name for these fences. */
+static const char *
+invalidation_fence_get_timeline_name(struct dma_fence *dma_fence)
+{
+	return "invalidation_fence";
+}
+
+/* Only the name callbacks are provided; no .release hook is set here. */
+static const struct dma_fence_ops invalidation_fence_ops = {
+	.get_driver_name = invalidation_fence_get_driver_name,
+	.get_timeline_name = invalidation_fence_get_timeline_name,
+};
+
+/*
+ * Callback run when the fence the invalidation is chained behind signals.
+ *
+ * If that fence carries an error, the error is copied to the invalidation
+ * fence, which is signaled and has one reference dropped. Otherwise the
+ * invalidation work is queued on system_wq. Either way the reference on the
+ * waited-for fence is dropped.
+ */
+static void invalidation_fence_cb(struct dma_fence *fence,
+				  struct dma_fence_cb *cb)
+{
+	struct invalidation_fence *ifence =
+		container_of(cb, struct invalidation_fence, cb);
+
+	trace_xe_gt_tlb_invalidation_fence_cb(&ifence->base);
+	if (ifence->fence->error) {
+		ifence->base.base.error = ifence->fence->error;
+		dma_fence_signal(&ifence->base.base);
+		dma_fence_put(&ifence->base.base);
+	} else {
+		queue_work(system_wq, &ifence->work);
+	}
+	dma_fence_put(ifence->fence);
+}
+
+/* Work item body: issue the TLB invalidation for the fence's vma on its gt. */
+static void invalidation_fence_work_func(struct work_struct *w)
+{
+	struct invalidation_fence *inval =
+		container_of(w, typeof(*inval), work);
+
+	trace_xe_gt_tlb_invalidation_fence_work_func(&inval->base);
+	xe_gt_tlb_invalidation_vma(inval->gt, &inval->base, inval->vma);
+}
+
+/*
+ * Initialize @ifence as a dma-fence on @gt's TLB-invalidation fence context
+ * and chain it behind @fence.
+ *
+ * An extra reference on the new fence is taken for the caller. A callback is
+ * then added to @fence; if dma_fence_add_callback() returns -ENOENT (the
+ * fence has already signaled) the reference on @fence is dropped here and
+ * the invalidation work function is run directly.
+ *
+ * Return: 0 on success (including the -ENOENT case), otherwise the error from
+ * dma_fence_add_callback(), in which case both references on @ifence have
+ * already been dropped.
+ */
+static int invalidation_fence_init(struct xe_gt *gt,
+				   struct invalidation_fence *ifence,
+				   struct dma_fence *fence,
+				   struct xe_vma *vma)
+{
+	int ret;
+
+	trace_xe_gt_tlb_invalidation_fence_create(&ifence->base);
+
+	/* The seqno increment is done under the tlb_invalidation lock */
+	spin_lock_irq(&gt->tlb_invalidation.lock);
+	dma_fence_init(&ifence->base.base, &invalidation_fence_ops,
+		       &gt->tlb_invalidation.lock,
+		       gt->tlb_invalidation.fence_context,
+		       ++gt->tlb_invalidation.fence_seqno);
+	spin_unlock_irq(&gt->tlb_invalidation.lock);
+
+	INIT_LIST_HEAD(&ifence->base.link);
+
+	dma_fence_get(&ifence->base.base);	/* Ref for caller */
+	ifence->fence = fence;
+	ifence->gt = gt;
+	ifence->vma = vma;
+
+	INIT_WORK(&ifence->work, invalidation_fence_work_func);
+	ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb);
+	if (ret == -ENOENT) {
+		dma_fence_put(ifence->fence);	/* Usually dropped in CB */
+		invalidation_fence_work_func(&ifence->work);
+	} else if (ret) {
+		dma_fence_put(&ifence->base.base);	/* Caller ref */
+		dma_fence_put(&ifence->base.base);	/* Creation ref */
+	}
+
+	/* Any error other than -ENOENT is not expected here */
+	xe_gt_assert(gt, !ret || ret == -ENOENT);
+
+	return ret && ret != -ENOENT ? ret : 0;
+}
+
+/*
+ * Compute the [start, last] interval stored in @update->base for range-fence
+ * tracking: the vma range widened to the entry size of the highest
+ * page-table level found among @entries.
+ */
+static void xe_pt_calc_rfence_interval(struct xe_vma *vma,
+				       struct xe_pt_migrate_pt_update *update,
+				       struct xe_vm_pgtable_update *entries,
+				       u32 num_entries)
+{
+	const struct xe_vm_pgtable_update *entry = entries;
+	int highest = 0;
+	int idx;
+
+	for (idx = 0; idx < num_entries; idx++, entry++)
+		if (entry->pt->level > highest)
+			highest = entry->pt->level;
+
+	/* Greedy (non-optimal) calculation but simple */
+	update->base.start = ALIGN_DOWN(xe_vma_start(vma),
+					0x1ull << xe_pt_shift(highest));
+	update->base.last = ALIGN(xe_vma_end(vma),
+				  0x1ull << xe_pt_shift(highest)) - 1;
+}
+
+/**
+ * __xe_pt_bind_vma() - Build and connect a page-table tree for the vma
+ * address range.
+ * @tile: The tile to bind for.
+ * @vma: The vma to bind.
+ * @q: The exec_queue with which to do pipelined page-table updates.
+ * @syncs: Entries to sync on before binding the built tree to the live vm tree.
+ * @num_syncs: Number of @syncs entries.
+ * @rebind: Whether we're rebinding this vma to the same address range without
+ * an unbind in-between.
+ *
+ * This function builds a page-table tree (see xe_pt_stage_bind() for more
+ * information on page-table building), and the xe_vm_pgtable_update entries
+ * abstracting the operations needed to attach it to the main vm tree. It
+ * then takes the relevant locks and updates the metadata side of the main
+ * vm tree and submits the operations for pipelined attachment of the
+ * gpu page-table to the vm main tree, (which can be done either by the
+ * cpu or the GPU).
+ *
+ * If anything fails after the tree has been staged but before it has been
+ * committed, the staged entries are released with xe_pt_abort_bind().
+ *
+ * Return: A valid dma-fence representing the pipelined attachment operation
+ * on success, an error pointer on error.
+ */
+struct dma_fence *
+__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q,
+		 struct xe_sync_entry *syncs, u32 num_syncs,
+		 bool rebind)
+{
+	struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1];
+	struct xe_pt_migrate_pt_update bind_pt_update = {
+		.base = {
+			.ops = xe_vma_is_userptr(vma) ? &userptr_bind_ops : &bind_ops,
+			.vma = vma,
+			.tile_id = tile->id,
+		},
+		.bind = true,
+	};
+	struct xe_vm *vm = xe_vma_vm(vma);
+	u32 num_entries;
+	struct dma_fence *fence;
+	struct invalidation_fence *ifence = NULL;
+	struct xe_range_fence *rfence;
+	int err;
+
+	bind_pt_update.locked = false;
+	xe_bo_assert_held(xe_vma_bo(vma));
+	xe_vm_assert_held(vm);
+
+	vm_dbg(&xe_vma_vm(vma)->xe->drm,
+	       "Preparing bind, with range [%llx...%llx) engine %p.\n",
+	       xe_vma_start(vma), xe_vma_end(vma), q);
+
+	err = xe_pt_prepare_bind(tile, vma, entries, &num_entries, rebind);
+	if (err)
+		goto err;
+	xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries));
+
+	xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
+	xe_pt_calc_rfence_interval(vma, &bind_pt_update, entries,
+				   num_entries);
+
+	/*
+	 * If rebind, we have to invalidate TLB on !LR vms to invalidate
+	 * cached PTEs point to freed memory. on LR vms this is done
+	 * automatically when the context is re-enabled by the rebind worker,
+	 * or in fault mode it was invalidated on PTE zapping.
+	 *
+	 * If !rebind, and scratch enabled VMs, there is a chance the scratch
+	 * PTE is already cached in the TLB so it needs to be invalidated.
+	 * on !LR VMs this is done in the ring ops preceding a batch, but on
+	 * non-faulting LR, in particular on user-space batch buffer chaining,
+	 * it needs to be done here.
+	 */
+	if ((rebind && !xe_vm_in_lr_mode(vm) && !vm->batch_invalidate_tlb) ||
+	    (!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) {
+		ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
+		if (!ifence) {
+			err = -ENOMEM;
+			goto err_abort;
+		}
+	}
+
+	rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
+	if (!rfence) {
+		err = -ENOMEM;
+		goto err_free_ifence;
+	}
+
+	fence = xe_migrate_update_pgtables(tile->migrate,
+					   vm, xe_vma_bo(vma), q,
+					   entries, num_entries,
+					   syncs, num_syncs,
+					   &bind_pt_update.base);
+	if (!IS_ERR(fence)) {
+		bool last_munmap_rebind = vma->gpuva.flags & XE_VMA_LAST_REBIND;
+		LLIST_HEAD(deferred);
+
+		/* If the range fence can't be tracked, wait for it instead */
+		err = xe_range_fence_insert(&vm->rftree[tile->id], rfence,
+					    &xe_range_fence_kfree_ops,
+					    bind_pt_update.base.start,
+					    bind_pt_update.base.last, fence);
+		if (err)
+			dma_fence_wait(fence, false);
+
+		/* TLB invalidation must be done before signaling rebind */
+		if (ifence) {
+			err = invalidation_fence_init(tile->primary_gt, ifence, fence,
+						      vma);
+			if (err) {
+				/*
+				 * NOTE(review): invalidation_fence_init()
+				 * already drops both references on ifence
+				 * before returning an error, so the kfree()
+				 * below looks like a double free, and the
+				 * userptr notifier lock is not released here
+				 * if it was taken. The init function asserts
+				 * that this never happens - confirm.
+				 */
+				dma_fence_put(fence);
+				kfree(ifence);
+				return ERR_PTR(err);
+			}
+			fence = &ifence->base.base;
+		}
+
+		/* add shared fence now for pagetable delayed destroy */
+		dma_resv_add_fence(xe_vm_resv(vm), fence, !rebind &&
+				   last_munmap_rebind ?
+				   DMA_RESV_USAGE_KERNEL :
+				   DMA_RESV_USAGE_BOOKKEEP);
+
+		if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
+			dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
+					   DMA_RESV_USAGE_BOOKKEEP);
+		xe_pt_commit_bind(vma, entries, num_entries, rebind,
+				  bind_pt_update.locked ? &deferred : NULL);
+
+		/* This vma is live (again?) now */
+		vma->tile_present |= BIT(tile->id);
+
+		if (bind_pt_update.locked) {
+			to_userptr_vma(vma)->userptr.initial_bind = true;
+			up_read(&vm->userptr.notifier_lock);
+			xe_bo_put_commit(&deferred);
+		}
+		if (!rebind && last_munmap_rebind &&
+		    xe_vm_in_preempt_fence_mode(vm))
+			xe_vm_queue_rebind_worker(vm);
+	} else {
+		kfree(rfence);
+		kfree(ifence);
+		if (bind_pt_update.locked)
+			up_read(&vm->userptr.notifier_lock);
+		xe_pt_abort_bind(vma, entries, num_entries);
+	}
+
+	return fence;
+
+err_free_ifence:
+	kfree(ifence);
+err_abort:
+	/* Release the page tables staged by xe_pt_prepare_bind() */
+	xe_pt_abort_bind(vma, entries, num_entries);
+err:
+	return ERR_PTR(err);
+}
+
+/* Walk state for staging an unbind; see xe_pt_stage_unbind(). */
+struct xe_pt_stage_unbind_walk {
+	/** @base: The pagewalk base-class. */
+	struct xe_pt_walk base;
+
+	/* Input parameters for the walk */
+	/** @tile: The tile we're unbinding from. */
+	struct xe_tile *tile;
+
+	/**
+	 * @modified_start: Walk range start, modified to include any
+	 * shared pagetables that we're the only user of and can thus
+	 * treat as private.
+	 */
+	u64 modified_start;
+	/** @modified_end: Walk range end, modified like @modified_start. */
+	u64 modified_end;
+
+	/* Output */
+	/** @wupd: Structure to track the page-table updates we're building */
+	struct xe_walk_update wupd;
+};
+
+/*
+ * Decide whether the range [@addr, @next) is the sole populator of @child.
+ * That is the case when both ends are aligned to the entry size of @level
+ * and the number of entries spanned equals @child->num_live. If so, *@action
+ * is set to ACTION_CONTINUE and the walk's modified range is widened to the
+ * entry size of @level + 1, so that higher levels don't view us as a shared
+ * pagetable.
+ *
+ * Return: true if @child can be killed, false otherwise.
+ */
+static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level,
+			     const struct xe_pt *child,
+			     enum page_walk_action *action,
+			     struct xe_pt_walk *walk)
+{
+	struct xe_pt_stage_unbind_walk *xe_walk =
+		container_of(walk, typeof(*xe_walk), base);
+	unsigned int shift = walk->shifts[level];
+	u64 entry_size = 1ull << shift;
+	u64 parent_size;
+
+	if (!IS_ALIGNED(addr, entry_size) || !IS_ALIGNED(next, entry_size) ||
+	    ((next - addr) >> shift) != child->num_live)
+		return false;
+
+	parent_size = 1ull << walk->shifts[level + 1];
+
+	*action = ACTION_CONTINUE;
+
+	if (xe_walk->modified_start >= addr)
+		xe_walk->modified_start = round_down(addr, parent_size);
+	if (xe_walk->modified_end <= next)
+		xe_walk->modified_end = round_up(next, parent_size);
+
+	return true;
+}
+
+/*
+ * pt_entry callback for the unbind walk. Runs xe_pt_check_kill() on the
+ * child page table; its boolean result is ignored here, only its effect on
+ * *@action and on the walk's modified range matters. Always returns 0.
+ */
+static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
+				    unsigned int level, u64 addr, u64 next,
+				    struct xe_ptw **child,
+				    enum page_walk_action *action,
+				    struct xe_pt_walk *walk)
+{
+	struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
+
+	XE_WARN_ON(!*child);
+	XE_WARN_ON(!level && xe_child->is_compact);
+
+	/* @level is the parent's level; the child sits one level below. */
+	xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk);
+
+	return 0;
+}
+
+/*
+ * pt_post_descend callback for the unbind walk. After the child's subtree
+ * has been walked, either finds that the child itself can be killed (nothing
+ * to record at this level) or records an update entry covering the child's
+ * non-shared offsets. Always returns 0.
+ */
+static int
+xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset,
+				unsigned int level, u64 addr, u64 next,
+				struct xe_ptw **child,
+				enum page_walk_action *action,
+				struct xe_pt_walk *walk)
+{
+	struct xe_pt_stage_unbind_walk *xe_walk =
+		container_of(walk, typeof(*xe_walk), base);
+	struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
+	pgoff_t end_offset;
+	/* From here on @level is the child's level, not the parent's. */
+	u64 size = 1ull << walk->shifts[--level];
+
+	/* Unaligned ends are replaced by the (possibly widened) walk range. */
+	if (!IS_ALIGNED(addr, size))
+		addr = xe_walk->modified_start;
+	if (!IS_ALIGNED(next, size))
+		next = xe_walk->modified_end;
+
+	/* Parent == *child is the root pt. Don't kill it. */
+	if (parent != *child &&
+	    xe_pt_check_kill(addr, next, level, xe_child, action, walk))
+		return 0;
+
+	if (!xe_pt_nonshared_offsets(addr, next, level, walk, action, &offset,
+				     &end_offset))
+		return 0;
+
+	(void)xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, false);
+	xe_walk->wupd.updates[level].update->qwords = end_offset - offset;
+
+	return 0;
+}
+
+/* Walk callbacks installed by xe_pt_stage_unbind(). */
+static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = {
+	.pt_entry = xe_pt_stage_unbind_entry,
+	.pt_post_descend = xe_pt_stage_unbind_post_descend,
+};
+
+/**
+ * xe_pt_stage_unbind() - Stage the page-table updates needed to unbind a vma
+ * @tile: The tile we're unbinding for.
+ * @vma: The vma we're unbinding.
+ * @entries: Caller-provided storage for the update structures.
+ *
+ * Walks the shared page tables of the vma's address range, starting at the
+ * vm's root page table for @tile, and fills @entries with the resulting
+ * update structures. Page tables that this range is the only user of are
+ * targeted for removal, which requires that the unbind is committed within
+ * the same critical section that blocks racing binds to the same page-table
+ * tree.
+ *
+ * Return: The number of entries used.
+ */
+static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma,
+				       struct xe_vm_pgtable_update *entries)
+{
+	struct xe_pt_stage_unbind_walk unbind_walk = {
+		.base = {
+			.ops = &xe_pt_stage_unbind_ops,
+			.shifts = xe_normal_pt_shifts,
+			.max_level = XE_PT_HIGHEST_LEVEL,
+		},
+		.tile = tile,
+		.modified_start = xe_vma_start(vma),
+		.modified_end = xe_vma_end(vma),
+		.wupd.entries = entries,
+	};
+	struct xe_pt *root = xe_vma_vm(vma)->pt_root[tile->id];
+
+	/* The walk result is deliberately ignored; only the count matters. */
+	(void)xe_pt_walk_shared(&root->base, root->level, xe_vma_start(vma),
+				xe_vma_end(vma), &unbind_walk.base);
+
+	return unbind_walk.wupd.num_used_entries;
+}
+
+/*
+ * populate callback for unbind: fill @num_qwords entries with the empty PTE
+ * value for the page table's level. The destination is @ptr when no @map is
+ * given; otherwise it is @map at qword offset @qword_ofs, written entry by
+ * entry through xe_map_wr() for iomem maps and with memset64() otherwise.
+ */
+static void
+xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update,
+				  struct xe_tile *tile, struct iosys_map *map,
+				  void *ptr, u32 qword_ofs, u32 num_qwords,
+				  const struct xe_vm_pgtable_update *update)
+{
+	struct xe_vma *vma = pt_update->vma;
+	u64 empty = __xe_pt_empty_pte(tile, xe_vma_vm(vma), update->pt->level);
+	int i;
+
+	if (!map) {
+		memset64(ptr, empty, num_qwords);
+		return;
+	}
+
+	if (!map->is_iomem) {
+		memset64(map->vaddr + qword_ofs * sizeof(u64), empty,
+			 num_qwords);
+		return;
+	}
+
+	for (i = 0; i < num_qwords; ++i)
+		xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) *
+			  sizeof(u64), u64, empty);
+}
+
+/*
+ * Commit the staged unbind @entries to the page-table metadata: subtract the
+ * cleared entries from each page table's live count and, for page tables
+ * above level 0, destroy and clear the child page tables in the affected
+ * slots. @deferred is passed through to xe_pt_destroy().
+ */
+static void
+xe_pt_commit_unbind(struct xe_vma *vma,
+		    struct xe_vm_pgtable_update *entries, u32 num_entries,
+		    struct llist_head *deferred)
+{
+	u32 idx;
+
+	xe_pt_commit_locks_assert(vma);
+
+	for (idx = 0; idx < num_entries; ++idx) {
+		struct xe_vm_pgtable_update *upd = &entries[idx];
+		struct xe_pt *pt = upd->pt;
+		struct xe_pt_dir *dir;
+		u32 slot;
+
+		pt->num_live -= upd->qwords;
+
+		/* Level-0 page tables have no child page tables to destroy */
+		if (!pt->level)
+			continue;
+
+		dir = as_xe_pt_dir(pt);
+		for (slot = upd->ofs; slot < upd->ofs + upd->qwords; slot++) {
+			if (xe_pt_entry(dir, slot))
+				xe_pt_destroy(xe_pt_entry(dir, slot),
+					      xe_vma_vm(vma)->flags, deferred);
+
+			dir->children[slot] = NULL;
+		}
+	}
+}
+
+/* Page-table update ops used by __xe_pt_unbind_vma() for non-userptr vmas. */
+static const struct xe_migrate_pt_update_ops unbind_ops = {
+	.populate = xe_migrate_clear_pgtable_callback,
+	.pre_commit = xe_pt_pre_commit,
+};
+
+/*
+ * Page-table update ops used by __xe_pt_unbind_vma() for userptr vmas;
+ * differs from unbind_ops only in the pre_commit hook.
+ */
+static const struct xe_migrate_pt_update_ops userptr_unbind_ops = {
+	.populate = xe_migrate_clear_pgtable_callback,
+	.pre_commit = xe_pt_userptr_pre_commit,
+};
+
+/**
+ * __xe_pt_unbind_vma() - Disconnect and free a page-table tree for the vma
+ * address range.
+ * @tile: The tile to unbind for.
+ * @vma: The vma to unbind.
+ * @q: The exec_queue with which to do pipelined page-table updates.
+ * @syncs: Entries to sync on before disconnecting the tree to be destroyed.
+ * @num_syncs: Number of @syncs entries.
+ *
+ * This function builds the xe_vm_pgtable_update entries abstracting the
+ * operations needed to detach the page-table tree to be destroyed from the
+ * main vm tree.
+ * It then takes the relevant locks and submits the operations for
+ * pipelined detachment of the gpu page-table from the vm main tree,
+ * (which can be done either by the cpu or the GPU). Finally it frees the
+ * detached page-table tree.
+ *
+ * Return: A valid dma-fence representing the pipelined detachment operation
+ * on success, an error pointer on error.
+ */
+struct dma_fence *
+__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q,
+		   struct xe_sync_entry *syncs, u32 num_syncs)
+{
+	struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1];
+	struct xe_pt_migrate_pt_update unbind_pt_update = {
+		.base = {
+			.ops = xe_vma_is_userptr(vma) ? &userptr_unbind_ops :
+			&unbind_ops,
+			.vma = vma,
+			.tile_id = tile->id,
+		},
+	};
+	struct xe_vm *vm = xe_vma_vm(vma);
+	u32 num_entries;
+	struct dma_fence *fence = NULL;
+	struct invalidation_fence *ifence;
+	struct xe_range_fence *rfence;
+
+	LLIST_HEAD(deferred);
+
+	xe_bo_assert_held(xe_vma_bo(vma));
+	xe_vm_assert_held(vm);
+
+	vm_dbg(&xe_vma_vm(vma)->xe->drm,
+	       "Preparing unbind, with range [%llx...%llx) engine %p.\n",
+	       xe_vma_start(vma), xe_vma_end(vma), q);
+
+	num_entries = xe_pt_stage_unbind(tile, vma, entries);
+	xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries));
+
+	xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
+	xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries,
+				   num_entries);
+
+	/* Unlike bind, an unbind always gets a TLB invalidation fence */
+	ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
+	if (!ifence)
+		return ERR_PTR(-ENOMEM);
+
+	rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
+	if (!rfence) {
+		kfree(ifence);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/*
+	 * Even if we were already evicted and unbind to destroy, we need to
+	 * clear again here. The eviction may have updated pagetables at a
+	 * lower level, because it needs to be more conservative.
+	 */
+	fence = xe_migrate_update_pgtables(tile->migrate,
+					   vm, NULL, q ? q :
+					   vm->q[tile->id],
+					   entries, num_entries,
+					   syncs, num_syncs,
+					   &unbind_pt_update.base);
+	if (!IS_ERR(fence)) {
+		int err;
+
+		/* If the range fence can't be tracked, wait for it instead */
+		err = xe_range_fence_insert(&vm->rftree[tile->id], rfence,
+					    &xe_range_fence_kfree_ops,
+					    unbind_pt_update.base.start,
+					    unbind_pt_update.base.last, fence);
+		if (err)
+			dma_fence_wait(fence, false);
+
+		/* TLB invalidation must be done before signaling unbind */
+		err = invalidation_fence_init(tile->primary_gt, ifence, fence, vma);
+		if (err) {
+			/*
+			 * NOTE(review): invalidation_fence_init() already
+			 * drops both references on ifence before returning an
+			 * error, so the kfree() below looks like a double
+			 * free, and the userptr notifier lock is not released
+			 * here if it was taken. The init function asserts that
+			 * this never happens - confirm.
+			 */
+			dma_fence_put(fence);
+			kfree(ifence);
+			return ERR_PTR(err);
+		}
+		fence = &ifence->base.base;
+
+		/* add shared fence now for pagetable delayed destroy */
+		dma_resv_add_fence(xe_vm_resv(vm), fence,
+				   DMA_RESV_USAGE_BOOKKEEP);
+
+		/* This fence will be installed by caller when doing eviction */
+		if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
+			dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
+					   DMA_RESV_USAGE_BOOKKEEP);
+		xe_pt_commit_unbind(vma, entries, num_entries,
+				    unbind_pt_update.locked ? &deferred : NULL);
+		vma->tile_present &= ~BIT(tile->id);
+	} else {
+		kfree(rfence);
+		kfree(ifence);
+	}
+
+	/* Not present on any tile anymore: drop it from the rebind list */
+	if (!vma->tile_present)
+		list_del_init(&vma->combined_links.rebind);
+
+	/* Set by the userptr pre_commit hook when it left the lock held */
+	if (unbind_pt_update.locked) {
+		xe_tile_assert(tile, xe_vma_is_userptr(vma));
+
+		if (!vma->tile_present) {
+			spin_lock(&vm->userptr.invalidated_lock);
+			list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link);
+			spin_unlock(&vm->userptr.invalidated_lock);
+		}
+		up_read(&vm->userptr.notifier_lock);
+		xe_bo_put_commit(&deferred);
+	}
+
+	return fence;
+}
diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
new file mode 100644
index 000000000000..71a4fbfcff43
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pt.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#ifndef _XE_PT_H_
+#define _XE_PT_H_
+
+#include <linux/types.h>
+
+#include "xe_pt_types.h"
+
+struct dma_fence;
+struct xe_bo;
+struct xe_device;
+struct xe_exec_queue;
+struct xe_sync_entry;
+struct xe_tile;
+struct xe_vm;
+struct xe_vma;
+
+/* Largest huge pte is currently 1GiB. May become device dependent. */
+#define MAX_HUGEPTE_LEVEL 2
+
+/* Write the u64 @data at qword index @idx of @map through xe_map_wr(). */
+#define xe_pt_write(xe, map, idx, data) \
+	xe_map_wr(xe, map, (idx) * sizeof(u64), u64, data)
+
+unsigned int xe_pt_shift(unsigned int level);
+
+struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
+			   unsigned int level);
+
+void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm,
+			  struct xe_pt *pt);
+
+void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred);
+
+struct dma_fence *
+__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q,
+		 struct xe_sync_entry *syncs, u32 num_syncs,
+		 bool rebind);
+
+struct dma_fence *
+__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q,
+		   struct xe_sync_entry *syncs, u32 num_syncs);
+
+bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h
new file mode 100644
index 000000000000..cee70cb0f014
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pt_types.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PT_TYPES_H_
+#define _XE_PT_TYPES_H_
+
+#include <linux/types.h>
+
+#include "xe_pt_walk.h"
+
+struct xe_bo;
+struct xe_device;
+struct xe_vma;
+
+/* Cache levels; __XE_CACHE_LEVEL_COUNT is the number of real entries. */
+enum xe_cache_level {
+	XE_CACHE_NONE,
+	XE_CACHE_WT,
+	XE_CACHE_WB,
+	XE_CACHE_NONE_COMPRESSION, /* UC + COH_NONE + COMPRESSION */
+	__XE_CACHE_LEVEL_COUNT,
+};
+
+#define XE_VM_MAX_LEVEL 4
+
+/* A page table in the vm's page-table tree, subclassing struct xe_ptw. */
+struct xe_pt {
+	/** @base: The page-table walker base class. */
+	struct xe_ptw base;
+	/** @bo: presumably the buffer object backing this page table - confirm */
+	struct xe_bo *bo;
+	/** @level: Page-table level; level 0 tables have no child tables. */
+	unsigned int level;
+	/** @num_live: Count of live entries; reduced when entries are unbound. */
+	unsigned int num_live;
+	bool rebind;
+	bool is_compact;
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
+	/** @addr: Virtual address start address of the PT. */
+	u64 addr;
+#endif
+};
+
+/*
+ * Table of PTE/PDE encoding callbacks, each returning a u64.
+ * NOTE(review): the exact encoding contract of each callback is defined by
+ * the implementations, which are not visible from this header.
+ */
+struct xe_pt_ops {
+	u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset,
+			     u16 pat_index, u32 pt_level);
+	u64 (*pte_encode_vma)(u64 pte, struct xe_vma *vma,
+			      u16 pat_index, u32 pt_level);
+	u64 (*pte_encode_addr)(struct xe_device *xe, u64 addr,
+			       u16 pat_index,
+			       u32 pt_level, bool devmem, u64 flags);
+	u64 (*pde_encode_bo)(struct xe_bo *bo, u64 bo_offset,
+			     u16 pat_index);
+};
+
+/* Pairs a page-table pointer with a 64-bit pte value. */
+struct xe_pt_entry {
+	struct xe_pt *pt;
+	u64 pte;
+};
+
+/* Describes one run of entries to write within a single page table. */
+struct xe_vm_pgtable_update {
+	/** @pt_bo: page table bo to write to */
+	struct xe_bo *pt_bo;
+
+	/** @ofs: offset inside this page table to begin writing to (in qwords) */
+	u32 ofs;
+
+	/** @qwords: number of PTE's to write */
+	u32 qwords;
+
+	/** @pt: opaque pointer useful for the caller of xe_migrate_update_pgtables */
+	struct xe_pt *pt;
+
+	/** @pt_entries: Newly added pagetable entries */
+	struct xe_pt_entry *pt_entries;
+
+	/** @flags: Target flags */
+	u32 flags;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pt_walk.c b/drivers/gpu/drm/xe/xe_pt_walk.c
new file mode 100644
index 000000000000..b8b3d2aea492
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pt_walk.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#include "xe_pt_walk.h"
+
+/**
+ * DOC: GPU page-table tree walking.
+ * The utilities in this file are similar to the CPU page-table walk
+ * utilities in mm/pagewalk.c. The main difference is that we distinguish
+ * the various levels of a page-table tree with an unsigned integer rather
+ * than by name. 0 is the lowest level, and page-tables with level 0 can
+ * not be directories pointing to lower levels, whereas all other levels
+ * can. The user of the utilities determines the highest level.
+ *
+ * Nomenclature:
+ * Each struct xe_ptw, regardless of level is referred to as a page table, and
+ * multiple page tables typically form a page table tree with page tables at
+ * intermediate levels being page directories pointing at page tables at lower
+ * levels. A shared page table for a given address range is a page-table which
+ * is neither fully within nor fully outside the address range and that can
+ * thus be shared by two or more address ranges.
+ *
+ * Please keep this code generic so that it can be used as a drm-wide page-
+ * table walker should other drivers find use for it.
+ */
+/*
+ * Return the end of the @level entry containing @addr, i.e. the next entry
+ * boundary strictly above @addr, clamped to @end.
+ */
+static u64 xe_pt_addr_end(u64 addr, u64 end, unsigned int level,
+			  const struct xe_pt_walk *walk)
+{
+	u64 entry_size = 1ull << walk->shifts[level];
+	u64 boundary = round_up(addr + 1, entry_size);
+
+	return min_t(u64, boundary, end);
+}
+
+/*
+ * Advance the walk position: *@addr becomes @next and *@offset moves on by
+ * one entry. In shared-pt mode the position instead jumps straight to the
+ * last entry boundary at or below @end when that lies beyond @next.
+ *
+ * Return: true while the new address differs from @end.
+ */
+static bool xe_pt_next(pgoff_t *offset, u64 *addr, u64 next, u64 end,
+		       unsigned int level, const struct xe_pt_walk *walk)
+{
+	pgoff_t advance = 1;
+
+	/* Shared pt walk skips to the last pagetable */
+	if (unlikely(walk->shared_pt_mode)) {
+		unsigned int shift = walk->shifts[level];
+		u64 last_pt = round_down(end, 1ull << shift);
+
+		if (last_pt > next) {
+			advance += (last_pt - next) >> shift;
+			next = last_pt;
+		}
+	}
+
+	*offset += advance;
+	*addr = next;
+
+	return next != end;
+}
+
+/**
+ * xe_pt_walk_range() - Walk a range of a gpu page table tree with callbacks
+ * for each page-table entry in all levels.
+ * @parent: The root page table for walk start.
+ * @level: The root page table level.
+ * @addr: Virtual address start.
+ * @end: Virtual address end + 1.
+ * @walk: Walk info.
+ *
+ * Similar to the CPU page-table walker, this is a helper to walk
+ * a gpu page table and call a provided callback function for each entry.
+ * The function recurses into child page tables one level down, unless the
+ * pt_entry callback sets the action to ACTION_CONTINUE, there is no child,
+ * or @level is 0.
+ *
+ * Return: 0 on success, negative error code on error. The error is
+ * propagated from the callback and on error the walk is terminated.
+ */
+int xe_pt_walk_range(struct xe_ptw *parent, unsigned int level,
+		     u64 addr, u64 end, struct xe_pt_walk *walk)
+{
+	pgoff_t offset = xe_pt_offset(addr, level, walk);
+	struct xe_ptw **entries = parent->children ? parent->children : NULL;
+	const struct xe_pt_walk_ops *ops = walk->ops;
+	enum page_walk_action action;
+	struct xe_ptw *child;
+	int err = 0;
+	u64 next;
+
+	/*
+	 * Note that "continue" in this do-while jumps to the loop condition,
+	 * so xe_pt_next() still advances @offset and @addr.
+	 */
+	do {
+		next = xe_pt_addr_end(addr, end, level, walk);
+		/* Shared walks skip entries fully covered by the range. */
+		if (walk->shared_pt_mode && xe_pt_covers(addr, next, level,
+							 walk))
+			continue;
+again:
+		action = ACTION_SUBTREE;
+		child = entries ? entries[offset] : NULL;
+		err = ops->pt_entry(parent, offset, level, addr, next,
+				    &child, &action, walk);
+		if (err)
+			break;
+
+		/* Probably not needed yet for gpu pagetable walk. */
+		if (unlikely(action == ACTION_AGAIN))
+			goto again;
+
+		if (likely(!level || !child || action == ACTION_CONTINUE))
+			continue;
+
+		err = xe_pt_walk_range(child, level - 1, addr, next, walk);
+
+		if (!err && ops->pt_post_descend)
+			err = ops->pt_post_descend(parent, offset, level, addr,
+						   next, &child, &action, walk);
+		if (err)
+			break;
+
+	} while (xe_pt_next(&offset, &addr, next, end, level, walk));
+
+	return err;
+}
+
+/**
+ * xe_pt_walk_shared() - Walk shared page tables of a page-table tree.
+ * @parent: Root page table directory.
+ * @level: Level of the root.
+ * @addr: Start address.
+ * @end: Last address + 1.
+ * @walk: Walk info.
+ *
+ * Like xe_pt_walk_range(), except that page tables private to the range are
+ * skipped. Because the root (@parent) is typically a shared page table too,
+ * the pt_entry and pt_post_descend callbacks are also invoked for the root
+ * itself, with @level + 1 and offset 0. Callbacks can recognize the root by
+ * checking whether parent == *child.
+ * Walking only the shared page tables is common for unbind-type operations
+ * where the page-table entries for an address range are cleared or detached
+ * from the main page-table tree.
+ *
+ * Return: 0 on success, negative error code on error: If a callback
+ * returns an error, the walk will be terminated and the error returned by
+ * this function.
+ */
+int xe_pt_walk_shared(struct xe_ptw *parent, unsigned int level,
+		      u64 addr, u64 end, struct xe_pt_walk *walk)
+{
+	const struct xe_pt_walk_ops *ops = walk->ops;
+	enum page_walk_action action = ACTION_SUBTREE;
+	struct xe_ptw *child = parent;
+	int err;
+
+	walk->shared_pt_mode = true;
+
+	/* Present the root to the callback as its own child. */
+	err = ops->pt_entry(parent, 0, level + 1, addr, end,
+			    &child, &action, walk);
+	if (err)
+		return err;
+	if (action != ACTION_SUBTREE)
+		return 0;
+
+	err = xe_pt_walk_range(parent, level, addr, end, walk);
+	if (err || !ops->pt_post_descend)
+		return err;
+
+	return ops->pt_post_descend(parent, 0, level + 1, addr, end,
+				    &child, &action, walk);
+}
diff --git a/drivers/gpu/drm/xe/xe_pt_walk.h b/drivers/gpu/drm/xe/xe_pt_walk.h
new file mode 100644
index 000000000000..5ecc4d2f0f65
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pt_walk.h
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#ifndef __XE_PT_WALK__
+#define __XE_PT_WALK__
+
+#include <linux/pagewalk.h>
+#include <linux/types.h>
+
+/**
+ * struct xe_ptw - base class for driver pagetable subclassing.
+ * @children: Pointer to an array of children if any, NULL otherwise.
+ *
+ * Drivers could subclass this, and if it's a page-directory, typically
+ * embed an array of xe_ptw pointers.
+ */
+struct xe_ptw {
+	struct xe_ptw **children;
+};
+
+/**
+ * struct xe_pt_walk - Embeddable struct for walk parameters
+ */
+struct xe_pt_walk {
+	/** @ops: The walk ops used for the pagewalk */
+	const struct xe_pt_walk_ops *ops;
+	/**
+	 * @shifts: Array of page-table entry shifts used for the
+	 * different levels, starting out with the leaf level 0
+	 * page-shift as the first entry. It's legal for this pointer to be
+	 * changed during the walk.
+	 */
+	const u64 *shifts;
+	/** @max_level: Highest populated level in @shifts */
+	unsigned int max_level;
+	/**
+	 * @shared_pt_mode: Whether to skip all entries that are private
+	 * to the address range and call the callbacks only for entries that
+	 * are shared with other address ranges. Such entries are referred to
+	 * as shared pagetables.
+	 */
+	bool shared_pt_mode;
+};
+
+/**
+ * typedef xe_pt_entry_fn - gpu page-table-walk callback-function
+ * @parent: The parent page table.
+ * @offset: The offset (number of entries) into the page table.
+ * @level: The level of @parent.
+ * @addr: The virtual address.
+ * @next: The virtual address for the next call, or end address.
+ * @child: Pointer to pointer to child page-table at this @offset. The
+ * function may modify the value pointed to if, for example, allocating a
+ * child page table.
+ * @action: The walk action to take upon return. See <linux/pagewalk.h>.
+ * @walk: The walk parameters.
+ *
+ * Return: 0 to keep walking, or a negative error code, which terminates the
+ * walk.
+ */
+typedef int (*xe_pt_entry_fn)(struct xe_ptw *parent, pgoff_t offset,
+			      unsigned int level, u64 addr, u64 next,
+			      struct xe_ptw **child,
+			      enum page_walk_action *action,
+			      struct xe_pt_walk *walk);
+
+/**
+ * struct xe_pt_walk_ops - Walk callbacks.
+ */
+struct xe_pt_walk_ops {
+	/**
+	 * @pt_entry: Callback to be called for each page table entry prior
+	 * to descending to the next level. The returned value of the action
+	 * function parameter is honored. The walker calls this
+	 * unconditionally.
+	 */
+	xe_pt_entry_fn pt_entry;
+	/**
+	 * @pt_post_descend: Callback to be called for each page table entry
+	 * after return from descending to the next level. The returned value
+	 * of the action function parameter is ignored. May be NULL.
+	 */
+	xe_pt_entry_fn pt_post_descend;
+};
+
+int xe_pt_walk_range(struct xe_ptw *parent, unsigned int level,
+		     u64 addr, u64 end, struct xe_pt_walk *walk);
+
+int xe_pt_walk_shared(struct xe_ptw *parent, unsigned int level,
+		      u64 addr, u64 end, struct xe_pt_walk *walk);
+
+/**
+ * xe_pt_covers - Whether the address range covers an entire entry in @level
+ * @addr: Start of the range.
+ * @end: End of range + 1.
+ * @level: Page table level.
+ * @walk: Page table walk info.
+ *
+ * Helper for deciding whether a leaf page table entry can be inserted at
+ * this @level: the range must be exactly one entry in size and start on an
+ * entry boundary.
+ *
+ * Return: Whether the range provided covers exactly an entry at this level.
+ */
+static inline bool xe_pt_covers(u64 addr, u64 end, unsigned int level,
+				const struct xe_pt_walk *walk)
+{
+	const u64 entry_size = 1ull << walk->shifts[level];
+
+	if (end - addr != entry_size)
+		return false;
+
+	return IS_ALIGNED(addr, entry_size);
+}
+
+/**
+ * xe_pt_num_entries: Number of page-table entries of a given range at this
+ * level
+ * @addr: Start address.
+ * @end: End address.
+ * @level: Page table level.
+ * @walk: Walk info.
+ *
+ * The range is widened outwards to entry boundaries before counting.
+ *
+ * Return: The number of page table entries at this level between @addr and
+ * @end.
+ */
+static inline pgoff_t
+xe_pt_num_entries(u64 addr, u64 end, unsigned int level,
+		  const struct xe_pt_walk *walk)
+{
+	u64 entry_size = 1ull << walk->shifts[level];
+	u64 first = round_down(addr, entry_size);
+	u64 limit = round_up(end, entry_size);
+
+	return (limit - first) >> walk->shifts[level];
+}
+
+/**
+ * xe_pt_offset: Offset of the page-table entry for a given address.
+ * @addr: The address.
+ * @level: Page table level.
+ * @walk: Walk info.
+ *
+ * Below the walk's max level, the address bits belonging to higher levels
+ * are masked off first; at the max level the address is used as is.
+ *
+ * Return: The page table entry offset for the given address in a
+ * page table with size indicated by @level.
+ */
+static inline pgoff_t
+xe_pt_offset(u64 addr, unsigned int level, const struct xe_pt_walk *walk)
+{
+	u64 within_pt = addr;
+
+	if (level < walk->max_level)
+		within_pt &= (1ull << walk->shifts[level + 1]) - 1;
+
+	return within_pt >> walk->shifts[level];
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
new file mode 100644
index 000000000000..7e924faeeea0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -0,0 +1,552 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_query.h"
+
+#include <linux/nospec.h>
+#include <linux/sched/clock.h>
+
+#include <drm/ttm/ttm_placement.h>
+#include <drm/xe_drm.h>
+
+#include "regs/xe_engine_regs.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_exec_queue.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_guc_hwconfig.h"
+#include "xe_macros.h"
+#include "xe_mmio.h"
+#include "xe_ttm_vram_mgr.h"
+
+/*
+ * Translation from the driver-internal engine class (array index) to the
+ * engine class value reported to user space; used by query_engines().
+ */
+static const u16 xe_to_user_engine_class[] = {
+	[XE_ENGINE_CLASS_RENDER] = DRM_XE_ENGINE_CLASS_RENDER,
+	[XE_ENGINE_CLASS_COPY] = DRM_XE_ENGINE_CLASS_COPY,
+	[XE_ENGINE_CLASS_VIDEO_DECODE] = DRM_XE_ENGINE_CLASS_VIDEO_DECODE,
+	[XE_ENGINE_CLASS_VIDEO_ENHANCE] = DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE,
+	[XE_ENGINE_CLASS_COMPUTE] = DRM_XE_ENGINE_CLASS_COMPUTE,
+};
+
+/*
+ * Translation from the user-space engine class (array index) to the
+ * driver-internal engine class. Callers must bounds-check the user supplied
+ * index against ARRAY_SIZE() first, as query_engine_cycles() does.
+ */
+static const enum xe_engine_class user_to_xe_engine_class[] = {
+	[DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
+	[DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
+	[DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
+	[DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+	[DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
+};
+
+/*
+ * Size in bytes of the query_engines() reply: the struct drm_xe_query_engines
+ * header plus one struct drm_xe_engine for every hardware engine, on any GT,
+ * that is not reserved.
+ */
+static size_t calc_hw_engine_info_size(struct xe_device *xe)
+{
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	struct xe_gt *gt;
+	int num_engines = 0;
+	u8 gt_id;
+
+	for_each_gt(gt, xe, gt_id) {
+		for_each_hw_engine(hwe, gt, id) {
+			if (!xe_hw_engine_is_reserved(hwe))
+				num_engines++;
+		}
+	}
+
+	return sizeof(struct drm_xe_query_engines) +
+		num_engines * sizeof(struct drm_xe_engine);
+}
+
+/* Signature shared by the ktime_get_*_ns() helpers selectable below. */
+typedef u64 (*__ktime_func_t)(void);
+
+/*
+ * Map a user supplied clock id to the matching nanosecond clock getter.
+ *
+ * Return: the getter for @clk_id, or NULL if the clock id is not supported.
+ */
+static __ktime_func_t __clock_id_to_func(clockid_t clk_id)
+{
+	__ktime_func_t getter = NULL;
+
+	/*
+	 * Use logic same as the perf subsystem to allow user to select the
+	 * reference clock id to be used for timestamps.
+	 */
+	switch (clk_id) {
+	case CLOCK_MONOTONIC:
+		getter = &ktime_get_ns;
+		break;
+	case CLOCK_MONOTONIC_RAW:
+		getter = &ktime_get_raw_ns;
+		break;
+	case CLOCK_REALTIME:
+		getter = &ktime_get_real_ns;
+		break;
+	case CLOCK_BOOTTIME:
+		getter = &ktime_get_boottime_ns;
+		break;
+	case CLOCK_TAI:
+		getter = &ktime_get_clocktai_ns;
+		break;
+	default:
+		break;
+	}
+
+	return getter;
+}
+
+/*
+ * Sample a 64-bit engine timestamp that is exposed as two 32-bit registers,
+ * together with a CPU timestamp taken immediately before the lower dword is
+ * read.
+ *
+ * @gt: GT used for the MMIO reads.
+ * @lower_reg: register holding the lower 32 bits.
+ * @upper_reg: register holding the upper 32 bits.
+ * @engine_ts: out, the combined 64-bit engine timestamp.
+ * @cpu_ts: out, value returned by @cpu_clock just before reading @lower_reg.
+ * @cpu_delta: out, local_clock() time elapsed around the @cpu_clock call and
+ *             the @lower_reg read.
+ * @cpu_clock: CPU clock getter chosen by the caller.
+ *
+ * The upper dword is read before and after the lower dword; if it changed in
+ * between, the sample is retried, for at most three attempts in total. The
+ * values of the last attempt are returned even if the upper dword still
+ * differed.
+ */
+static void
+__read_timestamps(struct xe_gt *gt,
+		  struct xe_reg lower_reg,
+		  struct xe_reg upper_reg,
+		  u64 *engine_ts,
+		  u64 *cpu_ts,
+		  u64 *cpu_delta,
+		  __ktime_func_t cpu_clock)
+{
+	u32 upper, lower, old_upper, loop = 0;
+
+	upper = xe_mmio_read32(gt, upper_reg);
+	do {
+		/* cpu_delta temporarily holds the start time of this attempt */
+		*cpu_delta = local_clock();
+		*cpu_ts = cpu_clock();
+		lower = xe_mmio_read32(gt, lower_reg);
+		*cpu_delta = local_clock() - *cpu_delta;
+		old_upper = upper;
+		upper = xe_mmio_read32(gt, upper_reg);
+	} while (upper != old_upper && loop++ < 2);
+
+	*engine_ts = (u64)upper << 32 | lower;
+}
+
+/*
+ * Query handler returning a correlated engine/CPU timestamp pair for one
+ * engine.
+ *
+ * User space passes a struct drm_xe_query_engine_cycles whose input fields
+ * (clock id and engine class instance) select the CPU clock and the engine.
+ * Only the output fields are written back.
+ *
+ * Return: 0 on success (or after reporting the required size when
+ * @query->size is 0), -EINVAL on a size mismatch, unsupported clock id or
+ * unknown engine/GT, -EFAULT if user memory cannot be accessed.
+ */
+static int
+query_engine_cycles(struct xe_device *xe,
+		    struct drm_xe_device_query *query)
+{
+	struct drm_xe_query_engine_cycles __user *query_ptr;
+	struct drm_xe_engine_class_instance *eci;
+	struct drm_xe_query_engine_cycles resp;
+	size_t size = sizeof(resp);
+	__ktime_func_t cpu_clock;
+	struct xe_hw_engine *hwe;
+	struct xe_gt *gt;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
+		return -EINVAL;
+	}
+
+	query_ptr = u64_to_user_ptr(query->data);
+	if (copy_from_user(&resp, query_ptr, size))
+		return -EFAULT;
+
+	cpu_clock = __clock_id_to_func(resp.clockid);
+	if (!cpu_clock)
+		return -EINVAL;
+
+	eci = &resp.eci;
+	/* Valid GT ids are 0 .. XE_MAX_GT_PER_TILE - 1, so the limit itself is invalid */
+	if (eci->gt_id >= XE_MAX_GT_PER_TILE)
+		return -EINVAL;
+
+	gt = xe_device_get_gt(xe, eci->gt_id);
+	if (!gt)
+		return -EINVAL;
+
+	if (eci->engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
+		return -EINVAL;
+
+	hwe = xe_gt_hw_engine(gt, user_to_xe_engine_class[eci->engine_class],
+			      eci->engine_instance, true);
+	if (!hwe)
+		return -EINVAL;
+
+	xe_device_mem_access_get(xe);
+	xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+
+	__read_timestamps(gt,
+			  RING_TIMESTAMP(hwe->mmio_base),
+			  RING_TIMESTAMP_UDW(hwe->mmio_base),
+			  &resp.engine_cycles,
+			  &resp.cpu_timestamp,
+			  &resp.cpu_delta,
+			  cpu_clock);
+
+	xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	xe_device_mem_access_put(xe);
+	resp.width = 36;
+
+	/* Only write to the output fields of user query */
+	if (put_user(resp.cpu_timestamp, &query_ptr->cpu_timestamp))
+		return -EFAULT;
+
+	if (put_user(resp.cpu_delta, &query_ptr->cpu_delta))
+		return -EFAULT;
+
+	if (put_user(resp.engine_cycles, &query_ptr->engine_cycles))
+		return -EFAULT;
+
+	if (put_user(resp.width, &query_ptr->width))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * Query handler listing every non-reserved hardware engine of the device.
+ *
+ * The reply buffer is copied to user space as a whole, so it is allocated
+ * zeroed: padding and reserved fields are guaranteed to be zero and no stale
+ * heap content can reach user space.
+ *
+ * Return: 0 on success (or after reporting the required size when
+ * @query->size is 0), -EINVAL on a size mismatch, -ENOMEM on allocation
+ * failure, -EFAULT if the reply cannot be copied to user space.
+ */
+static int query_engines(struct xe_device *xe,
+			 struct drm_xe_device_query *query)
+{
+	size_t size = calc_hw_engine_info_size(xe);
+	struct drm_xe_query_engines __user *query_ptr =
+		u64_to_user_ptr(query->data);
+	struct drm_xe_query_engines *engines;
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	struct xe_gt *gt;
+	u8 gt_id;
+	int i = 0;
+	int ret = 0;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
+		return -EINVAL;
+	}
+
+	engines = kzalloc(size, GFP_KERNEL);
+	if (!engines)
+		return -ENOMEM;
+
+	for_each_gt(gt, xe, gt_id)
+		for_each_hw_engine(hwe, gt, id) {
+			if (xe_hw_engine_is_reserved(hwe))
+				continue;
+
+			/* pad and reserved[] stay zero from kzalloc() */
+			engines->engines[i].instance.engine_class =
+				xe_to_user_engine_class[hwe->class];
+			engines->engines[i].instance.engine_instance =
+				hwe->logical_instance;
+			engines->engines[i].instance.gt_id = gt->info.id;
+
+			i++;
+		}
+
+	engines->num_engines = i;
+
+	if (copy_to_user(query_ptr, engines, size))
+		ret = -EFAULT;
+
+	kfree(engines);
+
+	return ret;
+}
+
+/*
+ * Size in bytes of the query_mem_regions() reply: one region is always
+ * counted, plus one for each of XE_PL_VRAM0..XE_PL_VRAM1 that has a TTM
+ * resource manager.
+ */
+static size_t calc_mem_regions_size(struct xe_device *xe)
+{
+	u32 num_regions = 1;
+	int placement;
+
+	for (placement = XE_PL_VRAM0; placement <= XE_PL_VRAM1; ++placement) {
+		if (ttm_manager_type(&xe->ttm, placement))
+			num_regions++;
+	}
+
+	return offsetof(struct drm_xe_query_mem_regions, mem_regions[num_regions]);
+}
+
+/*
+ * Query handler describing the memory regions of the device: entry 0 is
+ * filled from the XE_PL_TT manager and reported as system memory, followed by
+ * one VRAM entry for each of XE_PL_VRAM0..XE_PL_VRAM1 that has a manager.
+ *
+ * The usage counters are only filled in for callers that pass
+ * perfmon_capable(); otherwise they stay zero from kzalloc().
+ *
+ * Return: 0 on success (or after reporting the required size when
+ * @query->size is 0), -EINVAL on a size mismatch, -ENOMEM on allocation
+ * failure, -ENOSPC if the reply cannot be copied to user space.
+ */
+static int query_mem_regions(struct xe_device *xe,
+			    struct drm_xe_device_query *query)
+{
+	size_t size = calc_mem_regions_size(xe);
+	struct drm_xe_query_mem_regions *mem_regions;
+	struct drm_xe_query_mem_regions __user *query_ptr =
+		u64_to_user_ptr(query->data);
+	struct ttm_resource_manager *man;
+	int ret, i;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
+		return -EINVAL;
+	}
+
+	mem_regions = kzalloc(size, GFP_KERNEL);
+	if (XE_IOCTL_DBG(xe, !mem_regions))
+		return -ENOMEM;
+
+	man = ttm_manager_type(&xe->ttm, XE_PL_TT);
+	mem_regions->mem_regions[0].mem_class = DRM_XE_MEM_REGION_CLASS_SYSMEM;
+	/*
+	 * The instance needs to be a unique number that represents the index
+	 * in the placement mask used at xe_gem_create_ioctl() for the
+	 * xe_bo_create() placement.
+	 */
+	mem_regions->mem_regions[0].instance = 0;
+	mem_regions->mem_regions[0].min_page_size = PAGE_SIZE;
+	/*
+	 * NOTE(review): man->size is shifted by PAGE_SHIFT here but used
+	 * unshifted for the VRAM managers below - presumably the two manager
+	 * types keep their size in different units; confirm.
+	 */
+	mem_regions->mem_regions[0].total_size = man->size << PAGE_SHIFT;
+	if (perfmon_capable())
+		mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man);
+	mem_regions->num_mem_regions = 1;
+
+	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
+		man = ttm_manager_type(&xe->ttm, i);
+		if (man) {
+			/* num_mem_regions doubles as the index of the entry being filled */
+			mem_regions->mem_regions[mem_regions->num_mem_regions].mem_class =
+				DRM_XE_MEM_REGION_CLASS_VRAM;
+			mem_regions->mem_regions[mem_regions->num_mem_regions].instance =
+				mem_regions->num_mem_regions;
+			mem_regions->mem_regions[mem_regions->num_mem_regions].min_page_size =
+				xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ?
+				SZ_64K : PAGE_SIZE;
+			mem_regions->mem_regions[mem_regions->num_mem_regions].total_size =
+				man->size;
+
+			if (perfmon_capable()) {
+				xe_ttm_vram_get_used(man,
+					&mem_regions->mem_regions
+					[mem_regions->num_mem_regions].used,
+					&mem_regions->mem_regions
+					[mem_regions->num_mem_regions].cpu_visible_used);
+			}
+
+			mem_regions->mem_regions[mem_regions->num_mem_regions].cpu_visible_size =
+				xe_ttm_vram_get_cpu_visible_size(man);
+			mem_regions->num_mem_regions++;
+		}
+	}
+
+	/*
+	 * NOTE(review): the other query handlers in this file return -EFAULT
+	 * when copy_to_user() fails; -ENOSPC here looks inconsistent but is
+	 * visible to user space, so confirm before changing.
+	 */
+	if (!copy_to_user(query_ptr, mem_regions, size))
+		ret = 0;
+	else
+		ret = -ENOSPC;
+
+	kfree(mem_regions);
+	return ret;
+}
+
+/*
+ * Query handler reporting device-wide configuration values as an array of
+ * u64 parameters indexed by DRM_XE_QUERY_CONFIG_*.
+ *
+ * Return: 0 on success (or after reporting the required size when
+ * @query->size is 0), -EINVAL on a size mismatch, -ENOMEM on allocation
+ * failure, -EFAULT if the reply cannot be copied to user space.
+ */
+static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
+{
+	const u32 num_params = DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY + 1;
+	size_t size =
+		sizeof(struct drm_xe_query_config) + num_params * sizeof(u64);
+	struct drm_xe_query_config __user *query_ptr =
+		u64_to_user_ptr(query->data);
+	struct drm_xe_query_config *config;
+	int err = 0;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	}
+
+	if (XE_IOCTL_DBG(xe, query->size != size))
+		return -EINVAL;
+
+	config = kzalloc(size, GFP_KERNEL);
+	if (!config)
+		return -ENOMEM;
+
+	config->num_params = num_params;
+
+	/* Device id in the low 16 bits, revision id above it */
+	config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] =
+		xe->info.devid | (xe->info.revid << 16);
+
+	if (xe_device_get_root_tile(xe)->mem.vram.usable_size)
+		config->info[DRM_XE_QUERY_CONFIG_FLAGS] =
+			DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM;
+
+	if (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
+		config->info[DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT] = SZ_64K;
+	else
+		config->info[DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT] = SZ_4K;
+
+	config->info[DRM_XE_QUERY_CONFIG_VA_BITS] = xe->info.va_bits;
+	config->info[DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY] =
+		xe_exec_queue_device_get_max_priority(xe);
+
+	if (copy_to_user(query_ptr, config, size))
+		err = -EFAULT;
+
+	kfree(config);
+
+	return err;
+}
+
+/*
+ * Query handler describing every GT of the device: its type, tile, id,
+ * reference clock and the masks of near and far memory regions.
+ *
+ * Return: 0 on success (or after reporting the required size when
+ * @query->size is 0), -EINVAL on a size mismatch, -ENOMEM on allocation
+ * failure, -EFAULT if the reply cannot be copied to user space.
+ */
+static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query)
+{
+	struct xe_gt *gt;
+	size_t size = sizeof(struct drm_xe_query_gt_list) +
+		xe->info.gt_count * sizeof(struct drm_xe_gt);
+	struct drm_xe_query_gt_list __user *query_ptr =
+		u64_to_user_ptr(query->data);
+	struct drm_xe_query_gt_list *gt_list;
+	int err = 0;
+	u8 id;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	}
+
+	if (XE_IOCTL_DBG(xe, query->size != size))
+		return -EINVAL;
+
+	gt_list = kzalloc(size, GFP_KERNEL);
+	if (!gt_list)
+		return -ENOMEM;
+
+	gt_list->num_gt = xe->info.gt_count;
+
+	for_each_gt(gt, xe, id) {
+		struct drm_xe_gt *entry = &gt_list->gt_list[id];
+
+		entry->type = xe_gt_is_media_type(gt) ?
+			DRM_XE_QUERY_GT_TYPE_MEDIA : DRM_XE_QUERY_GT_TYPE_MAIN;
+		entry->tile_id = gt_to_tile(gt)->id;
+		entry->gt_id = gt->info.id;
+		entry->reference_clock = gt->info.reference_clock;
+
+		/*
+		 * The mem_regions indexes in the mask below need to
+		 * directly identify the struct
+		 * drm_xe_query_mem_regions' instance constructed at
+		 * query_mem_regions()
+		 *
+		 * For our current platforms:
+		 * Bit 0 -> System Memory
+		 * Bit 1 -> VRAM0 on Tile0
+		 * Bit 2 -> VRAM1 on Tile1
+		 * However the uAPI is generic and it's userspace's
+		 * responsibility to check the mem_class, without any
+		 * assumption.
+		 */
+		if (IS_DGFX(xe))
+			entry->near_mem_regions = BIT(gt_to_tile(gt)->id) << 1;
+		else
+			entry->near_mem_regions = 0x1;
+
+		/* Everything in the device mask that is not near is far */
+		entry->far_mem_regions = xe->info.mem_region_mask ^
+			entry->near_mem_regions;
+	}
+
+	if (copy_to_user(query_ptr, gt_list, size))
+		err = -EFAULT;
+
+	kfree(gt_list);
+
+	return err;
+}
+
+/*
+ * Query handler returning the GuC hwconfig blob of the root MMIO GT.
+ *
+ * Return: 0 on success (or after reporting the required size when
+ * @query->size is 0), -EINVAL on a size mismatch, -ENOMEM on allocation
+ * failure, -EFAULT if the blob cannot be copied to user space.
+ */
+static int query_hwconfig(struct xe_device *xe,
+			  struct drm_xe_device_query *query)
+{
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
+	size_t size = xe_guc_hwconfig_size(&gt->uc.guc);
+	void __user *query_ptr = u64_to_user_ptr(query->data);
+	void *hwconfig;
+	int err = 0;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	}
+
+	if (XE_IOCTL_DBG(xe, query->size != size))
+		return -EINVAL;
+
+	hwconfig = kzalloc(size, GFP_KERNEL);
+	if (!hwconfig)
+		return -ENOMEM;
+
+	/* Hold a memory access reference while copying out of the GuC object */
+	xe_device_mem_access_get(xe);
+	xe_guc_hwconfig_copy(&gt->uc.guc, hwconfig);
+	xe_device_mem_access_put(xe);
+
+	if (copy_to_user(query_ptr, hwconfig, size))
+		err = -EFAULT;
+
+	kfree(hwconfig);
+
+	return err;
+}
+
+/*
+ * Size in bytes of the query_gt_topology() reply: for every GT, three
+ * struct drm_xe_query_topology_mask headers, each followed by its mask
+ * (geometry DSS, compute DSS and EU-per-DSS).
+ */
+static size_t calc_topo_query_size(struct xe_device *xe)
+{
+	const size_t headers = 3 * sizeof(struct drm_xe_query_topology_mask);
+	const size_t masks =
+		sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) +
+		sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) +
+		sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss);
+
+	return xe->info.gt_count * (headers + masks);
+}
+
+/*
+ * Copy one topology record to user space: the @topo header followed by
+ * @mask_size bytes of @mask.
+ *
+ * @ptr: in/out user pointer; advanced past the header and the mask so that
+ *       the next record can be appended directly after this one.
+ * @topo: header to emit; its num_bytes field is set to @mask_size here.
+ * @mask: mask data to emit after the header.
+ * @mask_size: size of @mask in bytes.
+ *
+ * Return: 0 on success, -EFAULT if user memory cannot be written.
+ */
+static int copy_mask(void __user **ptr,
+		     struct drm_xe_query_topology_mask *topo,
+		     void *mask, size_t mask_size)
+{
+	topo->num_bytes = mask_size;
+
+	if (copy_to_user(*ptr, topo, sizeof(*topo)))
+		return -EFAULT;
+	/* Advance by the header size, not by the size of the pointer to it */
+	*ptr += sizeof(*topo);
+
+	if (copy_to_user(*ptr, mask, mask_size))
+		return -EFAULT;
+	*ptr += mask_size;
+
+	return 0;
+}
+
+/*
+ * Query handler emitting, for every GT, the geometry DSS, compute DSS and
+ * EU-per-DSS fuse masks, each preceded by a
+ * struct drm_xe_query_topology_mask header. Records are written straight to
+ * user space one after another.
+ *
+ * Return: 0 on success (or after reporting the required size when
+ * @query->size is 0), -EINVAL on a size mismatch, -EFAULT if user memory
+ * cannot be written.
+ */
+static int query_gt_topology(struct xe_device *xe,
+			     struct drm_xe_device_query *query)
+{
+	void __user *query_ptr = u64_to_user_ptr(query->data);
+	size_t size = calc_topo_query_size(xe);
+	struct drm_xe_query_topology_mask topo;
+	struct xe_gt *gt;
+	int id;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	}
+
+	if (XE_IOCTL_DBG(xe, query->size != size))
+		return -EINVAL;
+
+	for_each_gt(gt, xe, id) {
+		int err;
+
+		topo.gt_id = id;
+
+		topo.type = DRM_XE_TOPO_DSS_GEOMETRY;
+		err = copy_mask(&query_ptr, &topo, gt->fuse_topo.g_dss_mask,
+				sizeof(gt->fuse_topo.g_dss_mask));
+
+		if (!err) {
+			topo.type = DRM_XE_TOPO_DSS_COMPUTE;
+			err = copy_mask(&query_ptr, &topo,
+					gt->fuse_topo.c_dss_mask,
+					sizeof(gt->fuse_topo.c_dss_mask));
+		}
+
+		if (!err) {
+			topo.type = DRM_XE_TOPO_EU_PER_DSS;
+			err = copy_mask(&query_ptr, &topo,
+					gt->fuse_topo.eu_mask_per_dss,
+					sizeof(gt->fuse_topo.eu_mask_per_dss));
+		}
+
+		/* Stop at the first record that could not be written */
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Dispatch table for xe_query_ioctl(), indexed by the query id supplied by
+ * user space. The entries are positional, so their order is part of the
+ * user-visible interface.
+ */
+static int (* const xe_query_funcs[])(struct xe_device *xe,
+				      struct drm_xe_device_query *query) = {
+	query_engines,
+	query_mem_regions,
+	query_config,
+	query_gt_list,
+	query_hwconfig,
+	query_gt_topology,
+	query_engine_cycles,
+};
+
+/*
+ * Device query ioctl entry point: validate the request and dispatch it to the
+ * handler selected by @data's query id.
+ *
+ * Return: -EINVAL if extensions or reserved fields are set or the query id is
+ * unknown, otherwise the result of the selected handler.
+ */
+int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_xe_device_query *query = data;
+	struct xe_device *xe = to_xe_device(dev);
+	u32 idx;
+
+	if (XE_IOCTL_DBG(xe, query->extensions))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, query->reserved[0] || query->reserved[1]))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, query->query >= ARRAY_SIZE(xe_query_funcs)))
+		return -EINVAL;
+
+	/* Clamp the user controlled index under speculation as well */
+	idx = array_index_nospec(query->query, ARRAY_SIZE(xe_query_funcs));
+	if (XE_IOCTL_DBG(xe, !xe_query_funcs[idx]))
+		return -EINVAL;
+
+	return xe_query_funcs[idx](xe, query);
+}
diff --git a/drivers/gpu/drm/xe/xe_query.h b/drivers/gpu/drm/xe/xe_query.h
new file mode 100644
index 000000000000..beeb7a8192b4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_query.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_QUERY_H_
+#define _XE_QUERY_H_
+
+struct drm_device;
+struct drm_file;
+
+int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_range_fence.c b/drivers/gpu/drm/xe/xe_range_fence.c
new file mode 100644
index 000000000000..372378e89e98
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_range_fence.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/dma-fence.h>
+#include <linux/interval_tree_generic.h>
+#include <linux/slab.h>
+
+#include "xe_macros.h"
+#include "xe_range_fence.h"
+
+/* Accessors for the interval bounds of a struct xe_range_fence node */
+#define XE_RANGE_TREE_START(_node)	((_node)->start)
+#define XE_RANGE_TREE_LAST(_node)	((_node)->last)
+
+/*
+ * Generate the static xe_range_fence_tree_{insert,remove,iter_first,iter_next}
+ * helpers used in this file, keyed on the u64 [start, last] range of each
+ * range fence.
+ */
+INTERVAL_TREE_DEFINE(struct xe_range_fence, rb, u64, __subtree_last,
+		     XE_RANGE_TREE_START, XE_RANGE_TREE_LAST, static,
+		     xe_range_fence_tree);
+
+/*
+ * dma-fence callback: queue the range fence that owns @cb on its tree's
+ * pending list. Removal from the tree and freeing are done later by
+ * __xe_range_fence_tree_cleanup().
+ */
+static void
+xe_range_fence_signal_notify(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+	struct xe_range_fence *rfence =
+		container_of(cb, struct xe_range_fence, cb);
+
+	llist_add(&rfence->link, &rfence->tree->list);
+}
+
+/*
+ * Drain @tree's pending list: every queued range fence is removed from the
+ * interval tree, its dma-fence reference is dropped and it is freed.
+ *
+ * Return: true if at least one range fence was pending, false if the list was
+ * empty.
+ */
+static bool __xe_range_fence_tree_cleanup(struct xe_range_fence_tree *tree)
+{
+	struct llist_node *pending = llist_del_all(&tree->list);
+	struct xe_range_fence *cur, *tmp;
+
+	llist_for_each_entry_safe(cur, tmp, pending, link) {
+		xe_range_fence_tree_remove(cur, &tree->root);
+		dma_fence_put(cur->fence);
+		kfree(cur);
+	}
+
+	return pending != NULL;
+}
+
+/**
+ * xe_range_fence_insert() - range fence insert
+ * @tree: range fence tree to insert into
+ * @rfence: range fence
+ * @ops: range fence ops
+ * @start: start address of range fence
+ * @last: last address of range fence
+ * @fence: dma fence which signals range fence can be removed + freed
+ *
+ * Range fences already queued for removal are cleaned up first. If @fence has
+ * already signaled, @rfence is not inserted and is released through
+ * @ops->free (when set) instead; this is still reported as success.
+ *
+ * Return: 0 on success, non-zero on failure
+ */
+int xe_range_fence_insert(struct xe_range_fence_tree *tree,
+			  struct xe_range_fence *rfence,
+			  const struct xe_range_fence_ops *ops,
+			  u64 start, u64 last, struct dma_fence *fence)
+{
+	int err;
+
+	__xe_range_fence_tree_cleanup(tree);
+
+	if (dma_fence_is_signaled(fence)) {
+		err = 0;
+		goto out_free;
+	}
+
+	rfence->ops = ops;
+	rfence->start = start;
+	rfence->last = last;
+	rfence->tree = tree;
+	rfence->fence = dma_fence_get(fence);
+
+	err = dma_fence_add_callback(fence, &rfence->cb,
+				     xe_range_fence_signal_notify);
+	if (!err) {
+		xe_range_fence_tree_insert(rfence, &tree->root);
+		return 0;
+	}
+
+	if (err == -ENOENT) {
+		/* Signaled in the meantime: drop our reference, not an error */
+		dma_fence_put(fence);
+		err = 0;
+	}
+
+out_free:
+	if (ops->free)
+		ops->free(rfence);
+
+	return err;
+}
+
+/*
+ * Empty @tree: detach the signal callback of every range fence still in the
+ * tree, queue those whose callback was successfully removed, and then drain
+ * the pending list until nothing is left on it.
+ */
+static void xe_range_fence_tree_remove_all(struct xe_range_fence_tree *tree)
+{
+	struct xe_range_fence *rfence;
+
+	for (rfence = xe_range_fence_tree_iter_first(&tree->root, 0, U64_MAX);
+	     rfence;
+	     rfence = xe_range_fence_tree_iter_next(rfence, 0, U64_MAX)) {
+		/* Should be ok with the minimalistic callback */
+		if (dma_fence_remove_callback(rfence->fence, &rfence->cb))
+			llist_add(&rfence->link, &tree->list);
+	}
+
+	/* Keep draining for as long as a pass found something to free */
+	while (__xe_range_fence_tree_cleanup(tree))
+		;
+}
+
+/**
+ * xe_range_fence_tree_init() - Init range fence tree
+ * @tree: range fence tree
+ *
+ * Zeroes the whole structure, i.e. both the interval tree root and the
+ * pending list.
+ */
+void xe_range_fence_tree_init(struct xe_range_fence_tree *tree)
+{
+	memset(tree, 0, sizeof(*tree));
+}
+
+/**
+ * xe_range_fence_tree_fini() - Fini range fence tree
+ * @tree: range fence tree
+ *
+ * Removes and frees all range fences and warns if the interval tree is not
+ * empty afterwards.
+ */
+void xe_range_fence_tree_fini(struct xe_range_fence_tree *tree)
+{
+	xe_range_fence_tree_remove_all(tree);
+	XE_WARN_ON(!RB_EMPTY_ROOT(&tree->root.rb_root));
+}
+
+/**
+ * xe_range_fence_tree_first() - range fence tree iterator first
+ * @tree: range fence tree
+ * @start: start address of the range to search
+ * @last: last address of the range to search
+ *
+ * Thin wrapper around the generated xe_range_fence_tree_iter_first().
+ *
+ * Return: first range fence found in range or NULL
+ */
+struct xe_range_fence *
+xe_range_fence_tree_first(struct xe_range_fence_tree *tree, u64 start,
+			  u64 last)
+{
+	return xe_range_fence_tree_iter_first(&tree->root, start, last);
+}
+
+/**
+ * xe_range_fence_tree_next() - range fence tree iterator next
+ * @rfence: current range fence
+ * @start: start address of the range to search
+ * @last: last address of the range to search
+ *
+ * Thin wrapper around the generated xe_range_fence_tree_iter_next().
+ *
+ * Return: next range fence found in range or NULL
+ */
+struct xe_range_fence *
+xe_range_fence_tree_next(struct xe_range_fence *rfence, u64 start, u64 last)
+{
+	return xe_range_fence_tree_iter_next(rfence, start, last);
+}
+
+/* &xe_range_fence_ops.free implementation for kmalloc'ed range fences */
+static void xe_range_fence_free(struct xe_range_fence *rfence)
+{
+	kfree(rfence);
+}
+
+/* Ready-made ops for range fences that are released with kfree() */
+const struct xe_range_fence_ops xe_range_fence_kfree_ops = {
+	.free = xe_range_fence_free,
+};
diff --git a/drivers/gpu/drm/xe/xe_range_fence.h b/drivers/gpu/drm/xe/xe_range_fence.h
new file mode 100644
index 000000000000..edd58b34f5c0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_range_fence.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_RANGE_FENCE_H_
+#define _XE_RANGE_FENCE_H_
+
+#include <linux/dma-fence.h>
+#include <linux/rbtree.h>
+#include <linux/types.h>
+
+struct xe_range_fence_tree;
+struct xe_range_fence;
+
+/** struct xe_range_fence_ops - XE range fence ops */
+struct xe_range_fence_ops {
+	/**
+	 * @free: free range fence op, called by xe_range_fence_insert() when
+	 * the range fence is not inserted into the tree; may be NULL
+	 */
+	void (*free)(struct xe_range_fence *rfence);
+};
+
+/** struct xe_range_fence - XE range fence (address conflict tracking) */
+struct xe_range_fence {
+	/** @rb: RB tree node inserted into interval tree */
+	struct rb_node rb;
+	/** @start: start address of range fence in interval tree */
+	u64 start;
+	/** @last: last address (inclusive) of range fence in interval tree */
+	u64 last;
+	/** @__subtree_last: interval tree internal usage */
+	u64 __subtree_last;
+	/**
+	 * @fence: fence signals address in range fence no longer has conflict
+	 */
+	struct dma_fence *fence;
+	/** @tree: interval tree which range fence belongs to */
+	struct xe_range_fence_tree *tree;
+	/**
+	 * @cb: callback run when @fence signals, used to queue the range
+	 * fence for removal from the interval tree
+	 */
+	struct dma_fence_cb cb;
+	/** @link: used to defer free of range fence to non-irq context */
+	struct llist_node link;
+	/** @ops: range fence ops */
+	const struct xe_range_fence_ops *ops;
+};
+
+/** struct xe_range_fence_tree - interval tree to store range fences */
+struct xe_range_fence_tree {
+	/** @root: interval tree root */
+	struct rb_root_cached root;
+	/**
+	 * @list: list of pending range fences to be freed, filled from the
+	 * fence signal callback
+	 */
+	struct llist_head list;
+};
+
+extern const struct xe_range_fence_ops xe_range_fence_kfree_ops;
+
+struct xe_range_fence *
+xe_range_fence_tree_first(struct xe_range_fence_tree *tree, u64 start,
+			  u64 last);
+
+struct xe_range_fence *
+xe_range_fence_tree_next(struct xe_range_fence *rfence, u64 start, u64 last);
+
+void xe_range_fence_tree_init(struct xe_range_fence_tree *tree);
+
+void xe_range_fence_tree_fini(struct xe_range_fence_tree *tree);
+
+/* @last is the inclusive last address, matching struct xe_range_fence.last */
+int xe_range_fence_insert(struct xe_range_fence_tree *tree,
+			  struct xe_range_fence *rfence,
+			  const struct xe_range_fence_ops *ops,
+			  u64 start, u64 last,
+			  struct dma_fence *fence);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
new file mode 100644
index 000000000000..87adefb56024
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -0,0 +1,284 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_reg_sr.h"
+
+#include <kunit/visibility.h>
+#include <linux/align.h>
+#include <linux/string_helpers.h>
+#include <linux/xarray.h>
+
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+
+#include "regs/xe_engine_regs.h"
+#include "regs/xe_gt_regs.h"
+#include "xe_device_types.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_gt_printk.h"
+#include "xe_hw_engine_types.h"
+#include "xe_macros.h"
+#include "xe_mmio.h"
+#include "xe_reg_whitelist.h"
+#include "xe_rtp_types.h"
+
+#define XE_REG_SR_GROW_STEP_DEFAULT	16
+
+/*
+ * drm-managed release action registered by xe_reg_sr_init(): destroys the
+ * xarray, frees the entry pool and clears the pool bookkeeping.
+ */
+static void reg_sr_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_reg_sr *sr = arg;
+
+	xa_destroy(&sr->xa);
+	kfree(sr->pool.arr);
+	memset(&sr->pool, 0, sizeof(sr->pool));
+}
+
+/*
+ * Initialize @sr as an empty save/restore set called @name and register its
+ * cleanup as a drm-managed action on @xe. @name is stored by pointer, not
+ * copied.
+ *
+ * Return: the result of drmm_add_action_or_reset().
+ */
+int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe)
+{
+	xa_init(&sr->xa);
+	memset(&sr->pool, 0, sizeof(sr->pool));
+	sr->pool.grow_step = XE_REG_SR_GROW_STEP_DEFAULT;
+	sr->name = name;
+
+	return drmm_add_action_or_reset(&xe->drm, reg_sr_fini, sr);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_reg_sr_init);
+
+/*
+ * Hand out the next unused entry of @sr's pool, growing the backing array by
+ * grow_step entries when it is full.
+ *
+ * The xarray in @sr stores pointers into the pool array (xe_reg_sr_add()
+ * stores the pointer returned from here). krealloc_array() may move the
+ * array, so after a move every stored pointer is re-pointed at the same
+ * position in the new array; otherwise later lookups would dereference freed
+ * memory.
+ *
+ * Return: pointer to an unused entry, or NULL if growing the pool failed.
+ */
+static struct xe_reg_sr_entry *alloc_entry(struct xe_reg_sr *sr)
+{
+	if (sr->pool.used == sr->pool.allocated) {
+		struct xe_reg_sr_entry *old = sr->pool.arr;
+		struct xe_reg_sr_entry *arr;
+
+		arr = krealloc_array(old,
+				     ALIGN(sr->pool.allocated + 1, sr->pool.grow_step),
+				     sizeof(*arr), GFP_KERNEL);
+		if (!arr)
+			return NULL;
+
+		if (arr != old) {
+			struct xe_reg_sr_entry *entry;
+			unsigned long idx;
+
+			/*
+			 * Only the position of each entry inside the old array
+			 * is used; the old memory itself is never
+			 * dereferenced. The indices already exist in the
+			 * xarray, so this only replaces their values.
+			 */
+			xa_for_each(&sr->xa, idx, entry) {
+				void *prev = xa_store(&sr->xa, idx,
+						      arr + (entry - old),
+						      GFP_KERNEL);
+
+				XE_WARN_ON(xa_is_err(prev));
+			}
+		}
+
+		sr->pool.arr = arr;
+		sr->pool.allocated += sr->pool.grow_step;
+	}
+
+	return &sr->pool.arr[sr->pool.used++];
+}
+
+/*
+ * Check whether two save/restore entries may be merged into one.
+ *
+ * Return: true if both entries refer to the same register (identical raw
+ * encoding) and no bit is set or cleared by both of them.
+ */
+static bool compatible_entries(const struct xe_reg_sr_entry *e1,
+			       const struct xe_reg_sr_entry *e2)
+{
+	const u32 touched1 = e1->clr_bits | e1->set_bits;
+	const u32 touched2 = e2->clr_bits | e2->set_bits;
+
+	if (e1->reg.raw != e2->reg.raw)
+		return false;
+
+	/*
+	 * Don't allow overwriting values: clr_bits/set_bits should be disjoint
+	 * when operating in the same register
+	 */
+	return !(touched1 & touched2);
+}
+
+/*
+ * Count a failed xe_reg_sr_add(). The counter only exists when
+ * CONFIG_DRM_XE_KUNIT_TEST is enabled; otherwise this is a no-op.
+ */
+static void reg_sr_inc_error(struct xe_reg_sr *sr)
+{
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+	sr->errors++;
+#endif
+}
+
+/*
+ * Add save/restore entry @e to @sr, keyed by the register address.
+ *
+ * If an entry for the same address already exists, @e is merged into it when
+ * the two are compatible (see compatible_entries()); otherwise a new pool
+ * entry is allocated and stored. Failures are logged against @gt and counted.
+ *
+ * Return: 0 on success, -EINVAL if @e conflicts with the existing entry,
+ * -ENOMEM if no entry could be allocated, or the xarray store error.
+ */
+int xe_reg_sr_add(struct xe_reg_sr *sr,
+		  const struct xe_reg_sr_entry *e,
+		  struct xe_gt *gt)
+{
+	unsigned long idx = e->reg.addr;
+	struct xe_reg_sr_entry *pentry = xa_load(&sr->xa, idx);
+	int ret;
+
+	if (pentry) {
+		if (compatible_entries(pentry, e)) {
+			pentry->clr_bits |= e->clr_bits;
+			pentry->set_bits |= e->set_bits;
+			pentry->read_mask |= e->read_mask;
+
+			return 0;
+		}
+
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	pentry = alloc_entry(sr);
+	if (!pentry) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	*pentry = *e;
+	ret = xa_err(xa_store(&sr->xa, idx, pentry, GFP_KERNEL));
+	if (!ret)
+		return 0;
+
+fail:
+	xe_gt_err(gt,
+		  "discarding save-restore reg %04lx (clear: %08x, set: %08x, masked: %s, mcr: %s): ret=%d\n",
+		  idx, e->clr_bits, e->set_bits,
+		  str_yes_no(e->reg.masked),
+		  str_yes_no(e->reg.mcr),
+		  ret);
+	reg_sr_inc_error(sr);
+
+	return ret;
+}
+
+/*
+ * Convert back from encoded value to type-safe, only to be used when reg.mcr
+ * is true. The raw encoding of @reg is carried over unchanged.
+ */
+static struct xe_reg_mcr to_xe_reg_mcr(const struct xe_reg reg)
+{
+	return (const struct xe_reg_mcr){.__reg.raw = reg.raw };
+}
+
+/*
+ * Program one save/restore entry into the hardware through MMIO.
+ *
+ * The value written is built from the entry's clr_bits/set_bits; MCR
+ * registers are read with a unicast read and written with a multicast write,
+ * all other registers use the plain MMIO accessors.
+ */
+static void apply_one_mmio(struct xe_gt *gt, struct xe_reg_sr_entry *entry)
+{
+	struct xe_reg reg = entry->reg;
+	/* Built unconditionally, but only used on the reg.mcr paths below */
+	struct xe_reg_mcr reg_mcr = to_xe_reg_mcr(reg);
+	u32 val;
+
+	/*
+	 * If this is a masked register, need to set the upper 16 bits.
+	 * Set them to clr_bits since that is always a superset of the bits
+	 * being modified.
+	 *
+	 * When it's not masked, we have to read it from hardware, unless we are
+	 * supposed to set all bits.
+	 */
+	if (reg.masked)
+		val = entry->clr_bits << 16;
+	else if (entry->clr_bits + 1)	/* i.e. clr_bits != 0xffffffff */
+		val = (reg.mcr ?
+		       xe_gt_mcr_unicast_read_any(gt, reg_mcr) :
+		       xe_mmio_read32(gt, reg)) & (~entry->clr_bits);
+	else
+		val = 0;
+
+	/*
+	 * TODO: add selftest to validate all tables, regardless of platform:
+	 *   - Masked registers can't have set_bits with upper bits set
+	 *   - set_bits must be contained in clr_bits
+	 */
+	val |= entry->set_bits;
+
+	xe_gt_dbg(gt, "REG[0x%x] = 0x%08x", reg.addr, val);
+
+	if (entry->reg.mcr)
+		xe_gt_mcr_multicast_write(gt, reg_mcr, val);
+	else
+		xe_mmio_write32(gt, reg, val);
+}
+
+/*
+ * Program every entry of @sr into @gt through MMIO while holding all
+ * forcewake domains. Nothing is done for an empty set; a forcewake failure is
+ * logged and nothing is written.
+ */
+void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt)
+{
+	struct xe_reg_sr_entry *entry;
+	unsigned long reg;
+	int err;
+
+	if (xa_empty(&sr->xa))
+		return;
+
+	xe_gt_dbg(gt, "Applying %s save-restore MMIOs\n", sr->name);
+
+	err = xe_force_wake_get(&gt->mmio.fw, XE_FORCEWAKE_ALL);
+	if (err) {
+		xe_gt_err(gt, "Failed to apply, err=%d\n", err);
+		return;
+	}
+
+	xa_for_each(&sr->xa, reg, entry)
+		apply_one_mmio(gt, entry);
+
+	err = xe_force_wake_put(&gt->mmio.fw, XE_FORCEWAKE_ALL);
+	XE_WARN_ON(err);
+}
+
+/*
+ * Program the register whitelist of @hwe into its RING_FORCE_TO_NONPRIV
+ * slots while holding all forcewake domains.
+ *
+ * Each whitelist entry takes one slot, written as the register address OR'ed
+ * with the entry's set_bits. At most RING_MAX_NONPRIV_SLOTS entries are
+ * programmed; further entries are reported and dropped. Unused slots are
+ * pointed at the RING_NOPID register address.
+ */
+void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe)
+{
+	struct xe_reg_sr *sr = &hwe->reg_whitelist;
+	struct xe_gt *gt = hwe->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_reg_sr_entry *entry;
+	struct drm_printer p;
+	u32 mmio_base = hwe->mmio_base;
+	unsigned long reg;
+	unsigned int slot = 0;
+	int err;
+
+	if (xa_empty(&sr->xa))
+		return;
+
+	drm_dbg(&xe->drm, "Whitelisting %s registers\n", sr->name);
+
+	err = xe_force_wake_get(&gt->mmio.fw, XE_FORCEWAKE_ALL);
+	if (err)
+		goto err_force_wake;
+
+	p = drm_debug_printer(KBUILD_MODNAME);
+	xa_for_each(&sr->xa, reg, entry) {
+		if (slot == RING_MAX_NONPRIV_SLOTS) {
+			xe_gt_err(gt,
+				  "hwe %s: maximum register whitelist slots (%d) reached, refusing to add more\n",
+				  hwe->name, RING_MAX_NONPRIV_SLOTS);
+			break;
+		}
+
+		xe_reg_whitelist_print_entry(&p, 0, reg, entry);
+		xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot),
+				reg | entry->set_bits);
+		slot++;
+	}
+
+	/* And clear the rest just in case of garbage */
+	for (; slot < RING_MAX_NONPRIV_SLOTS; slot++) {
+		u32 addr = RING_NOPID(mmio_base).addr;
+
+		xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot), addr);
+	}
+
+	err = xe_force_wake_put(&gt->mmio.fw, XE_FORCEWAKE_ALL);
+	XE_WARN_ON(err);
+
+	return;
+
+err_force_wake:
+	drm_err(&xe->drm, "Failed to apply, err=%d\n", err);
+}
+
+/**
+ * xe_reg_sr_dump - print all save/restore entries
+ * @sr: Save/restore entries
+ * @p: DRM printer
+ *
+ * Prints the name of @sr followed by one line per entry. Nothing is printed
+ * when @sr has no name or holds no entries.
+ */
+void xe_reg_sr_dump(struct xe_reg_sr *sr, struct drm_printer *p)
+{
+	struct xe_reg_sr_entry *entry;
+	unsigned long reg;
+
+	if (!sr->name)
+		return;
+
+	if (xa_empty(&sr->xa))
+		return;
+
+	drm_printf(p, "%s\n", sr->name);
+
+	xa_for_each(&sr->xa, reg, entry) {
+		drm_printf(p, "\tREG[0x%lx] clr=0x%08x set=0x%08x masked=%s mcr=%s\n",
+			   reg, entry->clr_bits, entry->set_bits,
+			   str_yes_no(entry->reg.masked),
+			   str_yes_no(entry->reg.mcr));
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.h b/drivers/gpu/drm/xe/xe_reg_sr.h
new file mode 100644
index 000000000000..e3197c33afe2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_reg_sr.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_REG_SR_
+#define _XE_REG_SR_
+
+#include "xe_reg_sr_types.h"
+
+/*
+ * Reg save/restore bookkeeping
+ */
+
+struct xe_device;
+struct xe_gt;
+struct xe_hw_engine;
+struct drm_printer;
+
+int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe);
+void xe_reg_sr_dump(struct xe_reg_sr *sr, struct drm_printer *p);
+
+int xe_reg_sr_add(struct xe_reg_sr *sr, const struct xe_reg_sr_entry *e,
+		  struct xe_gt *gt);
+void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt);
+void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_reg_sr_types.h b/drivers/gpu/drm/xe/xe_reg_sr_types.h
new file mode 100644
index 000000000000..ad48a52b824a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_reg_sr_types.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_REG_SR_TYPES_
+#define _XE_REG_SR_TYPES_
+
+#include <linux/types.h>
+#include <linux/xarray.h>
+
+#include "regs/xe_reg_defs.h"
+
+/* One save/restore action on a single register */
+struct xe_reg_sr_entry {
+	/* Register the entry applies to */
+	struct xe_reg	reg;
+	/* Bits to clear before set_bits is OR'ed in */
+	u32		clr_bits;
+	/* Bits to set */
+	u32		set_bits;
+	/* Mask for bits to consider when reading value back */
+	u32		read_mask;
+};
+
+/* A named set of save/restore entries, looked up by register address */
+struct xe_reg_sr {
+	/* Backing storage for the entries referenced from @xa */
+	struct {
+		/* Array of entries, grown on demand */
+		struct xe_reg_sr_entry *arr;
+		/* Number of entries of @arr handed out so far */
+		unsigned int used;
+		/* Number of entries @arr has room for */
+		unsigned int allocated;
+		/* Number of entries @arr grows by when it is full */
+		unsigned int grow_step;
+	} pool;
+	/* Register address -> entry */
+	struct xarray xa;
+	/* Name used in log and dump output */
+	const char *name;
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+	/* Number of entries that failed to be added */
+	unsigned int errors;
+#endif
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
new file mode 100644
index 000000000000..e66ae1bdaf9c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_reg_whitelist.h"
+
+#include "regs/xe_engine_regs.h"
+#include "regs/xe_gt_regs.h"
+#include "xe_gt_types.h"
+#include "xe_platform_types.h"
+#include "xe_rtp.h"
+
+#undef XE_REG_MCR
+#define XE_REG_MCR(...)     XE_REG(__VA_ARGS__, .mcr = 1)
+
+static bool match_not_render(const struct xe_gt *gt,
+			     const struct xe_hw_engine *hwe)
+{
+	return hwe->class != XE_ENGINE_CLASS_RENDER;
+}
+
+static const struct xe_rtp_entry_sr register_whitelist[] = {
+	{ XE_RTP_NAME("WaAllowPMDepthAndInvocationCountAccessFromUMD, 1408556865"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(WHITELIST(PS_INVOCATION_COUNT,
+				   RING_FORCE_TO_NONPRIV_ACCESS_RD |
+				   RING_FORCE_TO_NONPRIV_RANGE_4))
+	},
+	{ XE_RTP_NAME("1508744258, 14012131227, 1808121037"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(WHITELIST(COMMON_SLICE_CHICKEN1, 0))
+	},
+	{ XE_RTP_NAME("1806527549"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(WHITELIST(HIZ_CHICKEN, 0))
+	},
+	{ XE_RTP_NAME("allow_read_ctx_timestamp"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1260), FUNC(match_not_render)),
+	  XE_RTP_ACTIONS(WHITELIST(RING_CTX_TIMESTAMP(0),
+				RING_FORCE_TO_NONPRIV_ACCESS_RD,
+				XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+	},
+	{ XE_RTP_NAME("16014440446"),
+	  XE_RTP_RULES(PLATFORM(PVC)),
+	  XE_RTP_ACTIONS(WHITELIST(XE_REG(0x4400),
+				   RING_FORCE_TO_NONPRIV_DENY |
+				   RING_FORCE_TO_NONPRIV_RANGE_64),
+			 WHITELIST(XE_REG(0x4500),
+				   RING_FORCE_TO_NONPRIV_DENY |
+				   RING_FORCE_TO_NONPRIV_RANGE_64))
+	},
+	{ XE_RTP_NAME("16017236439"),
+	  XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COPY)),
+	  XE_RTP_ACTIONS(WHITELIST(BCS_SWCTRL(0),
+				   RING_FORCE_TO_NONPRIV_DENY,
+				   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+	},
+	{}
+};
+
+/**
+ * xe_reg_whitelist_process_engine - process table of registers to whitelist
+ * @hwe: engine instance to process whitelist for
+ *
+ * Process the whitelist table for this platform, saving in @hwe all the
+ * registers that need to be whitelisted by the hardware so they can be accessed
+ * by userspace.
+ */
+void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe)
+{
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
+
+	xe_rtp_process_to_sr(&ctx, register_whitelist, &hwe->reg_whitelist);
+}
+
+/**
+ * xe_reg_whitelist_print_entry - print one whitelist entry
+ * @p: DRM printer
+ * @indent: indent level
+ * @reg: register allowed/denied
+ * @entry: save-restore entry
+ *
+ * Print details about the entry added to allow/deny access
+ */
+void xe_reg_whitelist_print_entry(struct drm_printer *p, unsigned int indent,
+				  u32 reg, struct xe_reg_sr_entry *entry)
+{
+	u32 val = entry->set_bits;
+	const char *access_str = "(invalid)";
+	unsigned int range_bit = 2;
+	u32 range_start, range_end;
+	bool deny;
+
+	deny = val & RING_FORCE_TO_NONPRIV_DENY;
+
+	switch (val & RING_FORCE_TO_NONPRIV_RANGE_MASK) {
+	case RING_FORCE_TO_NONPRIV_RANGE_4:
+		range_bit = 4;
+		break;
+	case RING_FORCE_TO_NONPRIV_RANGE_16:
+		range_bit = 6;
+		break;
+	case RING_FORCE_TO_NONPRIV_RANGE_64:
+		range_bit = 8;
+		break;
+	}
+	/* range_bit is the first address bit above the range, so end = start + 2^range_bit - 1 */
+	range_start = reg & REG_GENMASK(25, range_bit);
+	range_end = range_start | REG_GENMASK(range_bit - 1, 0);
+
+	switch (val & RING_FORCE_TO_NONPRIV_ACCESS_MASK) {
+	case RING_FORCE_TO_NONPRIV_ACCESS_RW:
+		access_str = "rw";
+		break;
+	case RING_FORCE_TO_NONPRIV_ACCESS_RD:
+		access_str = "read";
+		break;
+	case RING_FORCE_TO_NONPRIV_ACCESS_WR:
+		access_str = "write";
+		break;
+	}
+
+	drm_printf_indent(p, indent, "REG[0x%x-0x%x]: %s %s access\n",
+			  range_start, range_end,
+			  deny ? "deny" : "allow",
+			  access_str);
+}
+
+/**
+ * xe_reg_whitelist_dump - print all whitelist entries
+ * @sr: Save/restore entries
+ * @p: DRM printer
+ */
+void xe_reg_whitelist_dump(struct xe_reg_sr *sr, struct drm_printer *p)
+{
+	struct xe_reg_sr_entry *entry;
+	unsigned long reg;
+
+	if (!sr->name || xa_empty(&sr->xa))
+		return;
+
+	drm_printf(p, "%s\n", sr->name);
+	xa_for_each(&sr->xa, reg, entry)
+		xe_reg_whitelist_print_entry(p, 1, reg, entry);
+}
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.h b/drivers/gpu/drm/xe/xe_reg_whitelist.h
new file mode 100644
index 000000000000..69b121d377da
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_REG_WHITELIST_
+#define _XE_REG_WHITELIST_
+
+#include <linux/types.h>
+
+struct drm_printer;
+struct xe_hw_engine;
+struct xe_reg_sr;
+struct xe_reg_sr_entry;
+
+void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe);
+
+void xe_reg_whitelist_print_entry(struct drm_printer *p, unsigned int indent,
+				  u32 reg, struct xe_reg_sr_entry *entry);
+
+void xe_reg_whitelist_dump(struct xe_reg_sr *sr, struct drm_printer *p);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h
new file mode 100644
index 000000000000..0a306963aa8e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_res_cursor.h
@@ -0,0 +1,240 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _XE_RES_CURSOR_H_
+#define _XE_RES_CURSOR_H_
+
+#include <linux/scatterlist.h>
+
+#include <drm/drm_mm.h>
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_range_manager.h>
+#include <drm/ttm/ttm_resource.h>
+#include <drm/ttm/ttm_tt.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_macros.h"
+#include "xe_ttm_vram_mgr.h"
+
+/* state back for walking over vram_mgr, stolen_mgr, and gtt_mgr allocations */
+struct xe_res_cursor {
+	u64 start;
+	u64 size;
+	u64 remaining;
+	void *node;
+	u32 mem_type;
+	struct scatterlist *sgl;
+	struct drm_buddy *mm;
+};
+
+/* Look up the drm_buddy of the TTM manager serving @res's memory type */
+static inline struct drm_buddy *xe_res_get_buddy(struct ttm_resource *res)
+{
+	struct ttm_resource_manager *mgr = ttm_manager_type(res->bo->bdev, res->mem_type);
+
+	return &to_xe_ttm_vram_mgr(mgr)->mm;
+}
+
+/**
+ * xe_res_first - initialize a xe_res_cursor
+ *
+ * @res: TTM resource object to walk
+ * @start: Start of the range
+ * @size: Size of the range
+ * @cur: cursor object to initialize
+ *
+ * Start walking over the @size bytes of allocations that begin at offset @start.
+ */
+static inline void xe_res_first(struct ttm_resource *res,
+				u64 start, u64 size,
+				struct xe_res_cursor *cur)
+{
+	cur->sgl = NULL;
+	if (!res)
+		goto fallback;
+
+	XE_WARN_ON(start + size > res->size);
+
+	cur->mem_type = res->mem_type;
+
+	switch (cur->mem_type) {
+	case XE_PL_STOLEN:
+	case XE_PL_VRAM0:
+	case XE_PL_VRAM1: {
+		struct drm_buddy_block *block;
+		struct list_head *head, *next;
+		struct drm_buddy *mm = xe_res_get_buddy(res);
+
+		head = &to_xe_ttm_vram_mgr_resource(res)->blocks;
+
+		block = list_first_entry_or_null(head,
+						 struct drm_buddy_block,
+						 link);
+		if (!block)
+			goto fallback;
+
+		while (start >= drm_buddy_block_size(mm, block)) {
+			start -= drm_buddy_block_size(mm, block);
+
+			next = block->link.next;
+			if (next != head)
+				block = list_entry(next, struct drm_buddy_block,
+						   link);
+		}
+
+		cur->mm = mm;
+		cur->start = drm_buddy_block_offset(block) + start;
+		cur->size = min(drm_buddy_block_size(mm, block) - start,
+				size);
+		cur->remaining = size;
+		cur->node = block;
+		break;
+	}
+	default:
+		goto fallback;
+	}
+
+	return;
+
+fallback:
+	cur->start = start;
+	cur->size = size;
+	cur->remaining = size;
+	cur->node = NULL;
+	cur->mem_type = XE_PL_TT;
+	XE_WARN_ON(res && start + size > res->size);
+}
+
+static inline void __xe_res_sg_next(struct xe_res_cursor *cur)
+{
+	struct scatterlist *sgl = cur->sgl;
+	u64 start = cur->start;
+
+	while (start >= sg_dma_len(sgl)) {
+		start -= sg_dma_len(sgl);
+		sgl = sg_next(sgl);
+		XE_WARN_ON(!sgl);
+	}
+
+	cur->start = start;
+	cur->size = sg_dma_len(sgl) - start;
+	cur->sgl = sgl;
+}
+
+/**
+ * xe_res_first_sg - initialize a xe_res_cursor with a scatter gather table
+ *
+ * @sg: scatter gather table to walk
+ * @start: Start of the range
+ * @size: Size of the range
+ * @cur: cursor object to initialize
+ *
+ * Start walking over the @size bytes of allocations that begin at offset @start.
+ */
+static inline void xe_res_first_sg(const struct sg_table *sg,
+				   u64 start, u64 size,
+				   struct xe_res_cursor *cur)
+{
+	XE_WARN_ON(!sg);
+	XE_WARN_ON(!IS_ALIGNED(start, PAGE_SIZE) ||
+		   !IS_ALIGNED(size, PAGE_SIZE));
+	cur->node = NULL;
+	cur->start = start;
+	cur->remaining = size;
+	cur->size = 0;
+	cur->sgl = sg->sgl;
+	cur->mem_type = XE_PL_TT;
+	__xe_res_sg_next(cur);
+}
+
+/**
+ * xe_res_next - advance the cursor
+ *
+ * @cur: the cursor to advance
+ * @size: number of bytes to move forward
+ *
+ * Move the cursor @size bytes forward, walking to the next node if necessary.
+ */
+static inline void xe_res_next(struct xe_res_cursor *cur, u64 size)
+{
+	struct drm_buddy_block *block;
+	struct list_head *next;
+	u64 start;
+
+	XE_WARN_ON(size > cur->remaining);
+
+	cur->remaining -= size;
+	if (!cur->remaining)
+		return;
+
+	if (cur->size > size) {
+		cur->size -= size;
+		cur->start += size;
+		return;
+	}
+
+	if (cur->sgl) {
+		cur->start += size;
+		__xe_res_sg_next(cur);
+		return;
+	}
+
+	switch (cur->mem_type) {
+	case XE_PL_STOLEN:
+	case XE_PL_VRAM0:
+	case XE_PL_VRAM1:
+		start = size - cur->size;
+		block = cur->node;
+
+		next = block->link.next;
+		block = list_entry(next, struct drm_buddy_block, link);
+
+
+		while (start >= drm_buddy_block_size(cur->mm, block)) {
+			start -= drm_buddy_block_size(cur->mm, block);
+
+			next = block->link.next;
+			block = list_entry(next, struct drm_buddy_block, link);
+		}
+
+		cur->start = drm_buddy_block_offset(block) + start;
+		cur->size = min(drm_buddy_block_size(cur->mm, block) - start,
+				cur->remaining);
+		cur->node = block;
+		break;
+	default:
+		return;
+	}
+}
+
+/**
+ * xe_res_dma - return dma address of cursor at current position
+ *
+ * @cur: the cursor to return the dma address from
+ */
+static inline u64 xe_res_dma(const struct xe_res_cursor *cur)
+{
+	return cur->sgl ? sg_dma_address(cur->sgl) + cur->start : cur->start;
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
new file mode 100644
index 000000000000..1e4c06eacd98
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -0,0 +1,482 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_ring_ops.h"
+
+#include "generated/xe_wa_oob.h"
+#include "instructions/xe_mi_commands.h"
+#include "regs/xe_engine_regs.h"
+#include "regs/xe_gpu_commands.h"
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_lrc_layout.h"
+#include "xe_exec_queue_types.h"
+#include "xe_gt.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_sched_job.h"
+#include "xe_vm_types.h"
+#include "xe_vm.h"
+#include "xe_wa.h"
+
+/*
+ * 3D-related flags that can't be set on _engines_ that lack access to the 3D
+ * pipeline (i.e., CCS engines).
+ */
+#define PIPE_CONTROL_3D_ENGINE_FLAGS (\
+		PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | \
+		PIPE_CONTROL_DEPTH_CACHE_FLUSH | \
+		PIPE_CONTROL_TILE_CACHE_FLUSH | \
+		PIPE_CONTROL_DEPTH_STALL | \
+		PIPE_CONTROL_STALL_AT_SCOREBOARD | \
+		PIPE_CONTROL_PSD_SYNC | \
+		PIPE_CONTROL_AMFS_FLUSH | \
+		PIPE_CONTROL_VF_CACHE_INVALIDATE | \
+		PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET)
+
+/* 3D-related flags that can't be set on _platforms_ that lack a 3D pipeline */
+#define PIPE_CONTROL_3D_ARCH_FLAGS ( \
+		PIPE_CONTROL_3D_ENGINE_FLAGS | \
+		PIPE_CONTROL_INDIRECT_STATE_DISABLE | \
+		PIPE_CONTROL_FLUSH_ENABLE | \
+		PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \
+		PIPE_CONTROL_DC_FLUSH_ENABLE)
+
+static u32 preparser_disable(bool state)
+{
+	return MI_ARB_CHECK | BIT(8) | state;
+}
+
+static int emit_aux_table_inv(struct xe_gt *gt, struct xe_reg reg,
+			      u32 *dw, int i)
+{
+	dw[i++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1) | MI_LRI_MMIO_REMAP_EN;
+	dw[i++] = reg.addr + gt->mmio.adj_offset;
+	dw[i++] = AUX_INV;
+	dw[i++] = MI_NOOP;
+
+	return i;
+}
+
+static int emit_user_interrupt(u32 *dw, int i)
+{
+	dw[i++] = MI_USER_INTERRUPT;
+	dw[i++] = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	dw[i++] = MI_ARB_CHECK;
+
+	return i;
+}
+
+static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i)
+{
+	dw[i++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
+	dw[i++] = addr;
+	dw[i++] = 0;
+	dw[i++] = value;
+
+	return i;
+}
+
+static int emit_flush_imm_ggtt(u32 addr, u32 value, bool invalidate_tlb,
+			       u32 *dw, int i)
+{
+	dw[i++] = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW |
+		(invalidate_tlb ? MI_INVALIDATE_TLB : 0);
+	dw[i++] = addr | MI_FLUSH_DW_USE_GTT;
+	dw[i++] = 0;
+	dw[i++] = value;
+
+	return i;
+}
+
+static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i)
+{
+	dw[i++] = MI_BATCH_BUFFER_START | ppgtt_flag | XE_INSTR_NUM_DW(3);
+	dw[i++] = lower_32_bits(batch_addr);
+	dw[i++] = upper_32_bits(batch_addr);
+
+	return i;
+}
+
+static int emit_flush_invalidate(u32 flag, u32 *dw, int i)
+{
+	dw[i] = MI_FLUSH_DW;
+	dw[i] |= flag;
+	dw[i++] |= MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW |
+		MI_FLUSH_DW_STORE_INDEX;
+
+	dw[i++] = LRC_PPHWSP_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
+	dw[i++] = 0;
+	dw[i++] = ~0U;
+
+	return i;
+}
+
+static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw,
+				int i)
+{
+	u32 flags = PIPE_CONTROL_CS_STALL |
+		PIPE_CONTROL_COMMAND_CACHE_INVALIDATE |
+		PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
+		PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
+		PIPE_CONTROL_VF_CACHE_INVALIDATE |
+		PIPE_CONTROL_CONST_CACHE_INVALIDATE |
+		PIPE_CONTROL_STATE_CACHE_INVALIDATE |
+		PIPE_CONTROL_QW_WRITE |
+		PIPE_CONTROL_STORE_DATA_INDEX;
+
+	if (invalidate_tlb)
+		flags |= PIPE_CONTROL_TLB_INVALIDATE;
+
+	flags &= ~mask_flags;
+
+	dw[i++] = GFX_OP_PIPE_CONTROL(6);
+	dw[i++] = flags;
+	dw[i++] = LRC_PPHWSP_SCRATCH_ADDR;
+	dw[i++] = 0;
+	dw[i++] = 0;
+	dw[i++] = 0;
+
+	return i;
+}
+
+static int emit_store_imm_ppgtt_posted(u64 addr, u64 value,
+				       u32 *dw, int i)
+{
+	dw[i++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(1);
+	dw[i++] = lower_32_bits(addr);
+	dw[i++] = upper_32_bits(addr);
+	dw[i++] = lower_32_bits(value);
+	dw[i++] = upper_32_bits(value);
+
+	return i;
+}
+
+static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i)
+{
+	struct xe_gt *gt = job->q->gt;
+	bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
+	u32 flags;
+
+	flags = (PIPE_CONTROL_CS_STALL |
+		 PIPE_CONTROL_TILE_CACHE_FLUSH |
+		 PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+		 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+		 PIPE_CONTROL_DC_FLUSH_ENABLE |
+		 PIPE_CONTROL_FLUSH_ENABLE);
+
+	if (XE_WA(gt, 1409600907))
+		flags |= PIPE_CONTROL_DEPTH_STALL;
+
+	if (lacks_render)
+		flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
+	else if (job->q->class == XE_ENGINE_CLASS_COMPUTE)
+		flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
+
+	dw[i++] = GFX_OP_PIPE_CONTROL(6) | PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
+	dw[i++] = flags;
+	dw[i++] = 0;
+	dw[i++] = 0;
+	dw[i++] = 0;
+	dw[i++] = 0;
+
+	return i;
+}
+
+static int emit_pipe_control_to_ring_end(struct xe_hw_engine *hwe, u32 *dw, int i)
+{
+	if (hwe->class != XE_ENGINE_CLASS_RENDER)
+		return i;
+
+	if (XE_WA(hwe->gt, 16020292621)) {
+		dw[i++] = GFX_OP_PIPE_CONTROL(6);
+		dw[i++] = PIPE_CONTROL_LRI_POST_SYNC;
+		dw[i++] = RING_NOPID(hwe->mmio_base).addr;
+		dw[i++] = 0;
+		dw[i++] = 0;
+		dw[i++] = 0;
+	}
+
+	return i;
+}
+
+static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw,
+			      int i)
+{
+	dw[i++] = GFX_OP_PIPE_CONTROL(6);
+	dw[i++] = (stall_only ? PIPE_CONTROL_CS_STALL :
+		   PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL) |
+		PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE;
+	dw[i++] = addr;
+	dw[i++] = 0;
+	dw[i++] = value;
+	dw[i++] = 0; /* We're thrashing one extra dword. */
+
+	return i;
+}
+
+static u32 get_ppgtt_flag(struct xe_sched_job *job)
+{
+	return job->q->vm ? BIT(8) : 0;
+}
+
+/* for engines that don't require any special HW handling (no EUs, no aux inval, etc) */
+static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc,
+				    u64 batch_addr, u32 seqno)
+{
+	u32 dw[MAX_JOB_SIZE_DW], i = 0;
+	u32 ppgtt_flag = get_ppgtt_flag(job);
+	struct xe_vm *vm = job->q->vm;
+	struct xe_gt *gt = job->q->gt;
+
+	if (vm && vm->batch_invalidate_tlb) {
+		dw[i++] = preparser_disable(true);
+		i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+					seqno, true, dw, i);
+		dw[i++] = preparser_disable(false);
+	} else {
+		i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+					seqno, dw, i);
+	}
+
+	i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
+
+	if (job->user_fence.used)
+		i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
+						job->user_fence.value,
+						dw, i);
+
+	i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i);
+
+	i = emit_user_interrupt(dw, i);
+
+	xe_gt_assert(gt, i <= MAX_JOB_SIZE_DW);
+
+	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
+}
+
+static bool has_aux_ccs(struct xe_device *xe)
+{
+	/*
+	 * PVC is a special case that has no compression of either type
+	 * (FlatCCS or AuxCCS).  Also, AuxCCS is no longer used from Xe2
+	 * onward, so any future platforms with no FlatCCS will not have
+	 * AuxCCS either.
+	 */
+	if (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC)
+		return false;
+
+	return !xe->info.has_flat_ccs;
+}
+
+static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
+				   u64 batch_addr, u32 seqno)
+{
+	u32 dw[MAX_JOB_SIZE_DW], i = 0;
+	u32 ppgtt_flag = get_ppgtt_flag(job);
+	struct xe_gt *gt = job->q->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE;
+	struct xe_vm *vm = job->q->vm;
+
+	dw[i++] = preparser_disable(true);
+
+	/* hsdes: 1809175790 */
+	if (has_aux_ccs(xe)) {
+		if (decode)
+			i = emit_aux_table_inv(gt, VD0_AUX_INV, dw, i);
+		else
+			i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i);
+	}
+
+	if (vm && vm->batch_invalidate_tlb)
+		i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+					seqno, true, dw, i);
+
+	dw[i++] = preparser_disable(false);
+
+	if (!vm || !vm->batch_invalidate_tlb)
+		i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+					seqno, dw, i);
+
+	i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
+
+	if (job->user_fence.used)
+		i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
+						job->user_fence.value,
+						dw, i);
+
+	i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i);
+
+	i = emit_user_interrupt(dw, i);
+
+	xe_gt_assert(gt, i <= MAX_JOB_SIZE_DW);
+
+	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
+}
+
+static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
+					    struct xe_lrc *lrc,
+					    u64 batch_addr, u32 seqno)
+{
+	u32 dw[MAX_JOB_SIZE_DW], i = 0;
+	u32 ppgtt_flag = get_ppgtt_flag(job);
+	struct xe_gt *gt = job->q->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
+	struct xe_vm *vm = job->q->vm;
+	u32 mask_flags = 0;
+
+	dw[i++] = preparser_disable(true);
+	if (lacks_render)
+		mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS;
+	else if (job->q->class == XE_ENGINE_CLASS_COMPUTE)
+		mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS;
+
+	/* See __xe_pt_bind_vma() for a discussion on TLB invalidations. */
+	i = emit_pipe_invalidate(mask_flags, vm && vm->batch_invalidate_tlb, dw, i);
+
+	/* hsdes: 1809175790 */
+	if (has_aux_ccs(xe))
+		i = emit_aux_table_inv(gt, CCS_AUX_INV, dw, i);
+
+	dw[i++] = preparser_disable(false);
+
+	i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+				seqno, dw, i);
+
+	i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
+
+	i = emit_render_cache_flush(job, dw, i);
+
+	if (job->user_fence.used)
+		i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
+						job->user_fence.value,
+						dw, i);
+
+	i = emit_pipe_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, lacks_render, dw, i);
+
+	i = emit_user_interrupt(dw, i);
+
+	i = emit_pipe_control_to_ring_end(job->q->hwe, dw, i);
+
+	xe_gt_assert(gt, i <= MAX_JOB_SIZE_DW);
+
+	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
+}
+
+static void emit_migration_job_gen12(struct xe_sched_job *job,
+				     struct xe_lrc *lrc, u32 seqno)
+{
+	u32 dw[MAX_JOB_SIZE_DW], i = 0;
+
+	i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+				seqno, dw, i);
+
+	dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; /* Enabled again below */
+
+	i = emit_bb_start(job->batch_addr[0], BIT(8), dw, i);
+
+	/* XXX: Do we need this? Leaving for now. */
+	dw[i++] = preparser_disable(true);
+	i = emit_flush_invalidate(0, dw, i);
+	dw[i++] = preparser_disable(false);
+
+	i = emit_bb_start(job->batch_addr[1], BIT(8), dw, i);
+
+	dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | job->migrate_flush_flags |
+		MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW;
+	dw[i++] = xe_lrc_seqno_ggtt_addr(lrc) | MI_FLUSH_DW_USE_GTT;
+	dw[i++] = 0;
+	dw[i++] = seqno; /* value */
+
+	i = emit_user_interrupt(dw, i);
+
+	xe_gt_assert(job->q->gt, i <= MAX_JOB_SIZE_DW);
+
+	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
+}
+
+static void emit_job_gen12_gsc(struct xe_sched_job *job)
+{
+	struct xe_gt *gt = job->q->gt;
+
+	xe_gt_assert(gt, job->q->width <= 1); /* no parallel submission for GSCCS */
+
+	__emit_job_gen12_simple(job, job->q->lrc,
+				job->batch_addr[0],
+				xe_sched_job_seqno(job));
+}
+
+static void emit_job_gen12_copy(struct xe_sched_job *job)
+{
+	int i;
+
+	if (xe_sched_job_is_migration(job->q)) {
+		emit_migration_job_gen12(job, job->q->lrc,
+					 xe_sched_job_seqno(job));
+		return;
+	}
+
+	for (i = 0; i < job->q->width; ++i)
+		__emit_job_gen12_simple(job, job->q->lrc + i,
+				        job->batch_addr[i],
+				        xe_sched_job_seqno(job));
+}
+
+static void emit_job_gen12_video(struct xe_sched_job *job)
+{
+	int i;
+
+	/* FIXME: Not doing parallel handshake for now */
+	for (i = 0; i < job->q->width; ++i)
+		__emit_job_gen12_video(job, job->q->lrc + i,
+				       job->batch_addr[i],
+				       xe_sched_job_seqno(job));
+}
+
+static void emit_job_gen12_render_compute(struct xe_sched_job *job)
+{
+	int i;
+
+	for (i = 0; i < job->q->width; ++i)
+		__emit_job_gen12_render_compute(job, job->q->lrc + i,
+						job->batch_addr[i],
+						xe_sched_job_seqno(job));
+}
+
+static const struct xe_ring_ops ring_ops_gen12_gsc = {
+	.emit_job = emit_job_gen12_gsc,
+};
+
+static const struct xe_ring_ops ring_ops_gen12_copy = {
+	.emit_job = emit_job_gen12_copy,
+};
+
+static const struct xe_ring_ops ring_ops_gen12_video = {
+	.emit_job = emit_job_gen12_video,
+};
+
+static const struct xe_ring_ops ring_ops_gen12_render_compute = {
+	.emit_job = emit_job_gen12_render_compute,
+};
+
+const struct xe_ring_ops *
+xe_ring_ops_get(struct xe_gt *gt, enum xe_engine_class class)
+{
+	switch (class) {
+	case XE_ENGINE_CLASS_OTHER:
+		return &ring_ops_gen12_gsc;
+	case XE_ENGINE_CLASS_COPY:
+		return &ring_ops_gen12_copy;
+	case XE_ENGINE_CLASS_VIDEO_DECODE:
+	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+		return &ring_ops_gen12_video;
+	case XE_ENGINE_CLASS_RENDER:
+	case XE_ENGINE_CLASS_COMPUTE:
+		return &ring_ops_gen12_render_compute;
+	default:
+		return NULL;
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.h b/drivers/gpu/drm/xe/xe_ring_ops.h
new file mode 100644
index 000000000000..e942735d76a6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ring_ops.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_RING_OPS_H_
+#define _XE_RING_OPS_H_
+
+#include "xe_hw_engine_types.h"
+#include "xe_ring_ops_types.h"
+
+struct xe_gt;
+
+const struct xe_ring_ops *
+xe_ring_ops_get(struct xe_gt *gt, enum xe_engine_class class);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ring_ops_types.h b/drivers/gpu/drm/xe/xe_ring_ops_types.h
new file mode 100644
index 000000000000..1ae56e2ee7b4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ring_ops_types.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_RING_OPS_TYPES_H_
+#define _XE_RING_OPS_TYPES_H_
+
+struct xe_sched_job;
+
+#define MAX_JOB_SIZE_DW 48
+#define MAX_JOB_SIZE_BYTES (MAX_JOB_SIZE_DW * 4)
+
+/**
+ * struct xe_ring_ops - Ring operations
+ */
+struct xe_ring_ops {
+	/** @emit_job: Write job to ring */
+	void (*emit_job)(struct xe_sched_job *job);
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
new file mode 100644
index 000000000000..fb44cc7521d8
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_rtp.h"
+
+#include <kunit/visibility.h>
+
+#include <drm/xe_drm.h>
+
+#include "xe_gt.h"
+#include "xe_gt_topology.h"
+#include "xe_macros.h"
+#include "xe_reg_sr.h"
+
+/**
+ * DOC: Register Table Processing
+ *
+ * Internal infrastructure to define how registers should be updated based on
+ * rules and actions. This can be used to define tables with multiple entries
+ * (one per register) that will be walked over at some point in time to apply
+ * the values to the registers that have matching rules.
+ */
+
+static bool has_samedia(const struct xe_device *xe)
+{
+	return xe->info.media_verx100 >= 1300;
+}
+
+static bool rule_matches(const struct xe_device *xe,
+			 struct xe_gt *gt,
+			 struct xe_hw_engine *hwe,
+			 const struct xe_rtp_rule *rules,
+			 unsigned int n_rules)
+{
+	const struct xe_rtp_rule *r;
+	unsigned int i;
+	bool match;
+
+	for (r = rules, i = 0; i < n_rules; r = &rules[++i]) {
+		switch (r->match_type) {
+		case XE_RTP_MATCH_PLATFORM:
+			match = xe->info.platform == r->platform;
+			break;
+		case XE_RTP_MATCH_SUBPLATFORM:
+			match = xe->info.platform == r->platform &&
+				xe->info.subplatform == r->subplatform;
+			break;
+		case XE_RTP_MATCH_GRAPHICS_VERSION:
+			match = xe->info.graphics_verx100 == r->ver_start &&
+				(!has_samedia(xe) || !xe_gt_is_media_type(gt));
+			break;
+		case XE_RTP_MATCH_GRAPHICS_VERSION_RANGE:
+			match = xe->info.graphics_verx100 >= r->ver_start &&
+				xe->info.graphics_verx100 <= r->ver_end &&
+				(!has_samedia(xe) || !xe_gt_is_media_type(gt));
+			break;
+		case XE_RTP_MATCH_GRAPHICS_STEP:
+			match = xe->info.step.graphics >= r->step_start &&
+				xe->info.step.graphics < r->step_end &&
+				(!has_samedia(xe) || !xe_gt_is_media_type(gt));
+			break;
+		case XE_RTP_MATCH_MEDIA_VERSION:
+			match = xe->info.media_verx100 == r->ver_start &&
+				(!has_samedia(xe) || xe_gt_is_media_type(gt));
+			break;
+		case XE_RTP_MATCH_MEDIA_VERSION_RANGE:
+			match = xe->info.media_verx100 >= r->ver_start &&
+				xe->info.media_verx100 <= r->ver_end &&
+				(!has_samedia(xe) || xe_gt_is_media_type(gt));
+			break;
+		case XE_RTP_MATCH_MEDIA_STEP:
+			match = xe->info.step.media >= r->step_start &&
+				xe->info.step.media < r->step_end &&
+				(!has_samedia(xe) || xe_gt_is_media_type(gt));
+			break;
+		case XE_RTP_MATCH_INTEGRATED:
+			match = !xe->info.is_dgfx;
+			break;
+		case XE_RTP_MATCH_DISCRETE:
+			match = xe->info.is_dgfx;
+			break;
+		case XE_RTP_MATCH_ENGINE_CLASS:
+			if (drm_WARN_ON(&xe->drm, !hwe))
+				return false;
+
+			match = hwe->class == r->engine_class;
+			break;
+		case XE_RTP_MATCH_NOT_ENGINE_CLASS:
+			if (drm_WARN_ON(&xe->drm, !hwe))
+				return false;
+
+			match = hwe->class != r->engine_class;
+			break;
+		case XE_RTP_MATCH_FUNC:
+			match = r->match_func(gt, hwe);
+			break;
+		default:
+			drm_warn(&xe->drm, "Invalid RTP match %u\n",
+				 r->match_type);
+			match = false;
+		}
+
+		if (!match)
+			return false;
+	}
+
+	return true;
+}
+
+static void rtp_add_sr_entry(const struct xe_rtp_action *action,
+			     struct xe_gt *gt,
+			     u32 mmio_base,
+			     struct xe_reg_sr *sr)
+{
+	struct xe_reg_sr_entry sr_entry = {
+		.reg = action->reg,
+		.clr_bits = action->clr_bits,
+		.set_bits = action->set_bits,
+		.read_mask = action->read_mask,
+	};
+
+	sr_entry.reg.addr += mmio_base;
+	xe_reg_sr_add(sr, &sr_entry, gt);
+}
+
+static bool rtp_process_one_sr(const struct xe_rtp_entry_sr *entry,
+			       struct xe_device *xe, struct xe_gt *gt,
+			       struct xe_hw_engine *hwe, struct xe_reg_sr *sr)
+{
+	const struct xe_rtp_action *action;
+	u32 mmio_base;
+	unsigned int i;
+
+	if (!rule_matches(xe, gt, hwe, entry->rules, entry->n_rules))
+		return false;
+
+	for (i = 0, action = &entry->actions[0]; i < entry->n_actions; action++, i++) {
+		if ((entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) ||
+		    (action->flags & XE_RTP_ACTION_FLAG_ENGINE_BASE))
+			mmio_base = hwe->mmio_base;
+		else
+			mmio_base = 0;
+
+		rtp_add_sr_entry(action, gt, mmio_base, sr);
+	}
+
+	return true;
+}
+
+static void rtp_get_context(struct xe_rtp_process_ctx *ctx,
+			    struct xe_hw_engine **hwe,
+			    struct xe_gt **gt,
+			    struct xe_device **xe)
+{
+	switch (ctx->type) {
+	case XE_RTP_PROCESS_TYPE_GT:
+		*hwe = NULL;
+		*gt = ctx->gt;
+		*xe = gt_to_xe(*gt);
+		break;
+	case XE_RTP_PROCESS_TYPE_ENGINE:
+		*hwe = ctx->hwe;
+		*gt = (*hwe)->gt;
+		*xe = gt_to_xe(*gt);
+		break;
+	};
+}
+
+/**
+ * xe_rtp_process_ctx_enable_active_tracking - Enable tracking of active entries
+ *
+ * Set additional metadata to track what entries are considered "active", i.e.
+ * their rules match the condition. Bits are never cleared: entries with
+ * matching rules set the corresponding bit in the bitmap.
+ *
+ * @ctx: The context for processing the table
+ * @active_entries: bitmap to store the active entries
+ * @n_entries: number of entries to be processed
+ */
+void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx,
+					       unsigned long *active_entries,
+					       size_t n_entries)
+{
+	ctx->active_entries = active_entries;
+	ctx->n_entries = n_entries;
+}
+
+/* Set bits [first, last] (inclusive) in the active-entries bitmap, if tracking is enabled */
+static void rtp_mark_active(struct xe_device *xe,
+			    struct xe_rtp_process_ctx *ctx,
+			    unsigned int first, unsigned int last)
+{
+	if (!ctx->active_entries)
+		return;
+
+	/* @last is an index, so it must be strictly below @n_entries */
+	if (drm_WARN_ON(&xe->drm, last >= ctx->n_entries))
+		return;
+
+	/* Inclusive range: exactly last - first + 1 bits, never the next entry */
+	bitmap_set(ctx->active_entries, first, last - first + 1);
+}
+
+/**
+ * xe_rtp_process_to_sr - Process all rtp @entries, adding the matching ones to
+ *                        the save-restore argument.
+ * @ctx: The context for processing the table, with one of device, gt or hwe
+ * @entries: Table with RTP definitions
+ * @sr: Save-restore struct where matching rules execute the action. This can be
+ *      viewed as the "coalesced view" of multiple tables. The bits for each
+ *      register set are expected not to collide with previously added entries
+ *
+ * Walk the table pointed by @entries (with an empty sentinel) and add all
+ * entries with matching rules to @sr. If @hwe is not NULL, its mmio_base is
+ * used to calculate the right register offset
+ */
+void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx,
+			  const struct xe_rtp_entry_sr *entries,
+			  struct xe_reg_sr *sr)
+{
+	const struct xe_rtp_entry_sr *entry;
+	struct xe_hw_engine *hwe = NULL;
+	struct xe_gt *gt = NULL;
+	struct xe_device *xe = NULL;
+
+	rtp_get_context(ctx, &hwe, &gt, &xe);
+
+	for (entry = entries; entry && entry->name; entry++) {
+		bool match = false;
+
+		if (entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) {
+			struct xe_hw_engine *each_hwe;
+			enum xe_hw_engine_id id;
+
+			for_each_hw_engine(each_hwe, gt, id)
+				match |= rtp_process_one_sr(entry, xe, gt,
+							    each_hwe, sr);
+		} else {
+			match = rtp_process_one_sr(entry, xe, gt, hwe, sr);
+		}
+
+		if (match)
+			rtp_mark_active(xe, ctx, entry - entries,
+					entry - entries);
+	}
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process_to_sr);
+
+/**
+ * xe_rtp_process - Process all rtp @entries, without running any action
+ * @ctx: The context for processing the table, with one of device, gt or hwe
+ * @entries: Table with RTP definitions
+ *
+ * Walk the table pointed by @entries (with an empty sentinel), executing the
+ * rules. A few differences from xe_rtp_process_to_sr():
+ *
+ * 1. There is no action associated with each entry since this uses
+ *    struct xe_rtp_entry. Its main use is for marking active workarounds via
+ *    xe_rtp_process_ctx_enable_active_tracking().
+ * 2. There is support for OR operations by having entries with no name.
+ */
+void xe_rtp_process(struct xe_rtp_process_ctx *ctx,
+		    const struct xe_rtp_entry *entries)
+{
+	const struct xe_rtp_entry *entry, *first_entry;
+	struct xe_hw_engine *hwe;
+	struct xe_gt *gt;
+	struct xe_device *xe;
+
+	rtp_get_context(ctx, &hwe, &gt, &xe);
+
+	first_entry = entries;
+	if (drm_WARN_ON(&xe->drm, !first_entry->name))
+		return;
+
+	for (entry = entries; entry && entry->rules; entry++) {
+		if (entry->name)
+			first_entry = entry;
+
+		if (!rule_matches(xe, gt, hwe, entry->rules, entry->n_rules))
+			continue;
+
+		/* Fast-forward entry, eliminating the OR'ed entries */
+		for (entry++; entry && entry->rules; entry++)
+			if (entry->name)
+				break;
+		entry--;
+
+		rtp_mark_active(xe, ctx, first_entry - entries,
+				entry - entries);
+	}
+}
+
+bool xe_rtp_match_even_instance(const struct xe_gt *gt,
+				const struct xe_hw_engine *hwe)
+{
+	return hwe->instance % 2 == 0;
+}
+
+bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt,
+					  const struct xe_hw_engine *hwe)
+{
+	u64 render_compute_mask = gt->info.engine_mask &
+		(XE_HW_ENGINE_CCS_MASK | XE_HW_ENGINE_RCS_MASK);
+
+	return render_compute_mask &&
+		hwe->engine_id == __ffs(render_compute_mask);
+}
+
+bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt,
+					 const struct xe_hw_engine *hwe)
+{
+	unsigned int dss_per_gslice = 4;
+	unsigned int dss;
+
+	if (drm_WARN(&gt_to_xe(gt)->drm, xe_dss_mask_empty(gt->fuse_topo.g_dss_mask),
+		     "Checking gslice for platform without geometry pipeline\n"))
+		return false;
+
+	dss = xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0);
+
+	return dss >= dss_per_gslice;
+}
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
new file mode 100644
index 000000000000..c56fedd126e6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -0,0 +1,430 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_RTP_
+#define _XE_RTP_
+
+#include <linux/types.h>
+#include <linux/xarray.h>
+
+#define _XE_RTP_INCLUDE_PRIVATE_HELPERS
+
+#include "xe_rtp_helpers.h"
+#include "xe_rtp_types.h"
+
+#undef _XE_RTP_INCLUDE_PRIVATE_HELPERS
+
+/*
+ * Register table poke infrastructure
+ */
+
+struct xe_hw_engine;
+struct xe_gt;
+struct xe_reg_sr;
+
+/*
+ * Macros to encode rules to match against platform, IP version, stepping, etc.
+ * Shouldn't be used directly - see XE_RTP_RULES()
+ */
+#define _XE_RTP_RULE_PLATFORM(plat__)						\
+	{ .match_type = XE_RTP_MATCH_PLATFORM, .platform = plat__ }
+
+#define _XE_RTP_RULE_SUBPLATFORM(plat__, sub__)					\
+	{ .match_type = XE_RTP_MATCH_SUBPLATFORM,				\
+	  .platform = plat__, .subplatform = sub__ }
+
+#define _XE_RTP_RULE_GRAPHICS_STEP(start__, end__)				\
+	{ .match_type = XE_RTP_MATCH_GRAPHICS_STEP,				\
+	  .step_start = start__, .step_end = end__ }
+
+#define _XE_RTP_RULE_MEDIA_STEP(start__, end__)					\
+	{ .match_type = XE_RTP_MATCH_MEDIA_STEP,				\
+	  .step_start = start__, .step_end = end__ }
+
+#define _XE_RTP_RULE_ENGINE_CLASS(cls__)					\
+	{ .match_type = XE_RTP_MATCH_ENGINE_CLASS,				\
+	  .engine_class = (cls__) }
+
+/**
+ * XE_RTP_RULE_PLATFORM - Create rule matching platform
+ * @plat_: platform to match
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_PLATFORM(plat_)						\
+	_XE_RTP_RULE_PLATFORM(XE_##plat_)
+
+/**
+ * XE_RTP_RULE_SUBPLATFORM - Create rule matching platform and sub-platform
+ * @plat_: platform to match
+ * @sub_: sub-platform to match
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_SUBPLATFORM(plat_, sub_)					\
+	_XE_RTP_RULE_SUBPLATFORM(XE_##plat_, XE_SUBPLATFORM_##plat_##_##sub_)
+
+/**
+ * XE_RTP_RULE_GRAPHICS_STEP - Create rule matching graphics stepping
+ * @start_: First stepping matching the rule
+ * @end_: First stepping that does not match the rule
+ *
+ * Note that the range matching this rule is [ @start_, @end_ ), i.e. inclusive
+ * on the left, exclusive on the right.
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_GRAPHICS_STEP(start_, end_)					\
+	_XE_RTP_RULE_GRAPHICS_STEP(STEP_##start_, STEP_##end_)
+
+/**
+ * XE_RTP_RULE_MEDIA_STEP - Create rule matching media stepping
+ * @start_: First stepping matching the rule
+ * @end_: First stepping that does not match the rule
+ *
+ * Note that the range matching this rule is [ @start_, @end_ ), i.e. inclusive
+ * on the left, exclusive on the right.
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_MEDIA_STEP(start_, end_)					\
+	_XE_RTP_RULE_MEDIA_STEP(STEP_##start_, STEP_##end_)
+
+/**
+ * XE_RTP_RULE_ENGINE_CLASS - Create rule matching an engine class
+ * @cls_: Engine class to match
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_ENGINE_CLASS(cls_)						\
+	_XE_RTP_RULE_ENGINE_CLASS(XE_ENGINE_CLASS_##cls_)
+
+/**
+ * XE_RTP_RULE_FUNC - Create rule using callback function for match
+ * @func__: Function to call to decide if rule matches
+ *
+ * This allows more complex checks to be performed. The ``XE_RTP``
+ * infrastructure will simply call the function @func__ passed to decide if this
+ * rule matches the device.
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_FUNC(func__)						\
+	{ .match_type = XE_RTP_MATCH_FUNC,					\
+	  .match_func = (func__) }
+
+/**
+ * XE_RTP_RULE_GRAPHICS_VERSION - Create rule matching graphics version
+ * @ver__: Graphics IP version to match
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_GRAPHICS_VERSION(ver__)					\
+	{ .match_type = XE_RTP_MATCH_GRAPHICS_VERSION,				\
+	  .ver_start = ver__, }
+
+/**
+ * XE_RTP_RULE_GRAPHICS_VERSION_RANGE - Create rule matching a range of graphics version
+ * @ver_start__: First graphics IP version to match
+ * @ver_end__: Last graphics IP version to match
+ *
+ * Note that the range matching this rule is [ @ver_start__, @ver_end__ ], i.e.
+ * inclusive on both sides.
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_GRAPHICS_VERSION_RANGE(ver_start__, ver_end__)		\
+	{ .match_type = XE_RTP_MATCH_GRAPHICS_VERSION_RANGE,			\
+	  .ver_start = ver_start__, .ver_end = ver_end__, }
+
+/**
+ * XE_RTP_RULE_MEDIA_VERSION - Create rule matching media version
+ * @ver__: Media IP version to match
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_MEDIA_VERSION(ver__)					\
+	{ .match_type = XE_RTP_MATCH_MEDIA_VERSION,				\
+	  .ver_start = ver__, }
+
+/**
+ * XE_RTP_RULE_MEDIA_VERSION_RANGE - Create rule matching a range of media version
+ * @ver_start__: First media IP version to match
+ * @ver_end__: Last media IP version to match
+ *
+ * Note that the range matching this rule is [ @ver_start__, @ver_end__ ], i.e.
+ * inclusive on both sides.
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_MEDIA_VERSION_RANGE(ver_start__, ver_end__)			\
+	{ .match_type = XE_RTP_MATCH_MEDIA_VERSION_RANGE,			\
+	  .ver_start = ver_start__, .ver_end = ver_end__, }
+
+/**
+ * XE_RTP_RULE_IS_INTEGRATED - Create a rule matching integrated graphics devices
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_IS_INTEGRATED						\
+	{ .match_type = XE_RTP_MATCH_INTEGRATED }
+
+/**
+ * XE_RTP_RULE_IS_DISCRETE - Create a rule matching discrete graphics devices
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_IS_DISCRETE							\
+	{ .match_type = XE_RTP_MATCH_DISCRETE }
+
+/**
+ * XE_RTP_ACTION_WR - Helper to write a value to the register, overriding all
+ *                    the bits
+ * @reg_: Register
+ * @val_: Value to set
+ * @...: Additional fields to override in the struct xe_rtp_action entry
+ *
+ * The correspondent notation in bspec is:
+ *
+ *	REGNAME = VALUE
+ */
+#define XE_RTP_ACTION_WR(reg_, val_, ...)					\
+	{ .reg = XE_RTP_DROP_CAST(reg_),					\
+	  .clr_bits = ~0u, .set_bits = (val_),					\
+	  .read_mask = (~0u), ##__VA_ARGS__ }
+
+/**
+ * XE_RTP_ACTION_SET - Set bits from @val_ in the register.
+ * @reg_: Register
+ * @val_: Bits to set in the register
+ * @...: Additional fields to override in the struct xe_rtp_action entry
+ *
+ * For masked registers this translates to a single write, while for other
+ * registers it's a RMW. The correspondent bspec notation is (example for bits 2
+ * and 5, but could be any):
+ *
+ *	REGNAME[2] = 1
+ *	REGNAME[5] = 1
+ */
+#define XE_RTP_ACTION_SET(reg_, val_, ...)					\
+	{ .reg = XE_RTP_DROP_CAST(reg_),					\
+	  .clr_bits = val_, .set_bits = val_,					\
+	  .read_mask = val_, ##__VA_ARGS__ }
+
+/**
+ * XE_RTP_ACTION_CLR: Clear bits from @val_ in the register.
+ * @reg_: Register
+ * @val_: Bits to clear in the register
+ * @...: Additional fields to override in the struct xe_rtp_action entry
+ *
+ * For masked registers this translates to a single write, while for other
+ * registers it's a RMW. The correspondent bspec notation is (example for bits 2
+ * and 5, but could be any):
+ *
+ *	REGNAME[2] = 0
+ *	REGNAME[5] = 0
+ */
+#define XE_RTP_ACTION_CLR(reg_, val_, ...)					\
+	{ .reg = XE_RTP_DROP_CAST(reg_),					\
+	  .clr_bits = val_, .set_bits = 0,					\
+	  .read_mask = val_, ##__VA_ARGS__ }
+
+/**
+ * XE_RTP_ACTION_FIELD_SET: Set a bit range
+ * @reg_: Register
+ * @mask_bits_: Mask of bits to be changed in the register, forming a field
+ * @val_: Value to set in the field denoted by @mask_bits_
+ * @...: Additional fields to override in the struct xe_rtp_action entry
+ *
+ * For masked registers this translates to a single write, while for other
+ * registers it's a RMW. The correspondent bspec notation is:
+ *
+ *	REGNAME[<end>:<start>] = VALUE
+ */
+#define XE_RTP_ACTION_FIELD_SET(reg_, mask_bits_, val_, ...)			\
+	{ .reg = XE_RTP_DROP_CAST(reg_),					\
+	  .clr_bits = mask_bits_, .set_bits = val_,				\
+	  .read_mask = mask_bits_, ##__VA_ARGS__ }
+
+#define XE_RTP_ACTION_FIELD_SET_NO_READ_MASK(reg_, mask_bits_, val_, ...)	\
+	{ .reg = XE_RTP_DROP_CAST(reg_),					\
+	  .clr_bits = (mask_bits_), .set_bits = (val_),				\
+	  .read_mask = 0, ##__VA_ARGS__ }
+
+/**
+ * XE_RTP_ACTION_WHITELIST - Add register to userspace whitelist
+ * @reg_: Register
+ * @val_: Whitelist-specific flags to set
+ * @...: Additional fields to override in the struct xe_rtp_action entry
+ *
+ * Add a register to the whitelist, allowing userspace to modify the register
+ * with regular user privileges.
+ */
+#define XE_RTP_ACTION_WHITELIST(reg_, val_, ...)				\
+	/* TODO fail build if ((flags) & ~(RING_FORCE_TO_NONPRIV_MASK_VALID)) */\
+	{ .reg = XE_RTP_DROP_CAST(reg_),					\
+	  .set_bits = val_,							\
+	  .clr_bits = RING_FORCE_TO_NONPRIV_MASK_VALID,				\
+	  ##__VA_ARGS__ }
+
+/**
+ * XE_RTP_NAME - Helper to set the name in xe_rtp_entry
+ * @s_: Name describing this rule, often a HW-specific number
+ *
+ * TODO: maybe move this behind a debug config?
+ */
+#define XE_RTP_NAME(s_)	.name = (s_)
+
+/**
+ * XE_RTP_ENTRY_FLAG - Helper to add multiple flags to a struct xe_rtp_entry_sr
+ * @...: Entry flags, without the ``XE_RTP_ENTRY_FLAG_`` prefix
+ *
+ * Helper to automatically add a ``XE_RTP_ENTRY_FLAG_`` prefix to the flags
+ * when defining struct xe_rtp_entry_sr entries. Example:
+ *
+ * .. code-block:: c
+ *
+ *	const struct xe_rtp_entry_sr wa_entries[] = {
+ *		...
+ *		{ XE_RTP_NAME("test-entry"),
+ *		  ...
+ *		  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+ *		  ...
+ *		},
+ *		...
+ *	};
+ */
+#define XE_RTP_ENTRY_FLAG(...)							\
+	.flags = (XE_RTP_PASTE_FOREACH(ENTRY_FLAG_, BITWISE_OR, (__VA_ARGS__)))
+
+/**
+ * XE_RTP_ACTION_FLAG - Helper to add multiple flags to a struct xe_rtp_action
+ * @...: Action flags, without the ``XE_RTP_ACTION_FLAG_`` prefix
+ *
+ * Helper to automatically add a ``XE_RTP_ACTION_FLAG_`` prefix to the flags
+ * when defining struct xe_rtp_action entries. Example:
+ *
+ * .. code-block:: c
+ *
+ *	const struct xe_rtp_entry_sr wa_entries[] = {
+ *		...
+ *		{ XE_RTP_NAME("test-entry"),
+ *		  ...
+ *		  XE_RTP_ACTION_SET(..., XE_RTP_ACTION_FLAG(ENGINE_BASE)),
+ *		  ...
+ *		},
+ *		...
+ *	};
+ */
+#define XE_RTP_ACTION_FLAG(...)							\
+	.flags = (XE_RTP_PASTE_FOREACH(ACTION_FLAG_, BITWISE_OR, (__VA_ARGS__)))
+
+/**
+ * XE_RTP_RULES - Helper to set multiple rules to a struct xe_rtp_entry_sr entry
+ * @...: Rules
+ *
+ * At least one rule is needed and up to 4 are supported. Multiple rules are
+ * AND'ed together, i.e. all the rules must evaluate to true for the entry to
+ * be processed. See XE_RTP_MATCH_* for the possible match rules. Example:
+ *
+ * .. code-block:: c
+ *
+ *	const struct xe_rtp_entry_sr wa_entries[] = {
+ *		...
+ *		{ XE_RTP_NAME("test-entry"),
+ *		  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
+ *		  ...
+ *		},
+ *		...
+ *	};
+ */
+#define XE_RTP_RULES(...)							\
+	.n_rules = _XE_COUNT_ARGS(__VA_ARGS__),					\
+	.rules = (const struct xe_rtp_rule[]) {					\
+		XE_RTP_PASTE_FOREACH(RULE_, COMMA, (__VA_ARGS__))	\
+	}
+
+/**
+ * XE_RTP_ACTIONS - Helper to set multiple actions to a struct xe_rtp_entry_sr
+ * @...: Actions to be taken
+ *
+ * At least one action is needed and up to 4 are supported. See XE_RTP_ACTION_*
+ * for the possible actions. Example:
+ *
+ * .. code-block:: c
+ *
+ *	const struct xe_rtp_entry_sr wa_entries[] = {
+ *		...
+ *		{ XE_RTP_NAME("test-entry"),
+ *		  XE_RTP_RULES(...),
+ *		  XE_RTP_ACTIONS(SET(..), SET(...), CLR(...)),
+ *		  ...
+ *		},
+ *		...
+ *	};
+ */
+#define XE_RTP_ACTIONS(...)							\
+	.n_actions = _XE_COUNT_ARGS(__VA_ARGS__),				\
+	.actions = (const struct xe_rtp_action[]) {				\
+		XE_RTP_PASTE_FOREACH(ACTION_, COMMA, (__VA_ARGS__))	\
+	}
+
+#define XE_RTP_PROCESS_CTX_INITIALIZER(arg__) _Generic((arg__),							\
+	struct xe_hw_engine * :	(struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_ENGINE },	\
+	struct xe_gt * :	(struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT })
+
+void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx,
+					       unsigned long *active_entries,
+					       size_t n_entries);
+
+void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx,
+			  const struct xe_rtp_entry_sr *entries,
+			  struct xe_reg_sr *sr);
+
+void xe_rtp_process(struct xe_rtp_process_ctx *ctx,
+		    const struct xe_rtp_entry *entries);
+
+/* Match functions to be used with XE_RTP_MATCH_FUNC */
+
+/**
+ * xe_rtp_match_even_instance - Match if engine instance is even
+ * @gt: GT structure
+ * @hwe: Engine instance
+ *
+ * Returns: true if engine instance is even, false otherwise
+ */
+bool xe_rtp_match_even_instance(const struct xe_gt *gt,
+				const struct xe_hw_engine *hwe);
+
+/*
+ * xe_rtp_match_first_render_or_compute - Match if it's first render or compute
+ * engine in the GT
+ *
+ * @gt: GT structure
+ * @hwe: Engine instance
+ *
+ * Registers on the render reset domain need to have their values re-applied
+ * when any of those engines are reset. Since the engines reset together, a
+ * programming can be set to just one of them. For simplicity the first engine
+ * of either render or compute class can be chosen.
+ *
+ * Returns: true if engine id is the first to match the render reset domain,
+ * false otherwise.
+ */
+bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt,
+					  const struct xe_hw_engine *hwe);
+
+/*
+ * xe_rtp_match_first_gslice_fused_off - Match when first gslice is fused off
+ *
+ * @gt: GT structure
+ * @hwe: Engine instance
+ *
+ * Returns: true if first gslice is fused off, false otherwise.
+ */
+bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt,
+					 const struct xe_hw_engine *hwe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_rtp_helpers.h b/drivers/gpu/drm/xe/xe_rtp_helpers.h
new file mode 100644
index 000000000000..181b6290fac3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_rtp_helpers.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_RTP_HELPERS_
+#define _XE_RTP_HELPERS_
+
+#ifndef _XE_RTP_INCLUDE_PRIVATE_HELPERS
+#error "This header is supposed to be included by xe_rtp.h only"
+#endif
+
+/*
+ * Helper macros - not to be used outside this header.
+ */
+#define _XE_ESC(...) __VA_ARGS__
+#define _XE_COUNT_ARGS(...) _XE_ESC(__XE_COUNT_ARGS(__VA_ARGS__, 5, 4, 3, 2, 1,))
+#define __XE_COUNT_ARGS(_, _5, _4, _3, _2, X_, ...) X_
+
+#define _XE_FIRST(...) _XE_ESC(__XE_FIRST(__VA_ARGS__,))
+#define __XE_FIRST(x_, ...) x_
+#define _XE_TUPLE_TAIL(...) _XE_ESC(__XE_TUPLE_TAIL(__VA_ARGS__))
+#define __XE_TUPLE_TAIL(x_, ...) (__VA_ARGS__)
+
+#define _XE_DROP_FIRST(x_, ...) __VA_ARGS__
+
+#define _XE_RTP_CONCAT(a, b) __XE_RTP_CONCAT(a, b)
+#define __XE_RTP_CONCAT(a, b) XE_RTP_ ## a ## b
+
+#define __XE_RTP_PASTE_SEP_COMMA		,
+#define __XE_RTP_PASTE_SEP_BITWISE_OR		|
+
+/*
+ * XE_RTP_PASTE_FOREACH - Paste XE_RTP_<@prefix_> on each element of the tuple
+ * @args_, with the end result separated by @sep_. @sep_ must be one of the
+ * previously declared macros __XE_RTP_PASTE_SEP_*, or declared with such
+ * prefix.
+ *
+ * Examples:
+ *
+ * 1) XE_RTP_PASTE_FOREACH(TEST_, COMMA, (FOO, BAR))
+ *    expands to:
+ *
+ *	XE_RTP_TEST_FOO , XE_RTP_TEST_BAR
+ *
+ * 2) XE_RTP_PASTE_FOREACH(TEST2_, COMMA, (FOO))
+ *    expands to:
+ *
+ *	XE_RTP_TEST2_FOO
+ *
+ * 3) XE_RTP_PASTE_FOREACH(TEST3_, BITWISE_OR, (FOO, BAR))
+ *    expands to:
+ *
+ *	XE_RTP_TEST3_FOO | XE_RTP_TEST3_BAR
+ *
+ * 4) #define __XE_RTP_PASTE_SEP_MY_SEP	BANANA
+ *    XE_RTP_PASTE_FOREACH(TEST_, MY_SEP, (FOO, BAR))
+ *    expands to:
+ *
+ *	XE_RTP_TEST_FOO BANANA XE_RTP_TEST_BAR
+ */
+#define XE_RTP_PASTE_FOREACH(prefix_, sep_, args_) _XE_ESC(_XE_RTP_CONCAT(PASTE_, _XE_COUNT_ARGS args_)(prefix_, sep_, args_))
+#define XE_RTP_PASTE_1(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_)
+#define XE_RTP_PASTE_2(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL args_)
+#define XE_RTP_PASTE_3(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL args_)
+#define XE_RTP_PASTE_4(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_)
+
+/*
+ * XE_RTP_DROP_CAST - Drop cast to convert a compound literal to an initializer
+ *
+ * Example:
+ *
+ *	#define foo(a_)	((struct foo){ .a = a_ })
+ *	XE_RTP_DROP_CAST(foo(10))
+ *	expands to:
+ *
+ *	{ .a = 10 }
+ */
+#define XE_RTP_DROP_CAST(...) _XE_ESC(_XE_DROP_FIRST _XE_ESC __VA_ARGS__)
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h
new file mode 100644
index 000000000000..637acc7626a4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_rtp_types.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_RTP_TYPES_
+#define _XE_RTP_TYPES_
+
+#include <linux/types.h>
+
+#include "regs/xe_reg_defs.h"
+
+struct xe_hw_engine;
+struct xe_gt;
+
+/**
+ * struct xe_rtp_action - action to take for any matching rule
+ *
+ * This struct records what action should be taken in a register that has a
+ * matching rule. Example of actions: set/clear bits.
+ */
+struct xe_rtp_action {
+	/** @reg: Register */
+	struct xe_reg		reg;
+	/**
+	 * @clr_bits: bits to clear when updating register. It's always a
+	 * superset of bits being modified
+	 */
+	u32			clr_bits;
+	/** @set_bits: bits to set when updating register */
+	u32			set_bits;
+#define XE_RTP_NOCHECK		.read_mask = 0
+	/** @read_mask: mask for bits to consider when reading value back */
+	u32			read_mask;
+#define XE_RTP_ACTION_FLAG_ENGINE_BASE		BIT(0)
+	/** @flags: flags to apply on rule evaluation or action */
+	u8			flags;
+};
+
+enum {
+	XE_RTP_MATCH_PLATFORM,
+	XE_RTP_MATCH_SUBPLATFORM,
+	XE_RTP_MATCH_GRAPHICS_VERSION,
+	XE_RTP_MATCH_GRAPHICS_VERSION_RANGE,
+	XE_RTP_MATCH_GRAPHICS_STEP,
+	XE_RTP_MATCH_MEDIA_VERSION,
+	XE_RTP_MATCH_MEDIA_VERSION_RANGE,
+	XE_RTP_MATCH_MEDIA_STEP,
+	XE_RTP_MATCH_INTEGRATED,
+	XE_RTP_MATCH_DISCRETE,
+	XE_RTP_MATCH_ENGINE_CLASS,
+	XE_RTP_MATCH_NOT_ENGINE_CLASS,
+	XE_RTP_MATCH_FUNC,
+};
+
+/** struct xe_rtp_rule - match rule for processing entry */
+struct xe_rtp_rule {
+	u8 match_type;
+
+	/* match filters */
+	union {
+		/* MATCH_PLATFORM / MATCH_SUBPLATFORM */
+		struct {
+			u8 platform;
+			u8 subplatform;
+		};
+		/*
+		 * MATCH_GRAPHICS_VERSION / XE_RTP_MATCH_GRAPHICS_VERSION_RANGE /
+		 * MATCH_MEDIA_VERSION  / XE_RTP_MATCH_MEDIA_VERSION_RANGE
+		 */
+		struct {
+			u32 ver_start;
+#define XE_RTP_END_VERSION_UNDEFINED	U32_MAX
+			u32 ver_end;
+		};
+		/* MATCH_STEP */
+		struct {
+			u8 step_start;
+			u8 step_end;
+		};
+		/* MATCH_ENGINE_CLASS / MATCH_NOT_ENGINE_CLASS */
+		struct {
+			u8 engine_class;
+		};
+		/* MATCH_FUNC */
+		bool (*match_func)(const struct xe_gt *gt,
+				   const struct xe_hw_engine *hwe);
+	};
+};
+
+/** struct xe_rtp_entry_sr - Entry in an rtp table */
+struct xe_rtp_entry_sr {
+	const char *name;
+	const struct xe_rtp_action *actions;
+	const struct xe_rtp_rule *rules;
+	u8 n_rules;
+	u8 n_actions;
+#define XE_RTP_ENTRY_FLAG_FOREACH_ENGINE	BIT(0)
+	u8 flags;
+};
+
+/** struct xe_rtp_entry - Entry in an rtp table, with no action associated */
+struct xe_rtp_entry {
+	const char *name;
+	const struct xe_rtp_rule *rules;
+	u8 n_rules;
+};
+
+enum xe_rtp_process_type {
+	XE_RTP_PROCESS_TYPE_GT,
+	XE_RTP_PROCESS_TYPE_ENGINE,
+};
+
+struct xe_rtp_process_ctx {
+	union {
+		struct xe_gt *gt;
+		struct xe_hw_engine *hwe;
+	};
+	enum xe_rtp_process_type type;
+	unsigned long *active_entries;
+	size_t n_entries;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
new file mode 100644
index 000000000000..2c4632259edd
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sa.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_sa.h"
+
+#include <linux/kernel.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_map.h"
+
+static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_sa_manager *sa_manager = arg;
+	struct xe_bo *bo = sa_manager->bo;
+
+	if (!bo) {
+		drm_err(drm, "no bo for sa manager\n");
+		return;
+	}
+
+	drm_suballoc_manager_fini(&sa_manager->base);
+
+	if (bo->vmap.is_iomem)
+		kvfree(sa_manager->cpu_ptr);
+
+	xe_bo_unpin_map_no_vm(bo);
+	sa_manager->bo = NULL;
+}
+
+struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 align)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	u32 managed_size = size - SZ_4K;
+	struct xe_bo *bo;
+	int ret;
+
+	struct xe_sa_manager *sa_manager = drmm_kzalloc(&tile_to_xe(tile)->drm,
+							sizeof(*sa_manager),
+							GFP_KERNEL);
+	if (!sa_manager)
+		return ERR_PTR(-ENOMEM);
+
+	sa_manager->bo = NULL;
+
+	bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+				  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo)) {
+		drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n",
+			PTR_ERR(bo));
+		return (struct xe_sa_manager *)bo;
+	}
+	sa_manager->bo = bo;
+
+	drm_suballoc_manager_init(&sa_manager->base, managed_size, align);
+	sa_manager->gpu_addr = xe_bo_ggtt_addr(bo);
+
+	if (bo->vmap.is_iomem) {
+		sa_manager->cpu_ptr = kvzalloc(managed_size, GFP_KERNEL);
+		if (!sa_manager->cpu_ptr) {
+			xe_bo_unpin_map_no_vm(sa_manager->bo);
+			sa_manager->bo = NULL;
+			return ERR_PTR(-ENOMEM);
+		}
+	} else {
+		sa_manager->cpu_ptr = bo->vmap.vaddr;
+		memset(sa_manager->cpu_ptr, 0, bo->ttm.base.size);
+	}
+
+	ret = drmm_add_action_or_reset(&xe->drm, xe_sa_bo_manager_fini,
+				       sa_manager);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return sa_manager;
+}
+
+struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager,
+				  unsigned int size)
+{
+	return drm_suballoc_new(&sa_manager->base, size, GFP_KERNEL, true, 0);
+}
+
+void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo)
+{
+	struct xe_sa_manager *sa_manager = to_xe_sa_manager(sa_bo->manager);
+	struct xe_device *xe = tile_to_xe(sa_manager->bo->tile);
+
+	if (!sa_manager->bo->vmap.is_iomem)
+		return;
+
+	xe_map_memcpy_to(xe, &sa_manager->bo->vmap, drm_suballoc_soffset(sa_bo),
+			 xe_sa_bo_cpu_addr(sa_bo),
+			 drm_suballoc_size(sa_bo));
+}
+
+void xe_sa_bo_free(struct drm_suballoc *sa_bo,
+		   struct dma_fence *fence)
+{
+	drm_suballoc_free(sa_bo, fence);
+}
diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h
new file mode 100644
index 000000000000..4e96483057d7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sa.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#ifndef _XE_SA_H_
+#define _XE_SA_H_
+
+#include "xe_sa_types.h"
+
+struct dma_fence;
+struct xe_bo;
+struct xe_tile;
+
+struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 align);
+
+struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager,
+				  u32 size);
+void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo);
+void xe_sa_bo_free(struct drm_suballoc *sa_bo,
+		   struct dma_fence *fence);
+
+static inline struct xe_sa_manager *
+to_xe_sa_manager(struct drm_suballoc_manager *mng)
+{
+	return container_of(mng, struct xe_sa_manager, base);
+}
+
+static inline u64 xe_sa_bo_gpu_addr(struct drm_suballoc *sa)
+{
+	return to_xe_sa_manager(sa->manager)->gpu_addr +
+		drm_suballoc_soffset(sa);
+}
+
+static inline void *xe_sa_bo_cpu_addr(struct drm_suballoc *sa)
+{
+	return to_xe_sa_manager(sa->manager)->cpu_ptr +
+		drm_suballoc_soffset(sa);
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sa_types.h b/drivers/gpu/drm/xe/xe_sa_types.h
new file mode 100644
index 000000000000..2ef896aeca1d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sa_types.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#ifndef _XE_SA_TYPES_H_
+#define _XE_SA_TYPES_H_
+
+#include <drm/drm_suballoc.h>
+
+struct xe_bo;
+
+struct xe_sa_manager {
+	struct drm_suballoc_manager base;
+	struct xe_bo *bo;
+	u64 gpu_addr;
+	void *cpu_ptr;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
new file mode 100644
index 000000000000..4e2ccad0e52f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_sched_job.h"
+
+#include <linux/dma-fence-array.h>
+#include <linux/slab.h>
+
+#include "xe_device.h"
+#include "xe_exec_queue.h"
+#include "xe_gt.h"
+#include "xe_hw_engine_types.h"
+#include "xe_hw_fence.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+
+static struct kmem_cache *xe_sched_job_slab;
+static struct kmem_cache *xe_sched_job_parallel_slab;
+
+int __init xe_sched_job_module_init(void)
+{
+	xe_sched_job_slab =
+		kmem_cache_create("xe_sched_job",
+				  sizeof(struct xe_sched_job) +
+				  sizeof(u64), 0,
+				  SLAB_HWCACHE_ALIGN, NULL);
+	if (!xe_sched_job_slab)
+		return -ENOMEM;
+
+	xe_sched_job_parallel_slab =
+		kmem_cache_create("xe_sched_job_parallel",
+				  sizeof(struct xe_sched_job) +
+				  sizeof(u64) *
+				  XE_HW_ENGINE_MAX_INSTANCE, 0,
+				  SLAB_HWCACHE_ALIGN, NULL);
+	if (!xe_sched_job_parallel_slab) {
+		kmem_cache_destroy(xe_sched_job_slab);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+void xe_sched_job_module_exit(void)
+{
+	kmem_cache_destroy(xe_sched_job_slab);
+	kmem_cache_destroy(xe_sched_job_parallel_slab);
+}
+
+static struct xe_sched_job *job_alloc(bool parallel)
+{
+	return kmem_cache_zalloc(parallel ? xe_sched_job_parallel_slab :
+				 xe_sched_job_slab, GFP_KERNEL);
+}
+
+bool xe_sched_job_is_migration(struct xe_exec_queue *q)
+{
+	return q->vm && (q->vm->flags & XE_VM_FLAG_MIGRATION);
+}
+
+static void job_free(struct xe_sched_job *job)
+{
+	struct xe_exec_queue *q = job->q;
+	bool is_migration = xe_sched_job_is_migration(q);
+
+	kmem_cache_free(xe_exec_queue_is_parallel(job->q) || is_migration ?
+			xe_sched_job_parallel_slab : xe_sched_job_slab, job);
+}
+
+static struct xe_device *job_to_xe(struct xe_sched_job *job)
+{
+	return gt_to_xe(job->q->gt);
+}
+
+struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
+					 u64 *batch_addr)
+{
+	struct xe_sched_job *job;
+	struct dma_fence **fences;
+	bool is_migration = xe_sched_job_is_migration(q);
+	int err;
+	int i, j;
+	u32 width;
+
+	/* only a kernel context can submit a vm-less job */
+	XE_WARN_ON(!q->vm && !(q->flags & EXEC_QUEUE_FLAG_KERNEL));
+
+	/* Migration and kernel engines have their own locking */
+	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
+		lockdep_assert_held(&q->vm->lock);
+		if (!xe_vm_in_lr_mode(q->vm))
+			xe_vm_assert_held(q->vm);
+	}
+
+	job = job_alloc(xe_exec_queue_is_parallel(q) || is_migration);
+	if (!job)
+		return ERR_PTR(-ENOMEM);
+
+	job->q = q;
+	kref_init(&job->refcount);
+	xe_exec_queue_get(job->q);
+
+	err = drm_sched_job_init(&job->drm, q->entity, 1, NULL);
+	if (err)
+		goto err_free;
+
+	if (!xe_exec_queue_is_parallel(q)) {
+		job->fence = xe_lrc_create_seqno_fence(q->lrc);
+		if (IS_ERR(job->fence)) {
+			err = PTR_ERR(job->fence);
+			goto err_sched_job;
+		}
+	} else {
+		struct dma_fence_array *cf;
+
+		fences = kmalloc_array(q->width, sizeof(*fences), GFP_KERNEL);
+		if (!fences) {
+			err = -ENOMEM;
+			goto err_sched_job;
+		}
+
+		for (j = 0; j < q->width; ++j) {
+			fences[j] = xe_lrc_create_seqno_fence(q->lrc + j);
+			if (IS_ERR(fences[j])) {
+				err = PTR_ERR(fences[j]);
+				goto err_fences;
+			}
+		}
+
+		cf = dma_fence_array_create(q->width, fences,
+					    q->parallel.composite_fence_ctx,
+					    q->parallel.composite_fence_seqno++,
+					    false);
+		if (!cf) {
+			--q->parallel.composite_fence_seqno;
+			err = -ENOMEM;
+			goto err_fences;
+		}
+
+		/* Sanity check */
+		for (j = 0; j < q->width; ++j)
+			xe_assert(job_to_xe(job), cf->base.seqno == fences[j]->seqno);
+
+		job->fence = &cf->base;
+	}
+
+	width = q->width;
+	if (is_migration)
+		width = 2;
+
+	for (i = 0; i < width; ++i)
+		job->batch_addr[i] = batch_addr[i];
+
+	/* All other jobs require a VM to be open which has a ref */
+	if (unlikely(q->flags & EXEC_QUEUE_FLAG_KERNEL))
+		xe_device_mem_access_get(job_to_xe(job));
+	xe_device_assert_mem_access(job_to_xe(job));
+
+	trace_xe_sched_job_create(job);
+	return job;
+
+err_fences:
+	for (j = j - 1; j >= 0; --j) {
+		--q->lrc[j].fence_ctx.next_seqno;
+		dma_fence_put(fences[j]);
+	}
+	kfree(fences);
+err_sched_job:
+	drm_sched_job_cleanup(&job->drm);
+err_free:
+	xe_exec_queue_put(q);
+	job_free(job);
+	return ERR_PTR(err);
+}
+
+/**
+ * xe_sched_job_destroy - Destroy XE schedule job
+ * @ref: reference to XE schedule job
+ *
+ * Called when ref == 0. Puts the job's fence, cleans up the base DRM job and
+ * frees the job before putting the exec queue, since job_free() reads job->q.
+ */
+void xe_sched_job_destroy(struct kref *ref)
+{
+	struct xe_sched_job *job = container_of(ref, struct xe_sched_job, refcount);
+	struct xe_exec_queue *q = job->q;
+
+	if (unlikely(q->flags & EXEC_QUEUE_FLAG_KERNEL))
+		xe_device_mem_access_put(job_to_xe(job));
+	dma_fence_put(job->fence);
+	drm_sched_job_cleanup(&job->drm);
+	job_free(job);
+	xe_exec_queue_put(q);
+}
+
+void xe_sched_job_set_error(struct xe_sched_job *job, int error)
+{
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags))
+		return;
+
+	dma_fence_set_error(job->fence, error);
+
+	if (dma_fence_is_array(job->fence)) {
+		struct dma_fence_array *array =
+			to_dma_fence_array(job->fence);
+		struct dma_fence **child = array->fences;
+		unsigned int nchild = array->num_fences;
+
+		do {
+			struct dma_fence *current_fence = *child++;
+
+			if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+				     &current_fence->flags))
+				continue;
+			dma_fence_set_error(current_fence, error);
+		} while (--nchild);
+	}
+
+	trace_xe_sched_job_set_error(job);
+
+	dma_fence_enable_sw_signaling(job->fence);
+	xe_hw_fence_irq_run(job->q->fence_irq);
+}
+
+bool xe_sched_job_started(struct xe_sched_job *job)
+{
+	struct xe_lrc *lrc = job->q->lrc;
+
+	return !__dma_fence_is_later(xe_sched_job_seqno(job),
+				     xe_lrc_start_seqno(lrc),
+				     job->fence->ops);
+}
+
+bool xe_sched_job_completed(struct xe_sched_job *job)
+{
+	struct xe_lrc *lrc = job->q->lrc;
+
+	/*
+	 * Can safely check just LRC[0] seqno as that is last seqno written when
+	 * parallel handshake is done.
+	 */
+
+	return !__dma_fence_is_later(xe_sched_job_seqno(job), xe_lrc_seqno(lrc),
+				     job->fence->ops);
+}
+
+void xe_sched_job_arm(struct xe_sched_job *job)
+{
+	drm_sched_job_arm(&job->drm);
+}
+
+void xe_sched_job_push(struct xe_sched_job *job)
+{
+	xe_sched_job_get(job);
+	trace_xe_sched_job_exec(job);
+	drm_sched_entity_push_job(&job->drm);
+	xe_sched_job_put(job);
+}
+
+/**
+ * xe_sched_job_last_fence_add_dep - Add last fence dependency to job
+ * @job: job to add the last fence dependency to
+ * @vm: virtual memory job belongs to
+ *
+ * Returns:
+ * 0 on success, or an error on failing to expand the array.
+ */
+int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm)
+{
+	struct dma_fence *fence;
+
+	fence = xe_exec_queue_last_fence_get(job->q, vm);
+
+	return drm_sched_job_add_dependency(&job->drm, fence);
+}
diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h
new file mode 100644
index 000000000000..34f475ba7f50
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sched_job.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_SCHED_JOB_H_
+#define _XE_SCHED_JOB_H_
+
+#include "xe_sched_job_types.h"
+
+struct xe_vm;
+
+#define XE_SCHED_HANG_LIMIT 1
+#define XE_SCHED_JOB_TIMEOUT LONG_MAX
+
+int xe_sched_job_module_init(void);
+void xe_sched_job_module_exit(void);
+
+struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
+					 u64 *batch_addr);
+void xe_sched_job_destroy(struct kref *ref);
+
+/**
+ * xe_sched_job_get - get reference to XE schedule job
+ * @job: XE schedule job object
+ *
+ * Increment XE schedule job's reference count
+ */
+static inline struct xe_sched_job *xe_sched_job_get(struct xe_sched_job *job)
+{
+	kref_get(&job->refcount);
+	return job;
+}
+
+/**
+ * xe_sched_job_put - put reference to XE schedule job
+ * @job: XE schedule job object
+ *
+ * Decrement XE schedule job's reference count, call xe_sched_job_destroy when
+ * reference count == 0.
+ */
+static inline void xe_sched_job_put(struct xe_sched_job *job)
+{
+	kref_put(&job->refcount, xe_sched_job_destroy);
+}
+
+void xe_sched_job_set_error(struct xe_sched_job *job, int error);
+static inline bool xe_sched_job_is_error(struct xe_sched_job *job)
+{
+	return job->fence->error < 0;
+}
+
+bool xe_sched_job_started(struct xe_sched_job *job);
+bool xe_sched_job_completed(struct xe_sched_job *job);
+
+void xe_sched_job_arm(struct xe_sched_job *job);
+void xe_sched_job_push(struct xe_sched_job *job);
+
+int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm);
+
+static inline struct xe_sched_job *
+to_xe_sched_job(struct drm_sched_job *drm)
+{
+	return container_of(drm, struct xe_sched_job, drm);
+}
+
+static inline u32 xe_sched_job_seqno(struct xe_sched_job *job)
+{
+	return job->fence->seqno;
+}
+
+static inline void
+xe_sched_job_add_migrate_flush(struct xe_sched_job *job, u32 flags)
+{
+	job->migrate_flush_flags = flags;
+}
+
+bool xe_sched_job_is_migration(struct xe_exec_queue *q);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h
new file mode 100644
index 000000000000..71213ba9735b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_SCHED_JOB_TYPES_H_
+#define _XE_SCHED_JOB_TYPES_H_
+
+#include <linux/kref.h>
+
+#include <drm/gpu_scheduler.h>
+
+struct xe_exec_queue;
+
+/**
+ * struct xe_sched_job - XE schedule job (batch buffer tracking)
+ */
+struct xe_sched_job {
+	/** @drm: base DRM scheduler job */
+	struct drm_sched_job drm;
+	/** @q: Exec queue */
+	struct xe_exec_queue *q;
+	/** @refcount: ref count of this job */
+	struct kref refcount;
+	/**
+	 * @fence: dma fence to indicate completion. 1 way relationship - job
+	 * can safely reference fence, fence cannot safely reference job.
+	 */
+#define JOB_FLAG_SUBMIT		DMA_FENCE_FLAG_USER_BITS
+	struct dma_fence *fence;
+	/** @user_fence: write back value when BB is complete */
+	struct {
+		/** @used: user fence is used */
+		bool used;
+		/** @addr: address to write to */
+		u64 addr;
+		/** @value: write back value */
+		u64 value;
+	} user_fence;
+	/** @migrate_flush_flags: Additional flush flags for migration jobs */
+	u32 migrate_flush_flags;
+	/** @batch_addr: batch buffer address of job */
+	u64 batch_addr[];
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c
new file mode 100644
index 000000000000..42a0e0c917a0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_assert.h"
+#include "xe_sriov.h"
+
+/**
+ * xe_sriov_mode_to_string - Convert enum value to string.
+ * @mode: the &xe_sriov_mode to convert
+ *
+ * Returns: SR-IOV mode as a user friendly string.
+ */
+const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode)
+{
+	switch (mode) {
+	case XE_SRIOV_MODE_NONE:
+		return "none";
+	case XE_SRIOV_MODE_PF:
+		return "SR-IOV PF";
+	case XE_SRIOV_MODE_VF:
+		return "SR-IOV VF";
+	default:
+		return "<invalid>";
+	}
+}
+
+/**
+ * xe_sriov_probe_early - Probe a SR-IOV mode.
+ * @xe: the &xe_device to probe mode on
+ * @has_sriov: flag indicating hardware support for SR-IOV
+ *
+ * This function should be called only once and as soon as possible during
+ * driver probe to detect whether we are running a SR-IOV Physical Function
+ * (PF) or a Virtual Function (VF) device.
+ *
+ * SR-IOV PF mode detection is based on PCI @dev_is_pf() function.
+ * SR-IOV VF mode detection is based on dedicated MMIO register read.
+ */
+void xe_sriov_probe_early(struct xe_device *xe, bool has_sriov)
+{
+	enum xe_sriov_mode mode = XE_SRIOV_MODE_NONE;
+
+	/* TODO: replace with proper mode detection */
+	xe_assert(xe, !has_sriov);
+
+	xe_assert(xe, !xe->sriov.__mode);
+	xe->sriov.__mode = mode;
+	xe_assert(xe, xe->sriov.__mode);
+
+	if (has_sriov)
+		drm_info(&xe->drm, "Running in %s mode\n",
+			 xe_sriov_mode_to_string(xe_device_sriov_mode(xe)));
+}
diff --git a/drivers/gpu/drm/xe/xe_sriov.h b/drivers/gpu/drm/xe/xe_sriov.h
new file mode 100644
index 000000000000..5af73a3172b0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_H_
+#define _XE_SRIOV_H_
+
+#include "xe_assert.h"
+#include "xe_device_types.h"
+#include "xe_sriov_types.h"
+
+const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode);
+
+void xe_sriov_probe_early(struct xe_device *xe, bool has_sriov);
+
+static inline enum xe_sriov_mode xe_device_sriov_mode(struct xe_device *xe)
+{
+	xe_assert(xe, xe->sriov.__mode);
+	return xe->sriov.__mode;
+}
+
+static inline bool xe_device_is_sriov_pf(struct xe_device *xe)
+{
+	return xe_device_sriov_mode(xe) == XE_SRIOV_MODE_PF;
+}
+
+static inline bool xe_device_is_sriov_vf(struct xe_device *xe)
+{
+	return xe_device_sriov_mode(xe) == XE_SRIOV_MODE_VF;
+}
+
+#ifdef CONFIG_PCI_IOV
+#define IS_SRIOV_PF(xe) xe_device_is_sriov_pf(xe)
+#else
+#define IS_SRIOV_PF(xe) (typecheck(struct xe_device *, (xe)) && false)
+#endif
+#define IS_SRIOV_VF(xe) xe_device_is_sriov_vf(xe)
+
+#define IS_SRIOV(xe) (IS_SRIOV_PF(xe) || IS_SRIOV_VF(xe))
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_printk.h b/drivers/gpu/drm/xe/xe_sriov_printk.h
new file mode 100644
index 000000000000..117e1d541692
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_printk.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_PRINTK_H_
+#define _XE_SRIOV_PRINTK_H_
+
+#include <drm/drm_print.h>
+
+#include "xe_device_types.h"
+#include "xe_sriov_types.h"
+
+#define xe_sriov_printk_prefix(xe) \
+	((xe)->sriov.__mode == XE_SRIOV_MODE_PF ? "PF: " : \
+	 (xe)->sriov.__mode == XE_SRIOV_MODE_VF ? "VF: " : "")
+
+#define xe_sriov_printk(xe, _level, fmt, ...) \
+	drm_##_level(&(xe)->drm, "%s" fmt, xe_sriov_printk_prefix(xe), ##__VA_ARGS__)
+
+#define xe_sriov_err(xe, fmt, ...) \
+	xe_sriov_printk((xe), err, fmt, ##__VA_ARGS__)
+
+#define xe_sriov_err_ratelimited(xe, fmt, ...) \
+	xe_sriov_printk((xe), err_ratelimited, fmt, ##__VA_ARGS__)
+
+#define xe_sriov_warn(xe, fmt, ...) \
+	xe_sriov_printk((xe), warn, fmt, ##__VA_ARGS__)
+
+#define xe_sriov_notice(xe, fmt, ...) \
+	xe_sriov_printk((xe), notice, fmt, ##__VA_ARGS__)
+
+#define xe_sriov_info(xe, fmt, ...) \
+	xe_sriov_printk((xe), info, fmt, ##__VA_ARGS__)
+
+#define xe_sriov_dbg(xe, fmt, ...) \
+	xe_sriov_printk((xe), dbg, fmt, ##__VA_ARGS__)
+
+/* for low level noisy debug messages */
+#ifdef CONFIG_DRM_XE_DEBUG_SRIOV
+#define xe_sriov_dbg_verbose(xe, fmt, ...) xe_sriov_dbg(xe, fmt, ##__VA_ARGS__)
+#else
+#define xe_sriov_dbg_verbose(xe, fmt, ...) typecheck(struct xe_device *, (xe))
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_types.h b/drivers/gpu/drm/xe/xe_sriov_types.h
new file mode 100644
index 000000000000..999a4311b98b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_types.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_TYPES_H_
+#define _XE_SRIOV_TYPES_H_
+
+#include <linux/build_bug.h>
+
+/**
+ * enum xe_sriov_mode - SR-IOV mode
+ * @XE_SRIOV_MODE_NONE: bare-metal mode (non-virtualized)
+ * @XE_SRIOV_MODE_PF: SR-IOV Physical Function (PF) mode
+ * @XE_SRIOV_MODE_VF: SR-IOV Virtual Function (VF) mode
+ */
+enum xe_sriov_mode {
+	/*
+	 * Note: The default enum value 0 is deliberately left unused so that
+	 * any check of the SR-IOV mode before the mode probe can be caught.
+	 */
+	XE_SRIOV_MODE_NONE = 1,
+	XE_SRIOV_MODE_PF,
+	XE_SRIOV_MODE_VF,
+};
+static_assert(XE_SRIOV_MODE_NONE);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c
new file mode 100644
index 000000000000..eaf1b718f26c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_step.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_step.h"
+
+#include <linux/bitfield.h>
+
+#include "xe_device.h"
+#include "xe_platform_types.h"
+
+/*
+ * Provide mapping between PCI's revision ID to the individual GMD
+ * (Graphics/Media/Display) stepping values that can be compared numerically.
+ *
+ * Some platforms may have unusual ways of mapping PCI revision ID to GMD
+ * steppings.  E.g., in some cases a higher PCI revision may translate to a
+ * lower stepping of the GT and/or display IP.
+ *
+ * Also note that some revisions/steppings may have been set aside as
+ * placeholders but never materialized in real hardware; in those cases there
+ * may be jumps in the revision IDs or stepping values in the tables below.
+ */
+
+/*
+ * Some platforms always have the same stepping value for GT and display;
+ * use a macro to define these to make it easier to identify the platforms
+ * where the two steppings can deviate.
+ */
+#define COMMON_GT_MEDIA_STEP(x_)	\
+	.graphics = STEP_##x_,		\
+	.media = STEP_##x_
+
+#define COMMON_STEP(x_)			\
+	COMMON_GT_MEDIA_STEP(x_),	\
+	.graphics = STEP_##x_,		\
+	.media = STEP_##x_,		\
+	.display = STEP_##x_
+
+__diag_push();
+__diag_ignore_all("-Woverride-init", "Allow field overrides in table");
+
+/* The same GT stepping in tgl_uy_revids and tgl_revids doesn't mean the same HW */
+static const struct xe_step_info tgl_revids[] = {
+	[0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_B0 },
+	[1] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_D0 },
+};
+
+static const struct xe_step_info dg1_revids[] = {
+	[0] = { COMMON_STEP(A0) },
+	[1] = { COMMON_STEP(B0) },
+};
+
+static const struct xe_step_info adls_revids[] = {
+	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 },
+	[0x1] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A2 },
+	[0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 },
+	[0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_B0 },
+	[0xC] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_C0 },
+};
+
+static const struct xe_step_info adls_rpls_revids[] = {
+	[0x4] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_D0 },
+	[0xC] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_C0 },
+};
+
+static const struct xe_step_info adlp_revids[] = {
+	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 },
+	[0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 },
+	[0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_C0 },
+	[0xC] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_D0 },
+};
+
+static const struct xe_step_info adlp_rpl_revids[] = {
+	[0x4] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_E0 },
+};
+
+static const struct xe_step_info adln_revids[] = {
+	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_D0 },
+};
+
+static const struct xe_step_info dg2_g10_revid_step_tbl[] = {
+	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 },
+	[0x1] = { COMMON_GT_MEDIA_STEP(A1), .display = STEP_A0 },
+	[0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 },
+	[0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_C0 },
+};
+
+static const struct xe_step_info dg2_g11_revid_step_tbl[] = {
+	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_B0 },
+	[0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_C0 },
+	[0x5] = { COMMON_GT_MEDIA_STEP(B1), .display = STEP_C0 },
+};
+
+static const struct xe_step_info dg2_g12_revid_step_tbl[] = {
+	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_C0 },
+	[0x1] = { COMMON_GT_MEDIA_STEP(A1), .display = STEP_C0 },
+};
+
+static const struct xe_step_info pvc_revid_step_tbl[] = {
+	[0x5] = { .graphics = STEP_B0 },
+	[0x6] = { .graphics = STEP_B1 },
+	[0x7] = { .graphics = STEP_C0 },
+};
+
+static const int pvc_basedie_subids[] = {
+	[0x3] = STEP_B0,
+	[0x4] = STEP_B1,
+	[0x5] = STEP_B3,
+};
+
+__diag_pop();
+
+/**
+ * xe_step_pre_gmdid_get - Determine IP steppings from PCI revid
+ * @xe: Xe device
+ *
+ * Convert the PCI revid into proper IP steppings.  This should only be
+ * used on platforms that do not have GMD_ID support.
+ */
+struct xe_step_info xe_step_pre_gmdid_get(struct xe_device *xe)
+{
+	const struct xe_step_info *revids = NULL;
+	struct xe_step_info step = {};
+	u16 revid = xe->info.revid;
+	int size = 0;
+	const int *basedie_info = NULL;
+	int basedie_size = 0;
+	int baseid = 0;
+
+	if (xe->info.platform == XE_PVC) {
+		baseid = FIELD_GET(GENMASK(5, 3), xe->info.revid);
+		revid = FIELD_GET(GENMASK(2, 0), xe->info.revid);
+		revids = pvc_revid_step_tbl;
+		size = ARRAY_SIZE(pvc_revid_step_tbl);
+		basedie_info = pvc_basedie_subids;
+		basedie_size = ARRAY_SIZE(pvc_basedie_subids);
+	} else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G10) {
+		revids = dg2_g10_revid_step_tbl;
+		size = ARRAY_SIZE(dg2_g10_revid_step_tbl);
+	} else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G11) {
+		revids = dg2_g11_revid_step_tbl;
+		size = ARRAY_SIZE(dg2_g11_revid_step_tbl);
+	} else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G12) {
+		revids = dg2_g12_revid_step_tbl;
+		size = ARRAY_SIZE(dg2_g12_revid_step_tbl);
+	} else if (xe->info.platform == XE_ALDERLAKE_N) {
+		revids = adln_revids;
+		size = ARRAY_SIZE(adln_revids);
+	} else if (xe->info.subplatform == XE_SUBPLATFORM_ALDERLAKE_S_RPLS) {
+		revids = adls_rpls_revids;
+		size = ARRAY_SIZE(adls_rpls_revids);
+	} else if (xe->info.subplatform == XE_SUBPLATFORM_ALDERLAKE_P_RPLU) {
+		revids = adlp_rpl_revids;
+		size = ARRAY_SIZE(adlp_rpl_revids);
+	} else if (xe->info.platform == XE_ALDERLAKE_P) {
+		revids = adlp_revids;
+		size = ARRAY_SIZE(adlp_revids);
+	} else if (xe->info.platform == XE_ALDERLAKE_S) {
+		revids = adls_revids;
+		size = ARRAY_SIZE(adls_revids);
+	} else if (xe->info.platform == XE_DG1) {
+		revids = dg1_revids;
+		size = ARRAY_SIZE(dg1_revids);
+	} else if (xe->info.platform == XE_TIGERLAKE) {
+		revids = tgl_revids;
+		size = ARRAY_SIZE(tgl_revids);
+	}
+
+	/* Not using the stepping scheme for the platform yet. */
+	if (!revids)
+		return step;
+
+	if (revid < size && revids[revid].graphics != STEP_NONE) {
+		step = revids[revid];
+	} else {
+		drm_warn(&xe->drm, "Unknown revid 0x%02x\n", revid);
+
+		/*
+		 * If we hit a gap in the revid array, use the information for
+		 * the next revid.
+		 *
+		 * This may be wrong in all sorts of ways, especially if the
+		 * steppings in the array are not monotonically increasing, but
+		 * it's better than defaulting to 0.
+		 */
+		while (revid < size && revids[revid].graphics == STEP_NONE)
+			revid++;
+
+		if (revid < size) {
+			drm_dbg(&xe->drm, "Using steppings for revid 0x%02x\n",
+				revid);
+			step = revids[revid];
+		} else {
+			drm_dbg(&xe->drm, "Using future steppings\n");
+			step.graphics = STEP_FUTURE;
+			step.display = STEP_FUTURE;
+		}
+	}
+
+	drm_WARN_ON(&xe->drm, step.graphics == STEP_NONE);
+
+	if (basedie_info && basedie_size) {
+		if (baseid < basedie_size && basedie_info[baseid] != STEP_NONE) {
+			step.basedie = basedie_info[baseid];
+		} else {
+			drm_warn(&xe->drm, "Unknown baseid 0x%02x\n", baseid);
+			step.basedie = STEP_FUTURE;
+		}
+	}
+
+	return step;
+}
+
+/**
+ * xe_step_gmdid_get - Determine IP steppings from GMD_ID revid fields
+ * @xe: Xe device
+ * @graphics_gmdid_revid: value of graphics GMD_ID register's revid field
+ * @media_gmdid_revid: value of media GMD_ID register's revid field
+ *
+ * Convert the revid fields of the GMD_ID registers into proper IP steppings.
+ *
+ * GMD_ID revid values are currently expected to have consistent meanings on
+ * all platforms:  major steppings (A0, B0, etc.) are 4 apart, with minor
+ * steppings (A1, A2, etc.) taking the values in between.
+ */
+struct xe_step_info xe_step_gmdid_get(struct xe_device *xe,
+				      u32 graphics_gmdid_revid,
+				      u32 media_gmdid_revid)
+{
+	struct xe_step_info step = {
+		.graphics = STEP_A0 + graphics_gmdid_revid,
+		.media = STEP_A0 + media_gmdid_revid,
+	};
+
+	if (step.graphics >= STEP_FUTURE) {
+		step.graphics = STEP_FUTURE;
+		drm_dbg(&xe->drm, "Graphics GMD_ID revid value %d treated as future stepping\n",
+			graphics_gmdid_revid);
+	}
+
+	if (step.media >= STEP_FUTURE) {
+		step.media = STEP_FUTURE;
+		drm_dbg(&xe->drm, "Media GMD_ID revid value %d treated as future stepping\n",
+			media_gmdid_revid);
+	}
+
+	return step;
+}
+
+#define STEP_NAME_CASE(name)	\
+	case STEP_##name:	\
+		return #name;
+
+const char *xe_step_name(enum xe_step step)
+{
+	switch (step) {
+	STEP_NAME_LIST(STEP_NAME_CASE);
+
+	default:
+		return "**";
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_step.h b/drivers/gpu/drm/xe/xe_step.h
new file mode 100644
index 000000000000..686cb59200c2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_step.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_STEP_H_
+#define _XE_STEP_H_
+
+#include <linux/types.h>
+
+#include "xe_step_types.h"
+
+struct xe_device;
+
+struct xe_step_info xe_step_pre_gmdid_get(struct xe_device *xe);
+struct xe_step_info xe_step_gmdid_get(struct xe_device *xe,
+				      u32 graphics_gmdid_revid,
+				      u32 media_gmdid_revid);
+static inline u32 xe_step_to_gmdid(enum xe_step step) { return step - STEP_A0; }
+
+const char *xe_step_name(enum xe_step step);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_step_types.h b/drivers/gpu/drm/xe/xe_step_types.h
new file mode 100644
index 000000000000..ccc9b4795e95
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_step_types.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_STEP_TYPES_H_
+#define _XE_STEP_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_step_info {
+	u8 graphics;
+	u8 media;
+	u8 display;
+	u8 basedie;
+};
+
+#define STEP_ENUM_VAL(name)  STEP_##name,
+
+#define STEP_NAME_LIST(func)		\
+	func(A0)			\
+	func(A1)			\
+	func(A2)			\
+	func(A3)			\
+	func(B0)			\
+	func(B1)			\
+	func(B2)			\
+	func(B3)			\
+	func(C0)			\
+	func(C1)			\
+	func(C2)			\
+	func(C3)			\
+	func(D0)			\
+	func(D1)			\
+	func(D2)			\
+	func(D3)			\
+	func(E0)
+
+/*
+ * Symbolic steppings that do not match the hardware. These are valid both as gt
+ * and display steppings as symbolic names.
+ */
+enum xe_step {
+	STEP_NONE = 0,
+	STEP_NAME_LIST(STEP_ENUM_VAL)
+	STEP_FUTURE,
+	STEP_FOREVER,
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
new file mode 100644
index 000000000000..02c9577fe418
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -0,0 +1,380 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_sync.h"
+
+#include <linux/dma-fence-array.h>
+#include <linux/kthread.h>
+#include <linux/sched/mm.h>
+#include <linux/uaccess.h>
+
+#include <drm/drm_print.h>
+#include <drm/drm_syncobj.h>
+#include <drm/xe_drm.h>
+
+#include "xe_device_types.h"
+#include "xe_exec_queue.h"
+#include "xe_macros.h"
+#include "xe_sched_job_types.h"
+
+struct xe_user_fence {
+	struct xe_device *xe;
+	struct kref refcount;
+	struct dma_fence_cb cb;
+	struct work_struct worker;
+	struct mm_struct *mm;
+	u64 __user *addr;
+	u64 value;
+	int signalled;
+};
+
+static void user_fence_destroy(struct kref *kref)
+{
+	struct xe_user_fence *ufence = container_of(kref, struct xe_user_fence,
+						 refcount);
+
+	mmdrop(ufence->mm);
+	kfree(ufence);
+}
+
+static void user_fence_get(struct xe_user_fence *ufence)
+{
+	kref_get(&ufence->refcount);
+}
+
+static void user_fence_put(struct xe_user_fence *ufence)
+{
+	kref_put(&ufence->refcount, user_fence_destroy);
+}
+
+static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr,
+					       u64 value)
+{
+	/* Zero-filled so that ->signalled reads 0 until the worker sets it. */
+	struct xe_user_fence *ufence = kzalloc(sizeof(*ufence), GFP_KERNEL);
+
+	if (!ufence)
+		return NULL;
+
+	ufence->xe = xe;
+	kref_init(&ufence->refcount);
+	ufence->addr = u64_to_user_ptr(addr);
+	ufence->value = value;
+	ufence->mm = current->mm;
+	mmgrab(ufence->mm);
+
+	return ufence;
+}
+
+static void user_fence_worker(struct work_struct *w)
+{
+	struct xe_user_fence *ufence = container_of(w, struct xe_user_fence, worker);
+
+	if (mmget_not_zero(ufence->mm)) {
+		kthread_use_mm(ufence->mm);
+		if (copy_to_user(ufence->addr, &ufence->value, sizeof(ufence->value)))
+			XE_WARN_ON("Copy to user failed");
+		kthread_unuse_mm(ufence->mm);
+		mmput(ufence->mm);
+	}
+
+	wake_up_all(&ufence->xe->ufence_wq);
+	WRITE_ONCE(ufence->signalled, 1);
+	user_fence_put(ufence);
+}
+
+static void kick_ufence(struct xe_user_fence *ufence, struct dma_fence *fence)
+{
+	INIT_WORK(&ufence->worker, user_fence_worker);
+	queue_work(ufence->xe->ordered_wq, &ufence->worker);
+	dma_fence_put(fence);
+}
+
+static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+	struct xe_user_fence *ufence = container_of(cb, struct xe_user_fence, cb);
+
+	kick_ufence(ufence, fence);
+}
+
+int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
+			struct xe_sync_entry *sync,
+			struct drm_xe_sync __user *sync_user,
+			unsigned int flags)
+{
+	struct drm_xe_sync sync_in;
+	int err;
+	bool exec = flags & SYNC_PARSE_FLAG_EXEC;
+	bool in_lr_mode = flags & SYNC_PARSE_FLAG_LR_MODE;
+	bool disallow_user_fence = flags & SYNC_PARSE_FLAG_DISALLOW_USER_FENCE;
+	bool signal;
+
+	if (copy_from_user(&sync_in, sync_user, sizeof(*sync_user)))
+		return -EFAULT;
+
+	if (XE_IOCTL_DBG(xe, sync_in.flags & ~DRM_XE_SYNC_FLAG_SIGNAL) ||
+	    XE_IOCTL_DBG(xe, sync_in.reserved[0] || sync_in.reserved[1]))
+		return -EINVAL;
+
+	signal = sync_in.flags & DRM_XE_SYNC_FLAG_SIGNAL;
+	switch (sync_in.type) {
+	case DRM_XE_SYNC_TYPE_SYNCOBJ:
+		if (XE_IOCTL_DBG(xe, in_lr_mode && signal))
+			return -EOPNOTSUPP;
+
+		if (XE_IOCTL_DBG(xe, upper_32_bits(sync_in.addr)))
+			return -EINVAL;
+
+		sync->syncobj = drm_syncobj_find(xef->drm, sync_in.handle);
+		if (XE_IOCTL_DBG(xe, !sync->syncobj))
+			return -ENOENT;
+
+		if (!signal) {
+			sync->fence = drm_syncobj_fence_get(sync->syncobj);
+			if (XE_IOCTL_DBG(xe, !sync->fence))
+				return -EINVAL;
+		}
+		break;
+
+	case DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ:
+		if (XE_IOCTL_DBG(xe, in_lr_mode && signal))
+			return -EOPNOTSUPP;
+
+		if (XE_IOCTL_DBG(xe, upper_32_bits(sync_in.addr)))
+			return -EINVAL;
+
+		if (XE_IOCTL_DBG(xe, sync_in.timeline_value == 0))
+			return -EINVAL;
+
+		sync->syncobj = drm_syncobj_find(xef->drm, sync_in.handle);
+		if (XE_IOCTL_DBG(xe, !sync->syncobj))
+			return -ENOENT;
+
+		if (signal) {
+			sync->chain_fence = dma_fence_chain_alloc();
+			if (!sync->chain_fence)
+				return -ENOMEM;
+		} else {
+			sync->fence = drm_syncobj_fence_get(sync->syncobj);
+			if (XE_IOCTL_DBG(xe, !sync->fence))
+				return -EINVAL;
+
+			err = dma_fence_chain_find_seqno(&sync->fence,
+							 sync_in.timeline_value);
+			if (err)
+				return err;
+		}
+		break;
+
+	case DRM_XE_SYNC_TYPE_USER_FENCE:
+		if (XE_IOCTL_DBG(xe, disallow_user_fence))
+			return -EOPNOTSUPP;
+
+		if (XE_IOCTL_DBG(xe, !signal))
+			return -EOPNOTSUPP;
+
+		if (XE_IOCTL_DBG(xe, sync_in.addr & 0x7))
+			return -EINVAL;
+
+		if (exec) {
+			sync->addr = sync_in.addr;
+		} else {
+			sync->ufence = user_fence_create(xe, sync_in.addr,
+							 sync_in.timeline_value);
+			if (XE_IOCTL_DBG(xe, !sync->ufence))
+				return -ENOMEM;
+		}
+
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	sync->type = sync_in.type;
+	sync->flags = sync_in.flags;
+	sync->timeline_value = sync_in.timeline_value;
+
+	return 0;
+}
+
+int xe_sync_entry_wait(struct xe_sync_entry *sync)
+{
+	if (sync->fence)
+		return dma_fence_wait(sync->fence, true);
+
+	return 0;
+}
+
+int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job)
+{
+	int err;
+
+	if (sync->fence) {
+		err = drm_sched_job_add_dependency(&job->drm,
+						   dma_fence_get(sync->fence));
+		if (err) {
+			/* The reference taken above was consumed on failure too */
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+void xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job,
+			  struct dma_fence *fence)
+{
+	if (!(sync->flags & DRM_XE_SYNC_FLAG_SIGNAL))
+		return;
+
+	if (sync->chain_fence) {
+		drm_syncobj_add_point(sync->syncobj, sync->chain_fence,
+				      fence, sync->timeline_value);
+		/*
+		 * The chain's ownership is transferred to the
+		 * timeline.
+		 */
+		sync->chain_fence = NULL;
+	} else if (sync->syncobj) {
+		drm_syncobj_replace_fence(sync->syncobj, fence);
+	} else if (sync->ufence) {
+		int err;
+
+		dma_fence_get(fence);
+		user_fence_get(sync->ufence);
+		err = dma_fence_add_callback(fence, &sync->ufence->cb,
+					     user_fence_cb);
+		if (err == -ENOENT) {
+			kick_ufence(sync->ufence, fence);
+		} else if (err) {
+			XE_WARN_ON("failed to add user fence");
+			user_fence_put(sync->ufence);
+			dma_fence_put(fence);
+		}
+	} else if (sync->type == DRM_XE_SYNC_TYPE_USER_FENCE) {
+		job->user_fence.used = true;
+		job->user_fence.addr = sync->addr;
+		job->user_fence.value = sync->timeline_value;
+	}
+}
+
+void xe_sync_entry_cleanup(struct xe_sync_entry *sync)
+{
+	if (sync->syncobj)
+		drm_syncobj_put(sync->syncobj);
+	if (sync->fence)
+		dma_fence_put(sync->fence);
+	if (sync->chain_fence)
+		dma_fence_put(&sync->chain_fence->base);
+	if (sync->ufence)
+		user_fence_put(sync->ufence);
+}
+
+/**
+ * xe_sync_in_fence_get() - Get a fence from syncs, exec queue, and VM
+ * @sync: input syncs
+ * @num_sync: number of syncs
+ * @q: exec queue
+ * @vm: VM
+ *
+ * Get a fence from syncs, exec queue, and VM. If syncs contain in-fences create
+ * and return a composite fence of all in-fences + last fence. If no in-fences
+ * return last fence on input exec queue. Caller must drop reference to
+ * returned fence.
+ *
+ * Return: fence on success, ERR_PTR(-ENOMEM) on failure
+ */
+struct dma_fence *
+xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
+		     struct xe_exec_queue *q, struct xe_vm *vm)
+{
+	struct dma_fence **fences = NULL;
+	struct dma_fence_array *cf = NULL;
+	struct dma_fence *fence;
+	int i, num_in_fence = 0, current_fence = 0;
+
+	lockdep_assert_held(&vm->lock);
+
+	/* Count in-fences */
+	for (i = 0; i < num_sync; ++i) {
+		if (sync[i].fence) {
+			++num_in_fence;
+			fence = sync[i].fence;
+		}
+	}
+
+	/* Easy case... */
+	if (!num_in_fence) {
+		fence = xe_exec_queue_last_fence_get(q, vm);
+		return fence;
+	}
+
+	/* Composite fence: all in-fences plus the queue's last fence */
+	fences = kmalloc_array(num_in_fence + 1, sizeof(*fences), GFP_KERNEL);
+	if (!fences)
+		return ERR_PTR(-ENOMEM);
+	for (i = 0; i < num_sync; ++i) {
+		if (sync[i].fence) {
+			dma_fence_get(sync[i].fence);
+			fences[current_fence++] = sync[i].fence;
+		}
+	}
+	fences[current_fence++] = xe_exec_queue_last_fence_get(q, vm);
+	cf = dma_fence_array_create(current_fence, fences,
+				    vm->composite_fence_ctx,
+				    vm->composite_fence_seqno++,
+				    false);
+	if (!cf) {
+		--vm->composite_fence_seqno;
+		goto err_out;
+	}
+
+	return &cf->base;
+
+err_out:
+	while (current_fence)
+		dma_fence_put(fences[--current_fence]);
+	kfree(fences);
+	kfree(cf);
+
+	return ERR_PTR(-ENOMEM);
+}
+
+/**
+ * xe_sync_ufence_get() - Get user fence from sync
+ * @sync: input sync
+ *
+ * Get a user fence reference from sync.
+ *
+ * Return: xe_user_fence pointer with reference
+ */
+struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync)
+{
+	user_fence_get(sync->ufence);
+
+	return sync->ufence;
+}
+
+/**
+ * xe_sync_ufence_put() - Put user fence reference
+ * @ufence: user fence reference
+ *
+ */
+void xe_sync_ufence_put(struct xe_user_fence *ufence)
+{
+	user_fence_put(ufence);
+}
+
+/**
+ * xe_sync_ufence_get_status() - Get user fence status
+ * @ufence: user fence
+ *
+ * Return: 1 if signalled, 0 not signalled, <0 on error
+ */
+int xe_sync_ufence_get_status(struct xe_user_fence *ufence)
+{
+	return READ_ONCE(ufence->signalled);
+}
diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h
new file mode 100644
index 000000000000..0fd0d51208e6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sync.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_SYNC_H_
+#define _XE_SYNC_H_
+
+#include "xe_sync_types.h"
+
+struct xe_device;
+struct xe_exec_queue;
+struct xe_file;
+struct xe_sched_job;
+struct xe_vm;
+
+#define SYNC_PARSE_FLAG_EXEC			BIT(0)
+#define SYNC_PARSE_FLAG_LR_MODE			BIT(1)
+#define SYNC_PARSE_FLAG_DISALLOW_USER_FENCE	BIT(2)
+
+int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
+			struct xe_sync_entry *sync,
+			struct drm_xe_sync __user *sync_user,
+			unsigned int flags);
+int xe_sync_entry_wait(struct xe_sync_entry *sync);
+int xe_sync_entry_add_deps(struct xe_sync_entry *sync,
+			   struct xe_sched_job *job);
+void xe_sync_entry_signal(struct xe_sync_entry *sync,
+			  struct xe_sched_job *job,
+			  struct dma_fence *fence);
+void xe_sync_entry_cleanup(struct xe_sync_entry *sync);
+struct dma_fence *
+xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
+		     struct xe_exec_queue *q, struct xe_vm *vm);
+
+static inline bool xe_sync_is_ufence(struct xe_sync_entry *sync)
+{
+	return !!sync->ufence;
+}
+
+struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync);
+void xe_sync_ufence_put(struct xe_user_fence *ufence);
+int xe_sync_ufence_get_status(struct xe_user_fence *ufence);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sync_types.h b/drivers/gpu/drm/xe/xe_sync_types.h
new file mode 100644
index 000000000000..30ac3f51993b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sync_types.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_SYNC_TYPES_H_
+#define _XE_SYNC_TYPES_H_
+
+#include <linux/types.h>
+
+struct drm_syncobj;
+struct dma_fence;
+struct dma_fence_chain;
+struct drm_xe_sync;
+struct xe_user_fence;
+
+struct xe_sync_entry {
+	struct drm_syncobj *syncobj;
+	struct dma_fence *fence;
+	struct dma_fence_chain *chain_fence;
+	struct xe_user_fence *ufence;
+	u64 addr;
+	u64 timeline_value;
+	u32 type;
+	u32 flags;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
new file mode 100644
index 000000000000..044c20881de7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_migrate.h"
+#include "xe_sa.h"
+#include "xe_tile.h"
+#include "xe_tile_sysfs.h"
+#include "xe_ttm_vram_mgr.h"
+#include "xe_wa.h"
+
+/**
+ * DOC: Multi-tile Design
+ *
+ * Different vendors use the term "tile" a bit differently, but in the Intel
+ * world, a 'tile' is pretty close to what most people would think of as being
+ * a complete GPU.  When multiple GPUs are placed behind a single PCI device,
+ * that's what is referred to as a "multi-tile device."  In such cases, pretty
+ * much all hardware is replicated per-tile, although certain responsibilities
+ * like PCI communication, reporting of interrupts to the OS, etc. are handled
+ * solely by the "root tile."  A multi-tile platform takes care of tying the
+ * tiles together in a way such that interrupt notifications from remote tiles
+ * are forwarded to the root tile, the per-tile vram is combined into a single
+ * address space, etc.
+ *
+ * In contrast, a "GT" (which officially stands for "Graphics Technology") is
+ * the subset of a GPU/tile that is responsible for implementing graphics
+ * and/or media operations.  The GT is where a lot of the driver implementation
+ * happens since it's where the hardware engines, the execution units, and the
+ * GuC all reside.
+ *
+ * Historically most Intel devices were single-tile devices that contained a
+ * single GT.  PVC is an example of an Intel platform built on a multi-tile
+ * design (i.e., multiple GPUs behind a single PCI device); each PVC tile only
+ * has a single GT.  In contrast, platforms like MTL that have separate chips
+ * for render and media IP are still only a single logical GPU, but the
+ * graphics and media IP blocks are each exposed as a separate GT within that
+ * single GPU.  This is important from a software perspective because multi-GT
+ * platforms like MTL only replicate a subset of the GPU hardware and behave
+ * differently than multi-tile platforms like PVC where nearly everything is
+ * replicated.
+ *
+ * Per-tile functionality (shared by all GTs within the tile):
+ *  - Complete 4MB MMIO space (containing SGunit/SoC registers, GT
+ *    registers, display registers, etc.)
+ *  - Global GTT
+ *  - VRAM (if discrete)
+ *  - Interrupt flows
+ *  - Migration context
+ *  - kernel batchbuffer pool
+ *  - Primary GT
+ *  - Media GT (if media version >= 13)
+ *
+ * Per-GT functionality:
+ *  - GuC
+ *  - Hardware engines
+ *  - Programmable hardware units (subslices, EUs)
+ *  - GSI subset of registers (multiple copies of these registers reside
+ *    within the complete MMIO space provided by the tile, but at different
+ *    offsets --- 0 for render, 0x380000 for media)
+ *  - Multicast register steering
+ *  - TLBs to cache page table translations
+ *  - Reset capability
+ *  - Low-level power management (e.g., C6)
+ *  - Clock frequency
+ *  - MOCS and PAT programming
+ */
+
+/**
+ * xe_tile_alloc - Allocate the tile's GGTT and VRAM manager structures
+ * @tile: Tile whose bookkeeping structures are allocated
+ *
+ * Both objects come from drmm_kzalloc(), so they are zeroed and released
+ * together with the DRM device. No hardware access happens here.
+ *
+ * Return: 0 on success, -ENOMEM if either allocation fails.
+ */
+static int xe_tile_alloc(struct xe_tile *tile)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+
+	tile->mem.ggtt = drmm_kzalloc(&xe->drm, sizeof(*tile->mem.ggtt), GFP_KERNEL);
+	if (!tile->mem.ggtt)
+		return -ENOMEM;
+
+	tile->mem.ggtt->tile = tile;
+
+	tile->mem.vram_mgr = drmm_kzalloc(&xe->drm, sizeof(*tile->mem.vram_mgr), GFP_KERNEL);
+	if (!tile->mem.vram_mgr)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * xe_tile_init_early - Initialize the tile and primary GT
+ * @tile: Tile to initialize
+ * @xe: Parent Xe device
+ * @id: Tile ID
+ *
+ * Initializes per-tile resources that don't require any interactions with the
+ * hardware or any knowledge about the Graphics/Media IP version.
+ *
+ * Returns: 0 on success, negative error code on error.
+ */
+int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id)
+{
+	int ret;
+
+	tile->xe = xe;
+	tile->id = id;
+
+	ret = xe_tile_alloc(tile);
+	if (ret)
+		return ret;
+
+	tile->primary_gt = xe_gt_alloc(tile);
+	if (!IS_ERR(tile->primary_gt))
+		return 0;
+
+	return PTR_ERR(tile->primary_gt);
+}
+
+static int tile_ttm_mgr_init(struct xe_tile *tile)
+{
+	int ret;
+
+	if (!tile->mem.vram.usable_size)
+		return 0;	/* no usable VRAM on this tile: nothing to set up */
+
+	ret = xe_ttm_vram_mgr_init(tile, tile->mem.vram_mgr);
+	if (ret)
+		return ret;
+
+	tile_to_xe(tile)->info.mem_region_mask |= BIT(tile->id) << 1;
+	return 0;
+}
+
+/**
+ * xe_tile_init_noalloc - Init tile up to the point where allocations can happen.
+ * @tile: The tile to initialize.
+ *
+ * This function prepares the tile to allow memory allocations to VRAM, but is
+ * not allowed to allocate memory itself. This state is useful for display
+ * readout, because the inherited display framebuffer will otherwise be
+ * overwritten as it is usually put at the start of VRAM.
+ *
+ * Note that since this is tile initialization, it should not perform any
+ * GT-specific operations, and thus does not need to hold GT forcewake.
+ *
+ * Returns: 0 on success, negative error code on error.
+ */
+int xe_tile_init_noalloc(struct xe_tile *tile)
+{
+	int err;
+
+	xe_device_mem_access_get(tile_to_xe(tile));
+
+	err = tile_ttm_mgr_init(tile);
+	if (err)
+		goto err_mem_access;
+
+	tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, 16);
+	if (IS_ERR(tile->mem.kernel_bb_pool)) {
+		err = PTR_ERR(tile->mem.kernel_bb_pool);
+		goto err_mem_access;	/* do not continue tile init after a failure */
+	}
+	xe_wa_apply_tile_workarounds(tile);
+	xe_tile_sysfs_init(tile);
+
+err_mem_access:
+	xe_device_mem_access_put(tile_to_xe(tile));
+	return err;
+}
+
+void xe_tile_migrate_wait(struct xe_tile *tile)
+{
+	xe_migrate_wait(tile->migrate);	/* thin wrapper: forwards tile->migrate */
+}
diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h
new file mode 100644
index 000000000000..1c9e42ade6b0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_TILE_H_
+#define _XE_TILE_H_
+
+#include "xe_device_types.h"
+
+struct xe_tile;
+
+int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id);
+int xe_tile_init_noalloc(struct xe_tile *tile);
+
+void xe_tile_migrate_wait(struct xe_tile *tile);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.c b/drivers/gpu/drm/xe/xe_tile_sysfs.c
new file mode 100644
index 000000000000..0f8d3e7fce46
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile_sysfs.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <drm/drm_managed.h>
+
+#include "xe_tile.h"
+#include "xe_tile_sysfs.h"
+
+static void xe_tile_sysfs_kobj_release(struct kobject *kobj)
+{
+	kfree(container_of(kobj, struct kobj_tile, base));
+}
+
+static const struct kobj_type xe_tile_sysfs_kobj_type = {
+	.release = xe_tile_sysfs_kobj_release,	/* kfree()s the kobject's memory */
+	.sysfs_ops = &kobj_sysfs_ops,
+};
+
+static void tile_sysfs_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_tile *tile = arg;	/* cookie passed to drmm_add_action_or_reset() */
+
+	kobject_put(tile->sysfs);	/* ->sysfs was set by xe_tile_sysfs_init() */
+}
+
+void xe_tile_sysfs_init(struct xe_tile *tile)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	struct kobject *parent = &xe->drm.dev->kobj;
+	struct kobj_tile *ktile;
+	int ret;
+
+	ktile = kzalloc(sizeof(*ktile), GFP_KERNEL);
+	if (!ktile)
+		return;
+
+	ktile->tile = tile;
+	kobject_init(&ktile->base, &xe_tile_sysfs_kobj_type);
+
+	ret = kobject_add(&ktile->base, parent, "tile%d", tile->id);
+	if (ret) {
+		kobject_put(&ktile->base);
+		drm_warn(&xe->drm, "failed to register TILE sysfs directory, err: %d\n", ret);
+		return;
+	}
+
+	tile->sysfs = &ktile->base;
+
+	ret = drmm_add_action_or_reset(&xe->drm, tile_sysfs_fini, tile);
+	if (ret)
+		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
+			 __func__, ret);
+}
diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.h b/drivers/gpu/drm/xe/xe_tile_sysfs.h
new file mode 100644
index 000000000000..e4f065039eba
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile_sysfs.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_TILE_SYSFS_H_
+#define _XE_TILE_SYSFS_H_
+
+#include "xe_tile_sysfs_types.h"
+
+void xe_tile_sysfs_init(struct xe_tile *tile);
+
+static inline struct xe_tile *
+kobj_to_tile(struct kobject *kobj)
+{
+	return container_of(kobj, struct kobj_tile, base)->tile; /* kobj must be a kobj_tile.base */
+}
+
+#endif /* _XE_TILE_SYSFS_H_ */
diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs_types.h b/drivers/gpu/drm/xe/xe_tile_sysfs_types.h
new file mode 100644
index 000000000000..75906ba11a9e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile_sysfs_types.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_TILE_SYSFS_TYPES_H_
+#define _XE_TILE_SYSFS_TYPES_H_
+
+#include <linux/kobject.h>
+
+struct xe_tile;
+
+/**
+ * struct kobj_tile - A tile's kobject struct that connects the kobject
+ * and the tile
+ *
+ * When dealing with multiple tiles, this struct lets a sysfs callback find the
+ * tile it was invoked for from the kobject alone; see kobj_to_tile().
+ */
+struct kobj_tile {
+	/** @base: The embedded kobject; container_of() on it recovers this struct */
+	struct kobject base;
+	/** @tile: A pointer to the tile itself */
+	struct xe_tile *tile;
+};
+
+#endif	/* _XE_TILE_SYSFS_TYPES_H_ */
diff --git a/drivers/gpu/drm/xe/xe_trace.c b/drivers/gpu/drm/xe/xe_trace.c
new file mode 100644
index 000000000000..2527c556bff1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_trace.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "xe_trace.h"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
new file mode 100644
index 000000000000..4ddc55527f9a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -0,0 +1,631 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM xe
+
+#if !defined(_XE_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _XE_TRACE_H_
+
+#include <linux/tracepoint.h>
+#include <linux/types.h>
+
+#include "xe_bo.h"
+#include "xe_bo_types.h"
+#include "xe_exec_queue_types.h"
+#include "xe_gpu_scheduler_types.h"
+#include "xe_gt_tlb_invalidation_types.h"
+#include "xe_gt_types.h"
+#include "xe_guc_exec_queue_types.h"
+#include "xe_sched_job.h"
+#include "xe_vm.h"
+
+DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence,
+		    TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
+		    TP_ARGS(fence),
+
+		    TP_STRUCT__entry(
+			     __field(struct xe_gt_tlb_invalidation_fence *, fence)
+			     __field(int, seqno)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->fence = fence;
+			   __entry->seqno = fence->seqno;
+			   ),
+
+		    TP_printk("fence=%p, seqno=%d",
+			      __entry->fence, __entry->seqno)
+);
+
+DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_create,
+	     TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_gt_tlb_invalidation_fence,
+	     xe_gt_tlb_invalidation_fence_work_func,
+	     TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_cb,
+	     TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_send,
+	     TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_recv,
+	     TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_signal,
+	     TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_timeout,
+	     TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DECLARE_EVENT_CLASS(xe_bo,
+		    TP_PROTO(struct xe_bo *bo),
+		    TP_ARGS(bo),
+
+		    TP_STRUCT__entry(
+			     __field(size_t, size)
+			     __field(u32, flags)
+			     __field(struct xe_vm *, vm)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->size = bo->size;
+			   __entry->flags = bo->flags;
+			   __entry->vm = bo->vm;
+			   ),
+
+		    TP_printk("size=%zu, flags=0x%02x, vm=%p",
+			      __entry->size, __entry->flags, __entry->vm)
+);
+
+DEFINE_EVENT(xe_bo, xe_bo_cpu_fault,
+	     TP_PROTO(struct xe_bo *bo),
+	     TP_ARGS(bo)
+);
+
+TRACE_EVENT(xe_bo_move,
+	    TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement,
+		     bool move_lacks_source),
+	    TP_ARGS(bo, new_placement, old_placement, move_lacks_source),
+	    TP_STRUCT__entry(
+		     __field(struct xe_bo *, bo)
+		     __field(size_t, size)
+		     __field(u32, new_placement)
+		     __field(u32, old_placement)
+		     __array(char, device_id, 13) /* "dddd:bb:dd.f" PCI name + NUL */
+		     __field(bool, move_lacks_source)
+			),
+
+	    TP_fast_assign(
+		   __entry->bo      = bo;
+		   __entry->size = bo->size;
+		   __entry->new_placement = new_placement;
+		   __entry->old_placement = old_placement;
+		   strscpy(__entry->device_id, dev_name(xe_bo_device(__entry->bo)->drm.dev), 13);
+		   __entry->move_lacks_source = move_lacks_source;
+		   ),
+	    TP_printk("move_lacks_source:%s, migrate object %p [size %zu] from %s to %s device_id:%s",
+		      __entry->move_lacks_source ? "yes" : "no", __entry->bo, __entry->size,
+		      xe_mem_type_to_name[__entry->old_placement],
+		      xe_mem_type_to_name[__entry->new_placement], __entry->device_id)
+);
+
+DECLARE_EVENT_CLASS(xe_exec_queue,
+		    TP_PROTO(struct xe_exec_queue *q),
+		    TP_ARGS(q),
+
+		    TP_STRUCT__entry(
+			     __field(enum xe_engine_class, class)
+			     __field(u32, logical_mask)
+			     __field(u8, gt_id)
+			     __field(u16, width)
+			     __field(u16, guc_id)
+			     __field(u32, guc_state)
+			     __field(u32, flags)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->class = q->class;
+			   __entry->logical_mask = q->logical_mask;
+			   __entry->gt_id = q->gt->info.id;
+			   __entry->width = q->width;
+			   __entry->guc_id = q->guc->id;
+			   __entry->guc_state = atomic_read(&q->guc->state);
+			   __entry->flags = q->flags;
+			   ),
+
+		    TP_printk("%d:0x%x, gt=%d, width=%d, guc_id=%d, guc_state=0x%x, flags=0x%x",
+			      __entry->class, __entry->logical_mask,
+			      __entry->gt_id, __entry->width, __entry->guc_id,
+			      __entry->guc_state, __entry->flags)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_create,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_supress_resume,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_submit,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_scheduling_enable,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_scheduling_disable,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_scheduling_done,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_register,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_deregister,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_deregister_done,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_close,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_kill,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_cleanup_entity,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_destroy,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_reset,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_memory_cat_error,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_stop,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_resubmit,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_lr_cleanup,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DECLARE_EVENT_CLASS(xe_sched_job,
+		    TP_PROTO(struct xe_sched_job *job),
+		    TP_ARGS(job),
+
+		    TP_STRUCT__entry(
+			     __field(u32, seqno)
+			     __field(u16, guc_id)
+			     __field(u32, guc_state)
+			     __field(u32, flags)
+			     __field(int, error)
+			     __field(u64, fence) /* address of job->fence, not a seqno */
+			     __field(u64, batch_addr) /* first batch address only */
+			     ),
+
+		    TP_fast_assign(
+			   __entry->seqno = xe_sched_job_seqno(job);
+			   __entry->guc_id = job->q->guc->id;
+			   __entry->guc_state =
+			   atomic_read(&job->q->guc->state);
+			   __entry->flags = job->q->flags;
+			   __entry->error = job->fence->error;
+			   __entry->fence = (unsigned long)job->fence;
+			   __entry->batch_addr = (u64)job->batch_addr[0];
+			   ),
+
+		    TP_printk("fence=0x%016llx, seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d",
+			      __entry->fence, __entry->seqno, __entry->guc_id,
+			      __entry->batch_addr, __entry->guc_state,
+			      __entry->flags, __entry->error)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_create,
+	     TP_PROTO(struct xe_sched_job *job),
+	     TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_exec,
+	     TP_PROTO(struct xe_sched_job *job),
+	     TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_run,
+	     TP_PROTO(struct xe_sched_job *job),
+	     TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_free,
+	     TP_PROTO(struct xe_sched_job *job),
+	     TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_timedout,
+	     TP_PROTO(struct xe_sched_job *job),
+	     TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_set_error,
+	     TP_PROTO(struct xe_sched_job *job),
+	     TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_ban,
+	     TP_PROTO(struct xe_sched_job *job),
+	     TP_ARGS(job)
+);
+
+DECLARE_EVENT_CLASS(xe_sched_msg,
+		    TP_PROTO(struct xe_sched_msg *msg),
+		    TP_ARGS(msg),
+
+		    TP_STRUCT__entry(
+			     __field(u32, opcode)
+			     __field(u16, guc_id)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->opcode = msg->opcode;
+			   __entry->guc_id = /* assumes private_data is a struct xe_exec_queue */
+			   ((struct xe_exec_queue *)msg->private_data)->guc->id;
+			   ),
+
+		    TP_printk("guc_id=%d, opcode=%u", __entry->guc_id,
+			      __entry->opcode)
+);
+
+DEFINE_EVENT(xe_sched_msg, xe_sched_msg_add,
+	     TP_PROTO(struct xe_sched_msg *msg),
+	     TP_ARGS(msg)
+);
+
+DEFINE_EVENT(xe_sched_msg, xe_sched_msg_recv,
+	     TP_PROTO(struct xe_sched_msg *msg),
+	     TP_ARGS(msg)
+);
+
+DECLARE_EVENT_CLASS(xe_hw_fence,
+		    TP_PROTO(struct xe_hw_fence *fence),
+		    TP_ARGS(fence),
+
+		    TP_STRUCT__entry(
+			     __field(u64, ctx)
+			     __field(u32, seqno)
+			     __field(struct xe_hw_fence *, fence)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->ctx = fence->dma.context;
+			   __entry->seqno = fence->dma.seqno;
+			   __entry->fence = fence;
+			   ),
+
+		    TP_printk("ctx=0x%016llx, fence=%p, seqno=%u",
+			      __entry->ctx, __entry->fence, __entry->seqno)
+);
+
+DEFINE_EVENT(xe_hw_fence, xe_hw_fence_create,
+	     TP_PROTO(struct xe_hw_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_hw_fence, xe_hw_fence_signal,
+	     TP_PROTO(struct xe_hw_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_hw_fence, xe_hw_fence_try_signal,
+	     TP_PROTO(struct xe_hw_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_hw_fence, xe_hw_fence_free,
+	     TP_PROTO(struct xe_hw_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DECLARE_EVENT_CLASS(xe_vma,
+		    TP_PROTO(struct xe_vma *vma),
+		    TP_ARGS(vma),
+
+		    TP_STRUCT__entry(
+			     __field(struct xe_vma *, vma)
+			     __field(u32, asid)
+			     __field(u64, start)
+			     __field(u64, end)
+			     __field(u64, ptr)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->vma = vma;
+			   __entry->asid = xe_vma_vm(vma)->usm.asid;
+			   __entry->start = xe_vma_start(vma);
+			   __entry->end = xe_vma_end(vma) - 1; /* traced end is xe_vma_end() - 1 */
+			   __entry->ptr = xe_vma_userptr(vma);
+			   ),
+
+		    TP_printk("vma=%p, asid=0x%05x, start=0x%012llx, end=0x%012llx, userptr=0x%012llx,",
+			      __entry->vma, __entry->asid, __entry->start,
+			      __entry->end, __entry->ptr)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_flush,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_pagefault,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_acc,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_fail,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_bind,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_pf_bind,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_unbind,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_worker,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_exec,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_rebind_worker,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_rebind_exec,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_usm_invalidate,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_evict,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate_complete,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DECLARE_EVENT_CLASS(xe_vm,
+		    TP_PROTO(struct xe_vm *vm),
+		    TP_ARGS(vm),
+
+		    TP_STRUCT__entry(
+			     __field(struct xe_vm *, vm)
+			     __field(u32, asid)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->vm = vm;
+			   __entry->asid = vm->usm.asid;
+			   ),
+
+		    TP_printk("vm=%p, asid=0x%05x",  __entry->vm,
+			      __entry->asid)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_kill,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_create,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_free,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_cpu_bind,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_restart,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_enter,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_retry,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_exit,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+/* GuC */
+DECLARE_EVENT_CLASS(xe_guc_ct_flow_control,
+		    TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len),
+		    TP_ARGS(_head, _tail, size, space, len),
+
+		    TP_STRUCT__entry(
+			     __field(u32, _head)
+			     __field(u32, _tail)
+			     __field(u32, size)
+			     __field(u32, space)
+			     __field(u32, len)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->_head = _head;
+			   __entry->_tail = _tail;
+			   __entry->size = size;
+			   __entry->space = space;
+			   __entry->len = len;
+			   ),
+
+		    TP_printk("h2g flow control: head=%u, tail=%u, size=%u, space=%u, len=%u",
+			      __entry->_head, __entry->_tail, __entry->size,
+			      __entry->space, __entry->len)
+);
+
+DEFINE_EVENT(xe_guc_ct_flow_control, xe_guc_ct_h2g_flow_control,
+	     TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len),
+	     TP_ARGS(_head, _tail, size, space, len)
+);
+
+DEFINE_EVENT_PRINT(xe_guc_ct_flow_control, xe_guc_ct_g2h_flow_control,
+		   TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len),
+		   TP_ARGS(_head, _tail, size, space, len),
+
+		   TP_printk("g2h flow control: head=%u, tail=%u, size=%u, space=%u, len=%u",
+			     __entry->_head, __entry->_tail, __entry->size,
+			     __entry->space, __entry->len)
+);
+
+DECLARE_EVENT_CLASS(xe_guc_ctb,
+		    TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail),
+		    TP_ARGS(gt_id, action, len, _head, tail),
+
+		    TP_STRUCT__entry(
+				__field(u8, gt_id)
+				__field(u32, action)
+				__field(u32, len)
+				__field(u32, tail)
+				__field(u32, _head)
+		    ),
+
+		    TP_fast_assign(
+			    __entry->gt_id = gt_id;
+			    __entry->action = action;
+			    __entry->len = len;
+			    __entry->tail = tail;
+			    __entry->_head = _head;
+		    ),
+
+		    TP_printk("gt%d: H2G CTB: action=0x%x, len=%u, tail=%u, head=%u",
+			      __entry->gt_id, __entry->action, __entry->len,
+			      __entry->tail, __entry->_head)
+);	/* no "\n" in the format: TP_printk() appends the newline itself */
+
+DEFINE_EVENT(xe_guc_ctb, xe_guc_ctb_h2g,
+	     TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail),
+	     TP_ARGS(gt_id, action, len, _head, tail)
+);
+
+DEFINE_EVENT_PRINT(xe_guc_ctb, xe_guc_ctb_g2h,
+		   TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail),
+		   TP_ARGS(gt_id, action, len, _head, tail),
+
+		   TP_printk("gt%d: G2H CTB: action=0x%x, len=%u, tail=%u, head=%u",
+			     __entry->gt_id, __entry->action, __entry->len,
+			     __entry->tail, __entry->_head)
+
+);	/* no "\n" in the format: TP_printk() appends the newline itself */
+
+#endif
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/xe
+#define TRACE_INCLUDE_FILE xe_trace
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
new file mode 100644
index 000000000000..e5d7d5e2bec1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -0,0 +1,334 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021-2023 Intel Corporation
+ * Copyright (C) 2021-2022 Red Hat
+ */
+
+#include <drm/drm_managed.h>
+#include <drm/drm_mm.h>
+
+#include <drm/ttm/ttm_device.h>
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_range_manager.h>
+
+#include "generated/xe_wa_oob.h"
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_mmio.h"
+#include "xe_res_cursor.h"
+#include "xe_ttm_stolen_mgr.h"
+#include "xe_ttm_vram_mgr.h"
+#include "xe_wa.h"
+
+struct xe_ttm_stolen_mgr {
+	struct xe_ttm_vram_mgr base;
+
+	/* PCI (CPU-visible) base address of stolen; left 0 when not set by detection */
+	resource_size_t io_base;
+	/* GPU base offset */
+	resource_size_t stolen_base;
+
+	void __iomem *mapping;	/* WC ioremap of io_base; only set when io_size != 0 */
+};
+
+static inline struct xe_ttm_stolen_mgr *
+to_stolen_mgr(struct ttm_resource_manager *man)
+{
+	return container_of(man, struct xe_ttm_stolen_mgr, base.manager); /* man must be base.manager */
+}
+
+/**
+ * xe_ttm_stolen_cpu_access_needs_ggtt() - Must CPU access to stolen use GGTT?
+ * @xe: xe device
+ *
+ * Some older integrated platforms don't support reliable CPU access for stolen,
+ * but there the mappable part of the GGTT can always be used for CPU access.
+ *
+ * Return: true for non-discrete devices with GRAPHICS_VERx100() below 1270.
+ */
+bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe)
+{
+	return GRAPHICS_VERx100(xe) < 1270 && !IS_DGFX(xe);
+}
+
+static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
+{
+	struct xe_tile *tile = xe_device_get_root_tile(xe);
+	struct xe_gt *mmio = xe_root_mmio_gt(xe);
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	u64 stolen_size;
+	u64 tile_offset;
+	u64 tile_size;
+
+	tile_offset = tile->mem.vram.io_start - xe->mem.vram.io_start;
+	tile_size = tile->mem.vram.actual_physical_size;
+
+	/* Use DSM base address instead for stolen memory */
+	mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset;
+	if (drm_WARN_ON(&xe->drm, tile_size < mgr->stolen_base))
+		return 0;	/* 0 means "no stolen memory" to the caller */
+
+	stolen_size = tile_size - mgr->stolen_base;
+
+	/* Only expose an io_base (CPU access) if stolen fits inside the LMEM BAR */
+	if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, LMEM_BAR))
+		mgr->io_base = tile->mem.vram.io_start + mgr->stolen_base;
+
+	/*
+	 * There may be a few KB of platform dependent reserved memory at the
+	 * end of vram which is not part of the DSM. That reserved portion is
+	 * always less than the DSM granularity, so align stolen_size down to
+	 * the DSM granularity to exclude it.
+	 */
+	return ALIGN_DOWN(stolen_size, SZ_1M);
+}
+
+static u32 get_wopcm_size(struct xe_device *xe)
+{
+	u32 wopcm_size;
+	u64 val;
+
+	val = xe_mmio_read64_2x32(xe_root_mmio_gt(xe), STOLEN_RESERVED);
+	val = REG_FIELD_GET64(WOPCM_SIZE_MASK, val);
+
+	switch (val) {
+	case 0x5 ... 0x6:
+		val--;	/* encodings 5 and 6 use exponents 4 and 5 */
+		fallthrough;
+	case 0x0 ... 0x3:
+		wopcm_size = (1U << val) * SZ_1M;	/* 2^val MiB */
+		break;
+	default:
+		WARN(1, "Missing case wopcm_size=%llx\n", val);
+		wopcm_size = 0;	/* detect_bar2_integrated() bails out on 0 */
+	}
+
+	return wopcm_size;
+}
+
+static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	struct xe_gt *media_gt = xe_device_get_root_tile(xe)->media_gt;
+	u32 stolen_size, wopcm_size;
+	u32 ggc, gms;
+
+	ggc = xe_mmio_read32(xe_root_mmio_gt(xe), GGC);
+
+	/*
+	 * Check GGMS: it should be fixed 0x3 (8MB), which corresponds to the
+	 * GTT size
+	 */
+	if (drm_WARN_ON(&xe->drm, (ggc & GGMS_MASK) != GGMS_MASK))
+		return 0;
+
+	/*
+	 * Graphics >= 1270 uses the offset to the GSMBASE as address in the
+	 * PTEs, together with the DM flag being set. Previously there was no
+	 * such flag so the address was the io_base.
+	 *
+	 * DSMBASE = GSMBASE + 8MB
+	 */
+	mgr->stolen_base = SZ_8M;
+	mgr->io_base = pci_resource_start(pdev, 2) + mgr->stolen_base;
+
+	/* Decode GMS into a byte size; unknown encodings return 0 (no stolen) */
+	gms = REG_FIELD_GET(GMS_MASK, ggc);
+	switch (gms) {
+	case 0x0 ... 0x04:
+		stolen_size = gms * 32 * SZ_1M;
+		break;
+	case 0xf0 ... 0xfe:
+		stolen_size = (gms - 0xf0 + 1) * 4 * SZ_1M;
+		break;
+	default:
+		return 0;
+	}
+
+	/* Carve out the top of DSM as it contains the reserved WOPCM region */
+	wopcm_size = get_wopcm_size(xe);
+	if (drm_WARN_ON(&xe->drm, !wopcm_size) || stolen_size <= wopcm_size)
+		return 0;	/* nothing left after WOPCM; also avoids u32 underflow below */
+
+	stolen_size -= wopcm_size;
+
+	if (media_gt && XE_WA(media_gt, 14019821291)) {
+		u64 gscpsmi_base = xe_mmio_read64_2x32(media_gt, GSCPSMI_BASE)
+			& ~GENMASK_ULL(5, 0);
+
+		/*
+		 * This workaround is primarily implemented by the BIOS.  We
+		 * just need to figure out whether the BIOS has applied the
+		 * workaround (meaning the programmed address falls within
+		 * the DSM) and, if so, reserve that part of the DSM to
+		 * prevent accidental reuse.  The DSM location should be just
+		 * below the WOPCM.
+		 */
+		if (gscpsmi_base >= mgr->io_base &&
+		    gscpsmi_base < mgr->io_base + stolen_size) {
+			xe_gt_dbg(media_gt,
+				  "Reserving %llu bytes of DSM for Wa_14019821291\n",
+				  mgr->io_base + stolen_size - gscpsmi_base);
+			stolen_size = gscpsmi_base - mgr->io_base;
+		}
+	}
+
+	if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2)))
+		return 0;
+
+	return stolen_size;
+}
+
+extern struct resource intel_graphics_stolen_res;
+
+/* Non-BAR2 path: report the range described by intel_graphics_stolen_res. */
+static u64 detect_stolen(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
+{
+#ifdef CONFIG_X86
+	const struct resource *stolen = &intel_graphics_stolen_res;
+
+	mgr->stolen_base = stolen->start;
+	mgr->io_base = pci_resource_start(to_pci_dev(xe->drm.dev), 2); /* GGTT */
+	return resource_size(stolen);
+#else
+	return 0;	/* stolen memory is x86 only */
+#endif
+}
+
+/* Detect stolen memory and, if any is found, register the XE_PL_STOLEN manager. */
+void xe_ttm_stolen_mgr_init(struct xe_device *xe)
+{
+	struct xe_ttm_stolen_mgr *mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL);
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	u64 stolen_size, io_size, pgsize;
+	int err;
+
+	if (!mgr)	/* allocation failed: carry on without stolen memory */
+		return;
+
+	if (IS_DGFX(xe))
+		stolen_size = detect_bar2_dgfx(xe, mgr);
+	else if (GRAPHICS_VERx100(xe) >= 1270)
+		stolen_size = detect_bar2_integrated(xe, mgr);
+	else
+		stolen_size = detect_stolen(xe, mgr);
+
+	if (!stolen_size) {
+		drm_dbg_kms(&xe->drm, "No stolen memory support\n");
+		return;
+	}
+
+	pgsize = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
+	pgsize = max_t(u64, pgsize, PAGE_SIZE);
+
+	/*
+	 * No partial CPU-visible support for stolen vram: stolen is always at
+	 * the end of vram, and with small-bar the BAR is pretty much always 256M.
+	 */
+	io_size = mgr->io_base && !xe_ttm_stolen_cpu_access_needs_ggtt(xe) ?
+		  stolen_size : 0;
+
+	err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size,
+				     io_size, pgsize);
+	if (err) {
+		drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err);
+		return;
+	}
+
+	drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n", stolen_size);
+
+	if (io_size)
+		mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, io_size);
+}
+
+/* CPU io address for byte @offset of a BO that is placed in stolen memory. */
+u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	struct xe_ttm_stolen_mgr *mgr = to_stolen_mgr(ttm_manager_type(&xe->ttm, XE_PL_STOLEN));
+	struct xe_res_cursor cursor;
+
+	XE_WARN_ON(!mgr->io_base);
+
+	if (!xe_ttm_stolen_cpu_access_needs_ggtt(xe)) {
+		xe_res_first(bo->ttm.resource, offset, 4096, &cursor);
+		return mgr->io_base + cursor.start;
+	}
+	return mgr->io_base + xe_bo_ggtt_addr(bo) + offset;
+}
+
+/* Fill in mem->bus for a stolen resource addressed relative to mgr->io_base. */
+static int __xe_ttm_stolen_io_mem_reserve_bar2(struct xe_device *xe,
+					       struct xe_ttm_stolen_mgr *mgr,
+					       struct ttm_resource *mem)
+{
+	struct xe_res_cursor first;
+
+	if (!mgr->io_base)
+		return -EIO;
+
+	xe_res_first(mem, 0, 4096, &first);
+	mem->bus.offset = first.start;
+	mem->bus.is_iomem = true;
+	mem->bus.caching = ttm_write_combined;
+
+	drm_WARN_ON(&xe->drm, !(mem->placement & TTM_PL_FLAG_CONTIGUOUS));
+
+	if (mgr->mapping && (mem->placement & TTM_PL_FLAG_CONTIGUOUS))
+		mem->bus.addr = (u8 __force *)mgr->mapping + mem->bus.offset;
+
+	mem->bus.offset += mgr->io_base;
+	return 0;
+}
+
+/* Fill in mem->bus for a stolen resource that is addressed through its GGTT mapping. */
+static int __xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device *xe,
+						 struct xe_ttm_stolen_mgr *mgr,
+						 struct ttm_resource *mem)
+{
+#ifndef CONFIG_X86
+	/* How is it even possible to get here without gen12 stolen? */
+	drm_WARN_ON(&xe->drm, 1);
+	return -EIO;
+#else
+	struct xe_bo *bo = ttm_to_xe_bo(mem->bo);
+
+	XE_WARN_ON(IS_DGFX(xe));
+
+	/* XXX: Require BO to be mapped to GGTT? */
+	if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_CREATE_GGTT_BIT)))
+		return -EIO;
+
+	mem->bus.caching = ttm_write_combined;
+	mem->bus.is_iomem = true;
+	/* GGTT is always contiguously mapped, so a single offset is enough */
+	mem->bus.offset = xe_bo_ggtt_addr(bo) + mgr->io_base;
+
+	return 0;
+#endif
+}
+
+/* Set up mem->bus for a stolen resource; -EIO when there is no manager or io_base. */
+int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem)
+{
+	struct ttm_resource_manager *man = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
+	struct xe_ttm_stolen_mgr *stolen = man ? to_stolen_mgr(man) : NULL;
+
+	if (!stolen || !stolen->io_base)
+		return -EIO;
+
+	if (!xe_ttm_stolen_cpu_access_needs_ggtt(xe))
+		return __xe_ttm_stolen_io_mem_reserve_bar2(xe, stolen, mem);
+	return __xe_ttm_stolen_io_mem_reserve_stolen(xe, stolen, mem);
+}
+
+/* Report the stolen_base value recorded by the XE_PL_STOLEN manager. */
+u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe)
+{
+	struct ttm_resource_manager *man = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
+
+	return to_stolen_mgr(man)->stolen_base;
+}
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h
new file mode 100644
index 000000000000..1777245ff810
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_TTM_STOLEN_MGR_H_
+#define _XE_TTM_STOLEN_MGR_H_
+
+#include <linux/types.h>
+
+struct ttm_resource;
+struct xe_bo;
+struct xe_device;
+
+void xe_ttm_stolen_mgr_init(struct xe_device *xe);
+int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem);
+bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe);
+u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset);
+u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
new file mode 100644
index 000000000000..3e1fa0c832ca
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021-2022 Intel Corporation
+ * Copyright (C) 2021-2002 Red Hat
+ */
+
+#include "xe_ttm_sys_mgr.h"
+
+#include <drm/drm_managed.h>
+
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_range_manager.h>
+#include <drm/ttm/ttm_tt.h>
+
+#include "xe_bo.h"
+#include "xe_gt.h"
+
+struct xe_ttm_sys_node {
+	struct ttm_buffer_object *tbo;	/* BO the node was created for */
+	struct ttm_range_mgr_node base;	/* allocated with one mm_nodes[] entry */
+};
+
+/* Recover the xe_ttm_sys_node that embeds @res. */
+static inline struct xe_ttm_sys_node *to_xe_ttm_sys_node(struct ttm_resource *res)
+{
+	return container_of(res, struct xe_ttm_sys_node, base.base);
+}
+
+/* Create the resource node backing a BO that is placed in XE_PL_TT. */
+static int xe_ttm_sys_mgr_new(struct ttm_resource_manager *man,
+			      struct ttm_buffer_object *tbo,
+			      const struct ttm_place *place,
+			      struct ttm_resource **res)
+{
+	struct xe_ttm_sys_node *node;
+	struct ttm_resource *base;
+
+	node = kzalloc(struct_size(node, base.mm_nodes, 1), GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+
+	base = &node->base.base;
+	node->tbo = tbo;
+	ttm_resource_init(tbo, place, base);
+
+	/* Temporary placements are not held to the manager's size limit */
+	if (!(place->flags & TTM_PL_FLAG_TEMPORARY) &&
+	    ttm_resource_manager_usage(man) > (man->size << PAGE_SHIFT)) {
+		ttm_resource_fini(man, base);
+		kfree(node);
+		return -ENOSPC;
+	}
+
+	/* No range is reserved: a single node at 0 spanning the whole BO */
+	node->base.mm_nodes[0].start = 0;
+	node->base.mm_nodes[0].size = PFN_UP(base->size);
+	base->start = XE_BO_INVALID_OFFSET;
+
+	*res = base;
+
+	return 0;
+}
+
+/* Release a resource node created by xe_ttm_sys_mgr_new(). */
+static void xe_ttm_sys_mgr_del(struct ttm_resource_manager *man,
+			       struct ttm_resource *res)
+{
+	/* Finish the TTM resource first, then free the node that embeds it */
+	ttm_resource_fini(man, res);
+	kfree(to_xe_ttm_sys_node(res));
+}
+
+static void xe_ttm_sys_mgr_debug(struct ttm_resource_manager *man,
+				 struct drm_printer *printer)
+{
+	/* Intentionally empty: nothing is printed for this manager. */
+}
+
+static const struct ttm_resource_manager_func xe_ttm_sys_mgr_func = { /* XE_PL_TT ops */
+	.alloc	= xe_ttm_sys_mgr_new,
+	.free	= xe_ttm_sys_mgr_del,
+	.debug	= xe_ttm_sys_mgr_debug,
+};
+
+/* drmm release action: disable, evict and unregister the XE_PL_TT manager. */
+static void ttm_sys_mgr_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_device *xe = arg;
+	struct ttm_resource_manager *man = &xe->mem.sys_mgr;
+
+	ttm_resource_manager_set_used(man, false);
+
+	/* If eviction fails, leave the manager registered rather than clean up */
+	if (ttm_resource_manager_evict_all(&xe->ttm, man))
+		return;
+
+	ttm_resource_manager_cleanup(man);
+	ttm_set_driver_manager(&xe->ttm, XE_PL_TT, NULL);
+}
+
+/* Register the XE_PL_TT manager, sized to half of system RAM (in pages). */
+int xe_ttm_sys_mgr_init(struct xe_device *xe)
+{
+	struct ttm_resource_manager *man = &xe->mem.sys_mgr;
+	struct sysinfo meminfo;
+	u64 limit_bytes;
+
+	si_meminfo(&meminfo);
+	/* TTM limits allocation of all TTM devices by 50% of system memory */
+	limit_bytes = ((u64)meminfo.totalram * meminfo.mem_unit) / 2;
+
+	man->func = &xe_ttm_sys_mgr_func;
+	man->use_tt = true;
+	ttm_resource_manager_init(man, &xe->ttm, limit_bytes >> PAGE_SHIFT);
+	ttm_set_driver_manager(&xe->ttm, XE_PL_TT, man);
+	ttm_resource_manager_set_used(man, true);
+	return drmm_add_action_or_reset(&xe->drm, ttm_sys_mgr_fini, xe);
+}
diff --git a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.h b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.h
new file mode 100644
index 000000000000..e8f5cd395b28
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_TTM_SYS_MGR_H_
+#define _XE_TTM_SYS_MGR_H_
+
+struct xe_device;
+
+int xe_ttm_sys_mgr_init(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
new file mode 100644
index 000000000000..115ec745e502
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -0,0 +1,480 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021-2022 Intel Corporation
+ * Copyright (C) 2021-2002 Red Hat
+ */
+
+#include <drm/drm_managed.h>
+
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_range_manager.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_res_cursor.h"
+#include "xe_ttm_vram_mgr.h"
+
+/* First buddy block on @list, or NULL when the list is empty. */
+static inline struct drm_buddy_block *xe_ttm_vram_mgr_first_block(struct list_head *list)
+{
+	return list_first_entry_or_null(list, struct drm_buddy_block, link);
+}
+
+/* True when the blocks on @head form one gap-free run in list order. */
+static inline bool xe_is_vram_mgr_blocks_contiguous(struct drm_buddy *mm,
+						    struct list_head *head)
+{
+	struct drm_buddy_block *cur = xe_ttm_vram_mgr_first_block(head);
+	struct drm_buddy_block *next;
+
+	if (!cur)
+		return false;
+
+	for (; !list_is_last(&cur->link, head); cur = next) {
+		u64 end = drm_buddy_block_offset(cur) +
+			  drm_buddy_block_size(mm, cur);
+
+		next = list_next_entry(cur, link);
+		/* Each block must begin exactly where its predecessor ends */
+		if (end != drm_buddy_block_offset(next))
+			return false;
+	}
+
+	return true;
+}
+
+static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
+			       struct ttm_buffer_object *tbo,
+			       const struct ttm_place *place,
+			       struct ttm_resource **res)
+{
+	struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
+	struct xe_ttm_vram_mgr_resource *vres;
+	struct drm_buddy *mm = &mgr->mm;
+	u64 size, remaining_size, min_page_size;
+	unsigned long lpfn;
+	int err;
+
+	lpfn = place->lpfn;
+	if (!lpfn || lpfn > man->size >> PAGE_SHIFT)
+		lpfn = man->size >> PAGE_SHIFT;
+
+	if (tbo->base.size >> PAGE_SHIFT > (lpfn - place->fpfn))
+		return -E2BIG; /* don't trigger eviction for the impossible */
+
+	vres = kzalloc(sizeof(*vres), GFP_KERNEL);
+	if (!vres)
+		return -ENOMEM;
+
+	ttm_resource_init(tbo, place, &vres->base);
+
+	/* bail out quickly if there's likely not enough VRAM for this BO */
+	if (ttm_resource_manager_usage(man) > man->size) {
+		err = -ENOSPC;
+		goto error_fini;
+	}
+
+	INIT_LIST_HEAD(&vres->blocks);
+
+	if (place->flags & TTM_PL_FLAG_TOPDOWN)
+		vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
+
+	if (place->fpfn || lpfn != man->size >> PAGE_SHIFT)
+		vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
+
+	if (WARN_ON(!vres->base.size)) {
+		err = -EINVAL;
+		goto error_fini;
+	}
+	size = vres->base.size;
+
+	min_page_size = mgr->default_page_size;
+	if (tbo->page_alignment)
+		min_page_size = tbo->page_alignment << PAGE_SHIFT;
+
+	if (WARN_ON(min_page_size < mm->chunk_size)) {
+		err = -EINVAL;
+		goto error_fini;
+	}
+
+	if (WARN_ON(min_page_size > SZ_2G)) { /* FIXME: sg limit */
+		err = -EINVAL;
+		goto error_fini;
+	}
+
+	if (WARN_ON((size > SZ_2G &&
+		     (vres->base.placement & TTM_PL_FLAG_CONTIGUOUS)))) {
+		err = -EINVAL;
+		goto error_fini;
+	}
+
+	if (WARN_ON(!IS_ALIGNED(size, min_page_size))) {
+		err = -EINVAL;
+		goto error_fini;
+	}
+	/* mgr->lock is held from here until visible_avail has been updated */
+	mutex_lock(&mgr->lock);
+	if (lpfn <= mgr->visible_size >> PAGE_SHIFT && size > mgr->visible_avail) {
+		mutex_unlock(&mgr->lock);
+		err = -ENOSPC;
+		goto error_fini;
+	}
+	/* Contiguous without an exact range: allocate one power-of-two sized chunk */
+	if (place->fpfn + (size >> PAGE_SHIFT) != place->lpfn &&
+	    place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+		size = roundup_pow_of_two(size);
+		min_page_size = size;
+
+		lpfn = max_t(unsigned long, place->fpfn + (size >> PAGE_SHIFT), lpfn);
+	}
+
+	remaining_size = size;
+	do {
+		/*
+		 * Limit maximum size to 2GiB due to SG table limitations.
+		 * FIXME: Should maybe be handled as part of sg construction.
+		 */
+		u64 alloc_size = min_t(u64, remaining_size, SZ_2G);
+
+		err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
+					     (u64)lpfn << PAGE_SHIFT,
+					     alloc_size,
+					     min_page_size,
+					     &vres->blocks,
+					     vres->flags);
+		if (err)
+			goto error_free_blocks;
+
+		remaining_size -= alloc_size;
+	} while (remaining_size);
+	/* Trim a contiguous allocation back down to the size that was asked for */
+	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+		if (!drm_buddy_block_trim(mm, vres->base.size, &vres->blocks))
+			size = vres->base.size;
+	}
+	/* Account for how much of the allocation lies in the CPU-visible part */
+	if (lpfn <= mgr->visible_size >> PAGE_SHIFT) {
+		vres->used_visible_size = size;
+	} else {
+		struct drm_buddy_block *block;
+
+		list_for_each_entry(block, &vres->blocks, link) {
+			u64 start = drm_buddy_block_offset(block);
+
+			if (start < mgr->visible_size) {
+				u64 end = start + drm_buddy_block_size(mm, block);
+
+				vres->used_visible_size +=
+					min(end, mgr->visible_size) - start;
+			}
+		}
+	}
+
+	mgr->visible_avail -= vres->used_visible_size;
+	mutex_unlock(&mgr->lock);
+	/* Blocks that happen to be adjacent are flagged as contiguous as well */
+	if (!(vres->base.placement & TTM_PL_FLAG_CONTIGUOUS) &&
+	    xe_is_vram_mgr_blocks_contiguous(mm, &vres->blocks))
+		vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
+
+	/*
+	 * For some kernel objects we still rely on the start when io mapping
+	 * the object.
+	 */
+	if (vres->base.placement & TTM_PL_FLAG_CONTIGUOUS) {
+		struct drm_buddy_block *block = list_first_entry(&vres->blocks,
+								 typeof(*block),
+								 link);
+
+		vres->base.start = drm_buddy_block_offset(block) >> PAGE_SHIFT;
+	} else {
+		vres->base.start = XE_BO_INVALID_OFFSET;
+	}
+
+	*res = &vres->base;
+	return 0;
+
+error_free_blocks:
+	drm_buddy_free_list(mm, &vres->blocks);
+	mutex_unlock(&mgr->lock);
+error_fini:
+	ttm_resource_fini(man, &vres->base);
+	kfree(vres);
+
+	return err;
+}
+
+/* Return a resource's blocks to the buddy allocator and free the resource. */
+static void xe_ttm_vram_mgr_del(struct ttm_resource_manager *man,
+				struct ttm_resource *res)
+{
+	struct xe_ttm_vram_mgr *vram_mgr = to_xe_ttm_vram_mgr(man);
+	struct xe_ttm_vram_mgr_resource *vram_res = to_xe_ttm_vram_mgr_resource(res);
+
+	/* The block list and the visible accounting change under the same lock */
+	mutex_lock(&vram_mgr->lock);
+	drm_buddy_free_list(&vram_mgr->mm, &vram_res->blocks);
+	vram_mgr->visible_avail += vram_res->used_visible_size;
+	mutex_unlock(&vram_mgr->lock);
+
+	ttm_resource_fini(man, res);
+
+	kfree(vram_res);
+}
+
+/* Dump the manager's sizes and the buddy allocator state to @printer. */
+static void xe_ttm_vram_mgr_debug(struct ttm_resource_manager *man,
+				  struct drm_printer *printer)
+{
+	struct xe_ttm_vram_mgr *vram_mgr = to_xe_ttm_vram_mgr(man);
+
+	mutex_lock(&vram_mgr->lock);
+	drm_printf(printer, "default_page_size: %lluKiB\n",
+		   vram_mgr->default_page_size >> 10);
+	drm_printf(printer, "visible_avail: %lluMiB\n",
+		   (u64)vram_mgr->visible_avail >> 20);
+	drm_printf(printer, "visible_size: %lluMiB\n",
+		   (u64)vram_mgr->visible_size >> 20);
+	drm_buddy_print(&vram_mgr->mm, printer);
+	mutex_unlock(&vram_mgr->lock);
+	/* man->size is printed after the lock has been dropped */
+	drm_printf(printer, "man size:%llu\n", man->size);
+}
+
+/* Does any part of @res lie inside the page range described by @place? */
+static bool xe_ttm_vram_mgr_intersects(struct ttm_resource_manager *man,
+				       struct ttm_resource *res,
+				       const struct ttm_place *place,
+				       size_t size)
+{
+	struct xe_ttm_vram_mgr *vram_mgr = to_xe_ttm_vram_mgr(man);
+	struct xe_ttm_vram_mgr_resource *vres = to_xe_ttm_vram_mgr_resource(res);
+	struct drm_buddy_block *blk;
+
+	/* An unrestricted placement overlaps everything */
+	if (!place->fpfn && !place->lpfn)
+		return true;
+
+	/* Exactly the CPU-visible window: the cached byte count answers it */
+	if (!place->fpfn && place->lpfn == vram_mgr->visible_size >> PAGE_SHIFT)
+		return vres->used_visible_size > 0;
+
+	list_for_each_entry(blk, &vres->blocks, link) {
+		unsigned long first = drm_buddy_block_offset(blk) >> PAGE_SHIFT;
+		unsigned long end = first +
+			(drm_buddy_block_size(&vram_mgr->mm, blk) >> PAGE_SHIFT);
+
+		if (place->fpfn < end && place->lpfn > first)
+			return true;
+	}
+
+	return false;
+}
+
+/* Does all of @res already lie inside the page range described by @place? */
+static bool xe_ttm_vram_mgr_compatible(struct ttm_resource_manager *man,
+				       struct ttm_resource *res,
+				       const struct ttm_place *place,
+				       size_t size)
+{
+	struct xe_ttm_vram_mgr *vram_mgr = to_xe_ttm_vram_mgr(man);
+	struct xe_ttm_vram_mgr_resource *vres = to_xe_ttm_vram_mgr_resource(res);
+	struct drm_buddy_block *blk;
+
+	/* An unrestricted placement accepts anything */
+	if (!place->fpfn && !place->lpfn)
+		return true;
+
+	/* Exactly the CPU-visible window: all @size bytes must count as visible */
+	if (!place->fpfn && place->lpfn == vram_mgr->visible_size >> PAGE_SHIFT)
+		return vres->used_visible_size == size;
+
+	list_for_each_entry(blk, &vres->blocks, link) {
+		unsigned long first = drm_buddy_block_offset(blk) >> PAGE_SHIFT;
+		unsigned long end = first +
+			(drm_buddy_block_size(&vram_mgr->mm, blk) >> PAGE_SHIFT);
+
+		if (first < place->fpfn || end > place->lpfn)
+			return false;
+	}
+
+	return true;
+}
+
+static const struct ttm_resource_manager_func xe_ttm_vram_mgr_func = { /* VRAM/stolen ops */
+	.alloc		= xe_ttm_vram_mgr_new,
+	.free		= xe_ttm_vram_mgr_del,
+	.intersects	= xe_ttm_vram_mgr_intersects,
+	.compatible	= xe_ttm_vram_mgr_compatible,
+	.debug		= xe_ttm_vram_mgr_debug,
+};
+
+/* drmm release action: evict everything, then tear the manager down. */
+static void ttm_vram_mgr_fini(struct drm_device *dev, void *arg)
+{
+	struct xe_ttm_vram_mgr *mgr = arg;
+	struct xe_device *xe = to_xe_device(dev);
+
+	ttm_resource_manager_set_used(&mgr->manager, false);
+
+	/* If eviction fails, bail out and leave the manager in place */
+	if (ttm_resource_manager_evict_all(&xe->ttm, &mgr->manager))
+		return;
+
+	/* After a full eviction the whole visible window should be free again */
+	WARN_ON_ONCE(mgr->visible_avail != mgr->visible_size);
+
+	drm_buddy_fini(&mgr->mm);
+	ttm_resource_manager_cleanup(&mgr->manager);
+	ttm_set_driver_manager(&xe->ttm, mgr->mem_type, NULL);
+
+	mutex_destroy(&mgr->lock);
+}
+
+/* Initialise @mgr for @size bytes of @mem_type and register it with TTM. */
+int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr,
+			   u32 mem_type, u64 size, u64 io_size,
+			   u64 default_page_size)
+{
+	struct ttm_resource_manager *man = &mgr->manager;
+	int err;
+
+	mutex_init(&mgr->lock);
+	mgr->mem_type = mem_type;
+	mgr->default_page_size = default_page_size;
+	mgr->visible_avail = io_size;
+	mgr->visible_size = io_size;
+	man->func = &xe_ttm_vram_mgr_func;
+
+	ttm_resource_manager_init(man, &xe->ttm, size);
+	err = drm_buddy_init(&mgr->mm, man->size, default_page_size);
+	if (err)
+		return err;
+
+	ttm_set_driver_manager(&xe->ttm, mem_type, man);
+	ttm_resource_manager_set_used(man, true);
+	return drmm_add_action_or_reset(&xe->drm, ttm_vram_mgr_fini, mgr);
+}
+
+/* Create the VRAM manager for @tile, using PAGE_SIZE as default page size. */
+int xe_ttm_vram_mgr_init(struct xe_tile *tile, struct xe_ttm_vram_mgr *mgr)
+{
+	u32 mem_type = XE_PL_VRAM0 + tile->id;
+
+	mgr->vram = &tile->mem.vram;
+	return __xe_ttm_vram_mgr_init(tile_to_xe(tile), mgr, mem_type,
+				      mgr->vram->usable_size, mgr->vram->io_size,
+				      PAGE_SIZE);
+}
+
+/*
+ * Build a DMA-mapped sg_table covering @length bytes at @offset of VRAM
+ * resource @res, mapped for @dev. Returns 0 on success. *@sgt is valid only
+ * on success; if building the table fails it is reset to NULL so that the
+ * caller is never left holding a pointer to freed memory.
+ */
+int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
+			      struct ttm_resource *res,
+			      u64 offset, u64 length,
+			      struct device *dev,
+			      enum dma_data_direction dir,
+			      struct sg_table **sgt)
+{
+	struct xe_tile *tile = &xe->tiles[res->mem_type - XE_PL_VRAM0];
+	struct xe_ttm_vram_mgr_resource *vres = to_xe_ttm_vram_mgr_resource(res);
+	struct xe_res_cursor cursor;
+	struct scatterlist *sg;
+	int num_entries = 0, i, r;
+
+	/* Resources that are not entirely CPU visible cannot be exported */
+	if (vres->used_visible_size < res->size)
+		return -EOPNOTSUPP;
+
+	*sgt = kmalloc(sizeof(**sgt), GFP_KERNEL);
+	if (!*sgt)
+		return -ENOMEM;
+
+	/* Determine the number of DRM_BUDDY blocks to export */
+	xe_res_first(res, offset, length, &cursor);
+	while (cursor.remaining) {
+		num_entries++;
+		xe_res_next(&cursor, cursor.size);
+	}
+
+	r = sg_alloc_table(*sgt, num_entries, GFP_KERNEL);
+	if (r)
+		goto error_free;
+
+	/* Zero the lengths so error_unmap can tell which entries were mapped */
+	for_each_sgtable_sg((*sgt), sg, i)
+		sg->length = 0;
+
+	/* Walk the cursor again, mapping one scatterlist entry per step */
+	xe_res_first(res, offset, length, &cursor);
+	for_each_sgtable_sg((*sgt), sg, i) {
+		phys_addr_t phys = cursor.start + tile->mem.vram.io_start;
+		size_t size = cursor.size;
+		dma_addr_t addr;
+
+		addr = dma_map_resource(dev, phys, size, dir, DMA_ATTR_SKIP_CPU_SYNC);
+		r = dma_mapping_error(dev, addr);
+		if (r)
+			goto error_unmap;
+
+		sg_set_page(sg, NULL, size, 0);
+		sg_dma_address(sg) = addr;
+		sg_dma_len(sg) = size;
+
+		xe_res_next(&cursor, cursor.size);
+	}
+
+	return 0;
+
+error_unmap:
+	for_each_sgtable_sg((*sgt), sg, i) {
+		if (!sg->length)
+			continue;
+
+		dma_unmap_resource(dev, sg->dma_address, sg->length, dir,
+				   DMA_ATTR_SKIP_CPU_SYNC);
+	}
+	sg_free_table(*sgt);
+
+error_free:
+	kfree(*sgt);
+	*sgt = NULL;
+	return r;
+}
+
+/* Undo xe_ttm_vram_mgr_alloc_sgt(): unmap every entry, then free the table. */
+void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir,
+			      struct sg_table *sgt)
+{
+	struct scatterlist *entry;
+	int idx;
+
+	for_each_sgtable_sg(sgt, entry, idx)
+		dma_unmap_resource(dev, entry->dma_address, entry->length, dir,
+				   DMA_ATTR_SKIP_CPU_SYNC);
+	sg_free_table(sgt);
+	kfree(sgt);
+}
+
+/* Size of the CPU-visible portion, as recorded by __xe_ttm_vram_mgr_init(). */
+u64 xe_ttm_vram_get_cpu_visible_size(struct ttm_resource_manager *man)
+{
+	/* container_of() only; the field is read without taking the lock */
+	return to_xe_ttm_vram_mgr(man)->visible_size;
+}
+
+/* Report total and CPU-visible usage, both sampled under one lock hold. */
+void xe_ttm_vram_get_used(struct ttm_resource_manager *man, u64 *used, u64 *used_visible)
+{
+	struct xe_ttm_vram_mgr *vram_mgr = to_xe_ttm_vram_mgr(man);
+
+	mutex_lock(&vram_mgr->lock);
+	*used = vram_mgr->mm.size - vram_mgr->mm.avail;
+	*used_visible = vram_mgr->visible_size - vram_mgr->visible_avail;
+	mutex_unlock(&vram_mgr->lock);
+}
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
new file mode 100644
index 000000000000..d184e19a9230
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_TTM_VRAM_MGR_H_
+#define _XE_TTM_VRAM_MGR_H_
+
+#include "xe_ttm_vram_mgr_types.h"
+
+enum dma_data_direction;
+struct xe_device;
+struct xe_tile;
+
+int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr,
+			   u32 mem_type, u64 size, u64 io_size,
+			   u64 default_page_size);
+int xe_ttm_vram_mgr_init(struct xe_tile *tile, struct xe_ttm_vram_mgr *mgr);
+int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
+			      struct ttm_resource *res,
+			      u64 offset, u64 length,
+			      struct device *dev,
+			      enum dma_data_direction dir,
+			      struct sg_table **sgt);
+void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir,
+			      struct sg_table *sgt);
+
+u64 xe_ttm_vram_get_cpu_visible_size(struct ttm_resource_manager *man);
+void xe_ttm_vram_get_used(struct ttm_resource_manager *man,
+			  u64 *used, u64 *used_visible);
+
+/* Recover the VRAM manager resource that embeds @res as its base member. */
+static inline struct xe_ttm_vram_mgr_resource *to_xe_ttm_vram_mgr_resource(struct ttm_resource *res)
+{
+	return container_of(res, struct xe_ttm_vram_mgr_resource, base);
+}
+
+/* Recover the VRAM manager that embeds @man as its manager member. */
+static inline struct xe_ttm_vram_mgr *to_xe_ttm_vram_mgr(struct ttm_resource_manager *man)
+{
+	return container_of(man, struct xe_ttm_vram_mgr, manager);
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
new file mode 100644
index 000000000000..2d75cf126289
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_TTM_VRAM_MGR_TYPES_H_
+#define _XE_TTM_VRAM_MGR_TYPES_H_
+
+#include <drm/drm_buddy.h>
+#include <drm/ttm/ttm_device.h>
+
+struct xe_mem_region;
+
+/**
+ * struct xe_ttm_vram_mgr - XE TTM VRAM manager
+ *
+ * Manages placement of TTM resource in VRAM.
+ */
+struct xe_ttm_vram_mgr {
+	/** @manager: Base TTM resource manager */
+	struct ttm_resource_manager manager;
+	/** @mm: DRM buddy allocator which manages the VRAM */
+	struct drm_buddy mm;
+	/** @vram: ptr to details of associated VRAM region */
+	struct xe_mem_region *vram;
+	/** @visible_size: Probed size of the CPU visible portion */
+	u64 visible_size;
+	/** @visible_avail: CPU visible portion still unallocated */
+	u64 visible_avail;
+	/** @default_page_size: default page size */
+	u64 default_page_size;
+	/** @lock: protects allocations of VRAM and @visible_avail */
+	struct mutex lock;
+	/** @mem_type: The TTM memory type */
+	u32 mem_type;
+};
+
+/**
+ * struct xe_ttm_vram_mgr_resource - XE TTM VRAM resource
+ */
+struct xe_ttm_vram_mgr_resource {
+	/** @base: Base TTM resource */
+	struct ttm_resource base;
+	/** @blocks: list of DRM buddy blocks */
+	struct list_head blocks;
+	/** @used_visible_size: How many CPU visible bytes this resource is using */
+	u64 used_visible_size;
+	/** @flags: DRM_BUDDY_* allocation flags used when allocating @blocks */
+	unsigned long flags;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
new file mode 100644
index 000000000000..53ccd338fd8c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_tuning.h"
+
+#include <kunit/visibility.h>
+
+#include "regs/xe_gt_regs.h"
+#include "xe_gt_types.h"
+#include "xe_platform_types.h"
+#include "xe_rtp.h"
+
+#undef XE_REG_MCR	/* drop any earlier definition before redefining it below */
+#define XE_REG_MCR(...)     XE_REG(__VA_ARGS__, .mcr = 1)
+
+static const struct xe_rtp_entry_sr gt_tunings[] = { /* for xe_tuning_process_gt() */
+	{ XE_RTP_NAME("Tuning: Blend Fill Caching Optimization Disable"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(SET(XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS))
+	},
+	{ XE_RTP_NAME("Tuning: 32B Access Enable"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(SET(XEHP_SQCM, EN_32B_ACCESS))
+	},
+
+	/* Xe2 */
+
+	{ XE_RTP_NAME("Tuning: L3 cache"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004)),
+	  XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
+				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
+	},
+	{ XE_RTP_NAME("Tuning: L3 cache - media"),
+	  XE_RTP_RULES(MEDIA_VERSION(2000)),
+	  XE_RTP_ACTIONS(FIELD_SET(XE2LPM_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
+				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
+	},
+
+	{}
+};
+
+static const struct xe_rtp_entry_sr engine_tunings[] = { /* for xe_tuning_process_engine() */
+	{ XE_RTP_NAME("Tuning: Set Indirect State Override"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1271),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE))
+	},
+	{}
+};
+
+static const struct xe_rtp_entry_sr lrc_tunings[] = { /* for xe_tuning_process_lrc() */
+	{ XE_RTP_NAME("Tuning: ganged timer, also known as 16011163337"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  /* read verification is ignored due to 1608008084. */
+	  XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(FF_MODE2,
+						FF_MODE2_GS_TIMER_MASK,
+						FF_MODE2_GS_TIMER_224))
+	},
+
+	/* DG2 */
+
+	{ XE_RTP_NAME("Tuning: L3 cache"),
+	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
+				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
+	},
+	{ XE_RTP_NAME("Tuning: TDS gang timer"),
+	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
+	  /* read verification is ignored as in i915 - need to check enabling */
+	  XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2,
+						FF_MODE2_TDS_TIMER_MASK,
+						FF_MODE2_TDS_TIMER_128))
+	},
+	{ XE_RTP_NAME("Tuning: TBIMR fast clip"),
+	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP))
+	},
+
+	/* Xe_LPG */
+
+	{ XE_RTP_NAME("Tuning: L3 cache"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
+				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
+	},
+
+	{}
+};
+
+void xe_tuning_process_gt(struct xe_gt *gt)
+{
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
+	/* Process the gt_tunings table for @gt, with gt->reg_sr as the target */
+	xe_rtp_process_to_sr(&ctx, gt_tunings, &gt->reg_sr);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt);
+
+void xe_tuning_process_engine(struct xe_hw_engine *hwe)
+{
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
+	/* Process the engine_tunings table for @hwe, with hwe->reg_sr as the target */
+	xe_rtp_process_to_sr(&ctx, engine_tunings, &hwe->reg_sr);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_engine);
+
+/**
+ * xe_tuning_process_lrc - process lrc tunings
+ * @hwe: engine instance to process tunings for
+ *
+ * Process LRC table for this platform, saving in @hwe all the tunings that need
+ * to be applied on context restore. These are tunings touching registers that
+ * are part of the HW context image.
+ */
+void xe_tuning_process_lrc(struct xe_hw_engine *hwe)
+{
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
+	/* Unlike the engine tunings, these are collected in hwe->reg_lrc */
+	xe_rtp_process_to_sr(&ctx, lrc_tunings, &hwe->reg_lrc);
+}
diff --git a/drivers/gpu/drm/xe/xe_tuning.h b/drivers/gpu/drm/xe/xe_tuning.h
new file mode 100644
index 000000000000..4f9c3ac3b516
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tuning.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_TUNING_
+#define _XE_TUNING_
+
+struct xe_gt;
+struct xe_hw_engine;
+
+void xe_tuning_process_gt(struct xe_gt *gt);
+void xe_tuning_process_engine(struct xe_hw_engine *hwe);
+void xe_tuning_process_lrc(struct xe_hw_engine *hwe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
new file mode 100644
index 000000000000..25e1ddfd2f86
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_uc.h"
+
+#include "xe_device.h"
+#include "xe_gsc.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_pc.h"
+#include "xe_guc_submit.h"
+#include "xe_huc.h"
+#include "xe_uc_fw.h"
+#include "xe_wopcm.h"
+
+static struct xe_gt *
+uc_to_gt(struct xe_uc *uc)
+{
+	return container_of(uc, struct xe_gt, uc);
+}
+
+static struct xe_device *
+uc_to_xe(struct xe_uc *uc)
+{
+	return gt_to_xe(uc_to_gt(uc));
+}
+
+/**
+ * xe_uc_init - init the GuC, HuC and GSC objects
+ * @uc: The UC object
+ *
+ * Should be called once at driver load only.
+ *
+ * Return: 0 on success, otherwise the error of the first step that failed.
+ */
+int xe_uc_init(struct xe_uc *uc)
+{
+	int err;
+
+	/*
+	 * The GuC/HuC/GSC init functions run even when GuC submission is off,
+	 * so that our tracking of the FW state correctly moves to "disabled".
+	 */
+	err = xe_guc_init(&uc->guc);
+	if (err)
+		return err;
+
+	err = xe_huc_init(&uc->huc);
+	if (err)
+		return err;
+
+	err = xe_gsc_init(&uc->gsc);
+	if (err)
+		return err;
+
+	/* GuC submission not enabled, nothing more to set up */
+	if (!xe_device_uc_enabled(uc_to_xe(uc)))
+		return 0;
+
+	err = xe_wopcm_init(&uc->wopcm);
+	if (err)
+		return err;
+
+	return xe_guc_submit_init(&uc->guc);
+}
+
+/**
+ * xe_uc_init_post_hwconfig - init Uc post hwconfig load
+ * @uc: The UC object
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_uc_init_post_hwconfig(struct xe_uc *uc)
+{
+	int err;
+
+	/* GuC submission not enabled, nothing to do */
+	if (!xe_device_uc_enabled(uc_to_xe(uc)))
+		return 0;
+
+	err = xe_uc_sanitize_reset(uc);
+	if (err)
+		return err;
+
+	err = xe_guc_init_post_hwconfig(&uc->guc);
+	if (err)
+		return err;
+
+	return xe_gsc_init_post_hwconfig(&uc->gsc);
+}
+
+/*
+ * Reset the GuC and log an error if that fails.
+ * Returns the xe_guc_reset() result unchanged (0 means no error was logged).
+ */
+static int uc_reset(struct xe_uc *uc)
+{
+	int err = xe_guc_reset(&uc->guc);
+
+	if (err)
+		drm_err(&uc_to_xe(uc)->drm, "Failed to reset GuC, ret = %d\n", err);
+
+	return err;
+}
+
+static void xe_uc_sanitize(struct xe_uc *uc)
+{
+	xe_huc_sanitize(&uc->huc);
+	xe_guc_sanitize(&uc->guc);
+}
+
+int xe_uc_sanitize_reset(struct xe_uc *uc)
+{
+	xe_uc_sanitize(uc);
+
+	return uc_reset(uc);
+}
+
+/**
+ * xe_uc_init_hwconfig - minimally init Uc, read and parse hwconfig
+ * @uc: The UC object
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_uc_init_hwconfig(struct xe_uc *uc)
+{
+	int ret;
+
+	/* GuC submission not enabled, nothing to do */
+	if (!xe_device_uc_enabled(uc_to_xe(uc)))
+		return 0;
+
+	ret = xe_guc_min_load_for_hwconfig(&uc->guc);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+/*
+ * Should be called during driver load, after every GT reset, and after every
+ * suspend to reload / auth the firmwares.
+ */
+int xe_uc_init_hw(struct xe_uc *uc)
+{
+	int ret;
+
+	/* GuC submission not enabled, nothing to do */
+	if (!xe_device_uc_enabled(uc_to_xe(uc)))
+		return 0;
+
+	ret = xe_huc_upload(&uc->huc);
+	if (ret)
+		return ret;
+
+	ret = xe_guc_upload(&uc->guc);
+	if (ret)
+		return ret;
+
+	ret = xe_guc_enable_communication(&uc->guc);
+	if (ret)
+		return ret;
+
+	ret = xe_gt_record_default_lrcs(uc_to_gt(uc));
+	if (ret)
+		return ret;
+
+	ret = xe_guc_post_load_init(&uc->guc);
+	if (ret)
+		return ret;
+
+	ret = xe_guc_pc_start(&uc->guc.pc);
+	if (ret)
+		return ret;
+
+	/* We don't fail the driver load if HuC fails to auth, but let's warn */
+	ret = xe_huc_auth(&uc->huc, XE_HUC_AUTH_VIA_GUC);
+	xe_gt_assert(uc_to_gt(uc), !ret);
+
+	/* GSC load is async */
+	xe_gsc_load_start(&uc->gsc);
+
+	return 0;
+}
+
+int xe_uc_fini_hw(struct xe_uc *uc)
+{
+	return xe_uc_sanitize_reset(uc);
+}
+
+int xe_uc_reset_prepare(struct xe_uc *uc)
+{
+	/* GuC submission not enabled, nothing to do */
+	if (!xe_device_uc_enabled(uc_to_xe(uc)))
+		return 0;
+
+	return xe_guc_reset_prepare(&uc->guc);
+}
+
+void xe_uc_gucrc_disable(struct xe_uc *uc)
+{
+	XE_WARN_ON(xe_guc_pc_gucrc_disable(&uc->guc.pc));
+}
+
+void xe_uc_stop_prepare(struct xe_uc *uc)
+{
+	xe_gsc_wait_for_worker_completion(&uc->gsc);
+	xe_guc_stop_prepare(&uc->guc);
+}
+
+int xe_uc_stop(struct xe_uc *uc)
+{
+	/* GuC submission not enabled, nothing to do */
+	if (!xe_device_uc_enabled(uc_to_xe(uc)))
+		return 0;
+
+	return xe_guc_stop(&uc->guc);
+}
+
+int xe_uc_start(struct xe_uc *uc)
+{
+	/* GuC submission not enabled, nothing to do */
+	if (!xe_device_uc_enabled(uc_to_xe(uc)))
+		return 0;
+
+	return xe_guc_start(&uc->guc);
+}
+
+/* Wait for the GuC reset, then redo reset-prepare; repeat until it succeeds */
+static void uc_reset_wait(struct xe_uc *uc)
+{
+	int err;
+
+	/* NOTE(review): unbounded, loops for as long as prepare keeps failing */
+	do {
+		xe_guc_reset_wait(&uc->guc);
+		err = xe_uc_reset_prepare(uc);
+	} while (err);
+}
+
+int xe_uc_suspend(struct xe_uc *uc)
+{
+	int ret;
+
+	/* GuC submission not enabled, nothing to do */
+	if (!xe_device_uc_enabled(uc_to_xe(uc)))
+		return 0;
+
+	uc_reset_wait(uc);
+
+	ret = xe_uc_stop(uc);
+	if (ret)
+		return ret;
+
+	return xe_guc_suspend(&uc->guc);
+}
diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h
new file mode 100644
index 000000000000..5d5110c0c834
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_H_
+#define _XE_UC_H_
+
+#include "xe_uc_types.h"
+
+int xe_uc_init(struct xe_uc *uc);
+int xe_uc_init_hwconfig(struct xe_uc *uc);
+int xe_uc_init_post_hwconfig(struct xe_uc *uc);
+int xe_uc_init_hw(struct xe_uc *uc);
+int xe_uc_fini_hw(struct xe_uc *uc);
+void xe_uc_gucrc_disable(struct xe_uc *uc);
+int xe_uc_reset_prepare(struct xe_uc *uc);
+void xe_uc_stop_prepare(struct xe_uc *uc);
+int xe_uc_stop(struct xe_uc *uc);
+int xe_uc_start(struct xe_uc *uc);
+int xe_uc_suspend(struct xe_uc *uc);
+int xe_uc_sanitize_reset(struct xe_uc *uc);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_debugfs.c b/drivers/gpu/drm/xe/xe_uc_debugfs.c
new file mode 100644
index 000000000000..0a39ec5a6e99
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_debugfs.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_debugfs.h>
+
+#include "xe_gt.h"
+#include "xe_guc_debugfs.h"
+#include "xe_huc_debugfs.h"
+#include "xe_macros.h"
+#include "xe_uc_debugfs.h"
+
+void xe_uc_debugfs_register(struct xe_uc *uc, struct dentry *parent)
+{
+	struct dentry *root;
+
+	root = debugfs_create_dir("uc", parent);
+	if (IS_ERR(root)) {
+		XE_WARN_ON("Create UC directory failed");
+		return;
+	}
+
+	xe_guc_debugfs_register(&uc->guc, root);
+	xe_huc_debugfs_register(&uc->huc, root);
+}
diff --git a/drivers/gpu/drm/xe/xe_uc_debugfs.h b/drivers/gpu/drm/xe/xe_uc_debugfs.h
new file mode 100644
index 000000000000..a13382df2bd7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_DEBUGFS_H_
+#define _XE_UC_DEBUGFS_H_
+
+struct dentry;
+struct xe_uc;
+
+void xe_uc_debugfs_register(struct xe_uc *uc, struct dentry *parent);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
new file mode 100644
index 000000000000..9dff96dfe455
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -0,0 +1,882 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/bitfield.h>
+#include <linux/firmware.h>
+
+#include <drm/drm_managed.h>
+
+#include "regs/xe_guc_regs.h"
+#include "xe_bo.h"
+#include "xe_device_types.h"
+#include "xe_force_wake.h"
+#include "xe_gsc.h"
+#include "xe_gt.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_module.h"
+#include "xe_uc_fw.h"
+
+/*
+ * List of required GuC and HuC binaries per-platform. They must be ordered
+ * based on platform, from newer to older.
+ *
+ * Versioning follows the guidelines from
+ * Documentation/driver-api/firmware/firmware-usage-guidelines.rst. There is a
+ * distinction for platforms being officially supported by the driver or not.
+ * Platforms not available publicly or not yet officially supported by the
+ * driver (under force-probe), use the mmp_ver(): the firmware autoselect logic
+ * will select the firmware from disk with filename that matches the full
+ * "mmp version", i.e. major.minor.patch. mmp_ver() should only be used for
+ * this case.
+ *
+ * For platforms officially supported by the driver, the filename always only
+ * ever contains the major version (GuC) or no version at all (HuC).
+ *
+ * After loading the file, the driver parses the versions embedded in the blob.
+ * The major version needs to match a major version supported by the driver (if
+ * any). The minor version is also checked and a notice emitted to the log if
+ * the version found is smaller than the version wanted. This is done only for
+ * informational purposes so users may have a chance to upgrade, but the driver
+ * still loads and uses the older firmware.
+ *
+ * Examples:
+ *
+ *	1) Platform officially supported by i915 - using Tigerlake as example.
+ *	   Driver loads the following firmware blobs from disk:
+ *
+ *		- i915/tgl_guc_<major>.bin
+ *		- i915/tgl_huc.bin
+ *
+ *	   <major> number for GuC is checked that it matches the version inside
+ *	   the blob. <minor> version is checked and if smaller than the expected
+ *	   an info message is emitted about that.
+ *
+ *	2) XE_<FUTUREINTELPLATFORM>, still under require_force_probe. Using
+ *	   "wipplat" as a short-name. Driver loads the following firmware blobs
+ *	   from disk:
+ *
+ *		- xe/wipplat_guc_<major>.<minor>.<patch>.bin
+ *		- xe/wipplat_huc_<major>.<minor>.<patch>.bin
+ *
+ *	   <major> and <minor> are checked that they match the version inside
+ *	   the blob. Both of them need to match exactly what the driver is
+ *	   expecting, otherwise it fails.
+ *
+ *	3) Platform officially supported by xe and out of force-probe. Using
+ *	   "plat" as a short-name. Except for the different directory, the
+ *	   behavior is the same as (1). Driver loads the following firmware
+ *	   blobs from disk:
+ *
+ *		- xe/plat_guc_<major>.bin
+ *		- xe/plat_huc.bin
+ *
+ *	   <major> number for GuC is checked that it matches the version inside
+ *	   the blob. <minor> version is checked and if smaller than the expected
+ *	   an info message is emitted about that.
+ *
+ * For the platforms already released with a major version, they should never be
+ * removed from the table. Instead new entries with newer versions may be added
+ * before them, so they take precedence.
+ *
+ * TODO: Currently there's no fallback on major version. That's because xe
+ * driver only supports the one major version of each firmware in the table.
+ * This needs to be fixed when the major version of GuC is updated.
+ */
+
+struct uc_fw_entry {
+	enum xe_platform platform;
+	struct {
+		const char *path;
+		u16 major;
+		u16 minor;
+		bool full_ver_required;
+	};
+};
+
+struct fw_blobs_by_type {
+	const struct uc_fw_entry *entries;
+	u32 count;
+};
+
+#define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver)			\
+	fw_def(METEORLAKE,	major_ver(i915,	guc,	mtl,	70, 7))		\
+	fw_def(DG2,		major_ver(i915,	guc,	dg2,	70, 5))		\
+	fw_def(DG1,		major_ver(i915,	guc,	dg1,	70, 5))		\
+	fw_def(ALDERLAKE_N,	major_ver(i915,	guc,	tgl,	70, 5))		\
+	fw_def(ALDERLAKE_P,	major_ver(i915,	guc,	adlp,	70, 5))		\
+	fw_def(ALDERLAKE_S,	major_ver(i915,	guc,	tgl,	70, 5))		\
+	fw_def(ROCKETLAKE,	major_ver(i915,	guc,	tgl,	70, 5))		\
+	fw_def(TIGERLAKE,	major_ver(i915,	guc,	tgl,	70, 5))
+
+#define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver)		\
+	fw_def(METEORLAKE,	no_ver(i915,	huc_gsc,	mtl))		\
+	fw_def(DG1,		no_ver(i915,	huc,		dg1))		\
+	fw_def(ALDERLAKE_P,	no_ver(i915,	huc,		tgl))		\
+	fw_def(ALDERLAKE_S,	no_ver(i915,	huc,		tgl))		\
+	fw_def(ROCKETLAKE,	no_ver(i915,	huc,		tgl))		\
+	fw_def(TIGERLAKE,	no_ver(i915,	huc,		tgl))
+
+/* for the GSC FW we match the compatibility version and not the release one */
+#define XE_GSC_FIRMWARE_DEFS(fw_def, major_ver)		\
+	fw_def(METEORLAKE,	major_ver(i915,	gsc,	mtl,	1, 0))
+
+#define MAKE_FW_PATH(dir__, uc__, shortname__, version__)			\
+	__stringify(dir__) "/" __stringify(shortname__) "_" __stringify(uc__) version__ ".bin"
+
+#define fw_filename_mmp_ver(dir_, uc_, shortname_, a, b, c)			\
+	MAKE_FW_PATH(dir_, uc_, shortname_, "_" __stringify(a ## . ## b ## . ## c))
+#define fw_filename_major_ver(dir_, uc_, shortname_, a, b)			\
+	MAKE_FW_PATH(dir_, uc_, shortname_, "_" __stringify(a))
+#define fw_filename_no_ver(dir_, uc_, shortname_)				\
+	MAKE_FW_PATH(dir_, uc_, shortname_, "")
+
+#define uc_fw_entry_mmp_ver(dir_, uc_, shortname_, a, b, c)			\
+	{ fw_filename_mmp_ver(dir_, uc_, shortname_, a, b, c),			\
+	  a, b, true }
+#define uc_fw_entry_major_ver(dir_, uc_, shortname_, a, b)			\
+	{ fw_filename_major_ver(dir_, uc_, shortname_, a, b),			\
+	  a, b }
+#define uc_fw_entry_no_ver(dir_, uc_, shortname_)				\
+	{ fw_filename_no_ver(dir_, uc_, shortname_),				\
+	  0, 0 }
+
+/* All blobs need to be declared via MODULE_FIRMWARE() */
+#define XE_UC_MODULE_FIRMWARE(platform__, fw_filename)				\
+	MODULE_FIRMWARE(fw_filename);
+
+#define XE_UC_FW_ENTRY(platform__, entry__)					\
+	{									\
+		.platform = XE_ ## platform__,					\
+		entry__,							\
+	},
+
+XE_GUC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE,
+		     fw_filename_mmp_ver, fw_filename_major_ver)
+XE_HUC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE,
+		     fw_filename_mmp_ver, fw_filename_no_ver)
+XE_GSC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE, fw_filename_major_ver)
+
+static struct xe_gt *
+__uc_fw_to_gt(struct xe_uc_fw *uc_fw, enum xe_uc_fw_type type)
+{
+	XE_WARN_ON(type >= XE_UC_FW_NUM_TYPES);
+
+	switch (type) {
+	case XE_UC_FW_TYPE_GUC:
+		return container_of(uc_fw, struct xe_gt, uc.guc.fw);
+	case XE_UC_FW_TYPE_HUC:
+		return container_of(uc_fw, struct xe_gt, uc.huc.fw);
+	case XE_UC_FW_TYPE_GSC:
+		return container_of(uc_fw, struct xe_gt, uc.gsc.fw);
+	default:
+		return NULL;
+	}
+}
+
+static struct xe_gt *uc_fw_to_gt(struct xe_uc_fw *uc_fw)
+{
+	return __uc_fw_to_gt(uc_fw, uc_fw->type);
+}
+
+static struct xe_device *uc_fw_to_xe(struct xe_uc_fw *uc_fw)
+{
+	return gt_to_xe(uc_fw_to_gt(uc_fw));
+}
+
+/* Fill in the default blob path and wanted version; no-op if no entry matches */
+static void
+uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw)
+{
+	static const struct uc_fw_entry entries_guc[] = {
+		XE_GUC_FIRMWARE_DEFS(XE_UC_FW_ENTRY,
+				     uc_fw_entry_mmp_ver,
+				     uc_fw_entry_major_ver)
+	};
+	static const struct uc_fw_entry entries_huc[] = {
+		XE_HUC_FIRMWARE_DEFS(XE_UC_FW_ENTRY,
+				     uc_fw_entry_mmp_ver,
+				     uc_fw_entry_no_ver)
+	};
+	static const struct uc_fw_entry entries_gsc[] = {
+		XE_GSC_FIRMWARE_DEFS(XE_UC_FW_ENTRY, uc_fw_entry_major_ver)
+	};
+	static const struct fw_blobs_by_type blobs_all[XE_UC_FW_NUM_TYPES] = {
+		[XE_UC_FW_TYPE_GUC] = { entries_guc, ARRAY_SIZE(entries_guc) },
+		[XE_UC_FW_TYPE_HUC] = { entries_huc, ARRAY_SIZE(entries_huc) },
+		[XE_UC_FW_TYPE_GSC] = { entries_gsc, ARRAY_SIZE(entries_gsc) },
+	};
+	/* Plain local: a static cursor would be mutable state shared by all calls */
+	const struct uc_fw_entry *entries;
+	enum xe_platform p = xe->info.platform;
+	u32 count, i;
+
+	xe_assert(xe, uc_fw->type < ARRAY_SIZE(blobs_all));
+	entries = blobs_all[uc_fw->type].entries;
+	count = blobs_all[uc_fw->type].count;
+
+	/* Tables are ordered from newer to older platform: stop once past @p */
+	for (i = 0; i < count && p <= entries[i].platform; i++) {
+		if (p == entries[i].platform) {
+			uc_fw->path = entries[i].path;
+			uc_fw->versions.wanted.major = entries[i].major;
+			uc_fw->versions.wanted.minor = entries[i].minor;
+			uc_fw->full_ver_required = entries[i].full_ver_required;
+
+			uc_fw->versions.wanted_type = uc_fw->type == XE_UC_FW_TYPE_GSC ?
+				XE_UC_FW_VER_COMPATIBILITY : XE_UC_FW_VER_RELEASE;
+
+			break;
+		}
+	}
+}
+
+static void
+uc_fw_override(struct xe_uc_fw *uc_fw)
+{
+	char *path_override = NULL;
+
+	/* empty string disables, but it's not allowed for GuC */
+	switch (uc_fw->type) {
+	case XE_UC_FW_TYPE_GUC:
+		if (xe_modparam.guc_firmware_path && *xe_modparam.guc_firmware_path)
+			path_override = xe_modparam.guc_firmware_path;
+		break;
+	case XE_UC_FW_TYPE_HUC:
+		path_override = xe_modparam.huc_firmware_path;
+		break;
+	case XE_UC_FW_TYPE_GSC:
+		path_override = xe_modparam.gsc_firmware_path;
+		break;
+	default:
+		break;
+	}
+
+	if (path_override) {
+		uc_fw->path = path_override;
+		uc_fw->user_overridden = true;
+	}
+}
+
+/**
+ * xe_uc_fw_copy_rsa - copy fw RSA to buffer
+ *
+ * @uc_fw: uC firmware
+ * @dst: dst buffer
+ * @max_len: max number of bytes to copy
+ *
+ * Return: number of copied bytes.
+ */
+size_t xe_uc_fw_copy_rsa(struct xe_uc_fw *uc_fw, void *dst, u32 max_len)
+{
+	struct xe_device *xe = uc_fw_to_xe(uc_fw);
+	u32 size = min_t(u32, uc_fw->rsa_size, max_len);
+
+	xe_assert(xe, !(size % 4));
+	xe_assert(xe, xe_uc_fw_is_available(uc_fw));
+
+	xe_map_memcpy_from(xe, dst, &uc_fw->bo->vmap,
+			   xe_uc_fw_rsa_offset(uc_fw), size);
+
+	return size;
+}
+
+static void uc_fw_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_uc_fw *uc_fw = arg;
+
+	if (!xe_uc_fw_is_available(uc_fw))
+		return;
+
+	xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_SELECTED);
+}
+
+static void guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css)
+{
+	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
+	struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE];
+	struct xe_uc_fw_version *compatibility = &uc_fw->versions.found[XE_UC_FW_VER_COMPATIBILITY];
+
+	xe_gt_assert(gt, uc_fw->type == XE_UC_FW_TYPE_GUC);
+	xe_gt_assert(gt, release->major >= 70);
+
+	if (release->major > 70 || release->minor >= 6) {
+		/* v70.6.0 adds CSS header support */
+		compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR,
+						 css->submission_version);
+		compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR,
+						 css->submission_version);
+		compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH,
+						 css->submission_version);
+	} else if (release->minor >= 3) {
+		/* v70.3.0 introduced v1.1.0 */
+		compatibility->major = 1;
+		compatibility->minor = 1;
+		compatibility->patch = 0;
+	} else {
+		/* v70.0.0 introduced v1.0.0 */
+		compatibility->major = 1;
+		compatibility->minor = 0;
+		compatibility->patch = 0;
+	}
+
+	uc_fw->private_data_size = css->private_data_size;
+}
+
+int xe_uc_fw_check_version_requirements(struct xe_uc_fw *uc_fw)
+{
+	struct xe_device *xe = uc_fw_to_xe(uc_fw);
+	struct xe_uc_fw_version *wanted = &uc_fw->versions.wanted;
+	struct xe_uc_fw_version *found = &uc_fw->versions.found[uc_fw->versions.wanted_type];
+
+	/* Driver has no requirement on any version, any is good. */
+	if (!wanted->major)
+		return 0;
+
+	/*
+	 * If full version is required, both major and minor should match.
+	 * Otherwise, at least the major version.
+	 */
+	if (wanted->major != found->major ||
+	    (uc_fw->full_ver_required && wanted->minor != found->minor)) {
+		drm_notice(&xe->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
+			   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+			   found->major, found->minor,
+			   wanted->major, wanted->minor);
+		goto fail;
+	}
+
+	if (wanted->minor > found->minor) {
+		drm_notice(&xe->drm, "%s firmware (%u.%u) is recommended, but only (%u.%u) was found in %s\n",
+			   xe_uc_fw_type_repr(uc_fw->type),
+			   wanted->major, wanted->minor,
+			   found->major, found->minor,
+			   uc_fw->path);
+		drm_info(&xe->drm, "Consider updating your linux-firmware pkg or downloading from %s\n",
+			 XE_UC_FIRMWARE_URL);
+	}
+
+	return 0;
+
+fail:
+	if (xe_uc_fw_is_overridden(uc_fw))
+		return 0;
+
+	return -ENOEXEC;
+}
+
+/* Refer to the "CSS-based Firmware Layout" documentation entry for details */
+static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t fw_size)
+{
+	struct xe_device *xe = uc_fw_to_xe(uc_fw);
+	struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE];
+	struct uc_css_header *css;
+	size_t size;
+
+	/* Check the size of the blob before examining buffer contents */
+	if (unlikely(fw_size < sizeof(struct uc_css_header))) {
+		drm_warn(&xe->drm, "%s firmware %s: invalid size: %zu < %zu\n",
+			 xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+			 fw_size, sizeof(struct uc_css_header));
+		return -ENODATA;
+	}
+
+	css = (struct uc_css_header *)fw_data;
+
+	/* Check integrity of size values inside CSS header */
+	size = (css->header_size_dw - css->key_size_dw - css->modulus_size_dw -
+		css->exponent_size_dw) * sizeof(u32);
+	if (unlikely(size != sizeof(struct uc_css_header))) {
+		/* Report the header size derived above, not the blob size */
+		drm_warn(&xe->drm, "%s firmware %s: unexpected header size: %zu != %zu\n",
+			 xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+			 size, sizeof(struct uc_css_header));
+		return -EPROTO;
+	}
+
+	/* uCode size must be calculated from other sizes */
+	uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32);
+
+	/* now RSA */
+	uc_fw->rsa_size = css->key_size_dw * sizeof(u32);
+
+	/* At least, it should have header, uCode and RSA. Size of all three. */
+	size = sizeof(struct uc_css_header) + uc_fw->ucode_size +
+		uc_fw->rsa_size;
+	if (unlikely(fw_size < size)) {
+		drm_warn(&xe->drm, "%s firmware %s: invalid size: %zu < %zu\n",
+			 xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+			 fw_size, size);
+		return -ENOEXEC;
+	}
+
+	/* Get version numbers from the CSS header */
+	release->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->sw_version);
+	release->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->sw_version);
+	release->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->sw_version);
+
+	if (uc_fw->type == XE_UC_FW_TYPE_GUC)
+		guc_read_css_info(uc_fw, css);
+
+	return 0;
+}
+
+static bool is_cpd_header(const void *data)
+{
+	const u32 *marker = data;
+
+	return *marker == GSC_CPD_HEADER_MARKER;
+}
+
+static u32 entry_offset(const struct gsc_cpd_header_v2 *header, const char *name)
+{
+	const struct gsc_cpd_entry *entry;
+	int i;
+
+	entry = (void *)header + header->header_length;
+
+	for (i = 0; i < header->num_of_entries; i++, entry++)
+		if (strcmp(entry->name, name) == 0)
+			return entry->offset & GSC_CPD_ENTRY_OFFSET_MASK;
+
+	return 0;
+}
+
+/* Refer to the "GSC-based Firmware Layout" documentation entry for details */
+static int parse_cpd_header(struct xe_uc_fw *uc_fw, const void *data, size_t size,
+			    const char *manifest_entry, const char *css_entry)
+{
+	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
+	struct xe_device *xe = gt_to_xe(gt);
+	const struct gsc_cpd_header_v2 *header = data;
+	struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE];
+	const struct gsc_manifest_header *manifest;
+	size_t min_size = sizeof(*header);
+	u32 offset;
+
+	/* manifest_entry is mandatory, css_entry is optional */
+	xe_assert(xe, manifest_entry);
+
+	if (size < min_size || !is_cpd_header(header))
+		return -ENOENT;
+
+	if (header->header_length < sizeof(struct gsc_cpd_header_v2)) {
+		xe_gt_err(gt, "invalid CPD header length %u!\n", header->header_length);
+		return -EINVAL;
+	}
+
+	min_size = header->header_length + sizeof(struct gsc_cpd_entry) * header->num_of_entries;
+	if (size < min_size) {
+		xe_gt_err(gt, "FW too small! %zu < %zu\n", size, min_size);
+		return -ENODATA;
+	}
+
+	/* Look for the manifest first */
+	offset = entry_offset(header, manifest_entry);
+	if (!offset) {
+		xe_gt_err(gt, "Failed to find %s manifest!\n",
+			  xe_uc_fw_type_repr(uc_fw->type));
+		return -ENODATA;
+	}
+
+	min_size = offset + sizeof(struct gsc_manifest_header);
+	if (size < min_size) {
+		xe_gt_err(gt, "FW too small! %zu < %zu\n", size, min_size);
+		return -ENODATA;
+	}
+
+	manifest = data + offset;
+
+	release->major = manifest->fw_version.major;
+	release->minor = manifest->fw_version.minor;
+	release->patch = manifest->fw_version.hotfix;
+
+	if (uc_fw->type == XE_UC_FW_TYPE_GSC) {
+		struct xe_gsc *gsc = container_of(uc_fw, struct xe_gsc, fw);
+
+		release->build = manifest->fw_version.build;
+		gsc->security_version = manifest->security_version;
+	}
+
+	/* then optionally look for the css header */
+	if (css_entry) {
+		int ret;
+
+		/*
+		 * This section does not contain a CSS entry on DG2. We
+		 * don't support DG2 HuC right now, so no need to handle
+		 * it, just add a reminder in case that changes.
+		 */
+		xe_assert(xe, xe->info.platform != XE_DG2);
+
+		offset = entry_offset(header, css_entry);
+
+		/* the CSS header parser will check that the CSS header fits */
+		if (offset > size) {
+			xe_gt_err(gt, "FW too small! %zu < %u\n", size, offset);
+			return -ENODATA;
+		}
+
+		ret = parse_css_header(uc_fw, data + offset, size - offset);
+		if (ret)
+			return ret;
+
+		uc_fw->css_offset = offset;
+	}
+
+	uc_fw->has_gsc_headers = true;
+
+	return 0;
+}
+
+static int parse_gsc_layout(struct xe_uc_fw *uc_fw, const void *data, size_t size)
+{
+	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
+	const struct gsc_layout_pointers *layout = data;
+	const struct gsc_bpdt_header *bpdt_header = NULL;
+	const struct gsc_bpdt_entry *bpdt_entry = NULL;
+	size_t min_size = sizeof(*layout);
+	int i;
+
+	if (size < min_size) {
+		xe_gt_err(gt, "GSC FW too small! %zu < %zu\n", size, min_size);
+		return -ENODATA;
+	}
+
+	/* Widen before adding: a wrapped 32-bit sum would defeat the bounds check */
+	min_size = (size_t)layout->boot1.offset + layout->boot1.size;
+	if (size < min_size) {
+		xe_gt_err(gt, "GSC FW too small for boot section! %zu < %zu\n",
+			  size, min_size);
+		return -ENODATA;
+	}
+
+	min_size = sizeof(*bpdt_header);
+	if (layout->boot1.size < min_size) {
+		xe_gt_err(gt, "GSC FW boot section too small for BPDT header: %u < %zu\n",
+			  layout->boot1.size, min_size);
+		return -ENODATA;
+	}
+
+	bpdt_header = data + layout->boot1.offset;
+	if (bpdt_header->signature != GSC_BPDT_HEADER_SIGNATURE) {
+		xe_gt_err(gt, "invalid signature for BPDT header: 0x%08x!\n",
+			  bpdt_header->signature);
+		return -EINVAL;
+	}
+
+	min_size += sizeof(*bpdt_entry) * bpdt_header->descriptor_count;
+	if (layout->boot1.size < min_size) {
+		xe_gt_err(gt, "GSC FW boot section too small for BPDT entries: %u < %zu\n",
+			  layout->boot1.size, min_size);
+		return -ENODATA;
+	}
+
+	bpdt_entry = (void *)bpdt_header + sizeof(*bpdt_header);
+	for (i = 0; i < bpdt_header->descriptor_count; i++, bpdt_entry++) {
+		if ((bpdt_entry->type & GSC_BPDT_ENTRY_TYPE_MASK) != GSC_BPDT_ENTRY_TYPE_GSC_RBE)
+			continue;
+
+		min_size = bpdt_entry->sub_partition_offset;
+
+		/* the CPD header parser will check that the CPD header fits */
+		if (layout->boot1.size < min_size) {
+			xe_gt_err(gt, "GSC FW boot section too small for CPD offset: %u < %zu\n",
+				  layout->boot1.size, min_size);
+			return -ENODATA;
+		}
+
+		return parse_cpd_header(uc_fw,
+					(void *)bpdt_header + min_size,
+					layout->boot1.size - min_size,
+					"RBEP.man", NULL);
+	}
+
+	xe_gt_err(gt, "couldn't find CPD header in GSC binary!\n");
+	return -ENODATA;
+}
+
+/* Parse the headers of @fw using the layout that matches the firmware type */
+static int parse_headers(struct xe_uc_fw *uc_fw, const struct firmware *fw)
+{
+	int err;
+
+	if (uc_fw->type == XE_UC_FW_TYPE_GSC)
+		return parse_gsc_layout(uc_fw, fw->data, fw->size);
+
+	if (uc_fw->type != XE_UC_FW_TYPE_HUC && uc_fw->type != XE_UC_FW_TYPE_GUC)
+		return -EINVAL;
+
+	/*
+	 * All GuC releases and older HuC ones use CSS headers, while newer HuC
+	 * releases use GSC CPD headers. -ENOENT from the CPD parser means no CPD
+	 * header was recognized, so a HuC blob then falls back to the CSS parser.
+	 */
+	if (uc_fw->type == XE_UC_FW_TYPE_HUC) {
+		err = parse_cpd_header(uc_fw, fw->data, fw->size, "HUCP.man", "huc_fw");
+		if (err != -ENOENT)
+			return err;
+	}
+
+	return parse_css_header(uc_fw, fw->data, fw->size);
+}
+
+#define print_uc_fw_version(p_, version_, prefix_, ...) \
+do { \
+	struct xe_uc_fw_version *ver_ = (version_); \
+	if (ver_->build) \
+		drm_printf(p_, prefix_ " version %u.%u.%u.%u\n", ##__VA_ARGS__, \
+			   ver_->major, ver_->minor, \
+			   ver_->patch, ver_->build); \
+	else \
+		drm_printf(p_, prefix_ " version %u.%u.%u\n", ##__VA_ARGS__, \
+			  ver_->major, ver_->minor, ver_->patch); \
+} while (0)
+
+/* Select, fetch and validate the blob; *firmware_p is set only if one was loaded */
+static int uc_fw_request(struct xe_uc_fw *uc_fw, const struct firmware **firmware_p)
+{
+	struct xe_device *xe = uc_fw_to_xe(uc_fw);
+	struct device *dev = xe->drm.dev;
+	struct drm_printer p = drm_info_printer(dev);
+	const struct firmware *fw = NULL;
+	int err;
+
+	/*
+	 * we use FIRMWARE_UNINITIALIZED to detect checks against uc_fw->status
+	 * before we've looked at the HW caps to see if we have uc support
+	 */
+	BUILD_BUG_ON(XE_UC_FIRMWARE_UNINITIALIZED);
+	xe_assert(xe, !uc_fw->status);
+	xe_assert(xe, !uc_fw->path);
+
+	uc_fw_auto_select(xe, uc_fw);
+	xe_uc_fw_change_status(uc_fw, uc_fw->path ?
+			       XE_UC_FIRMWARE_SELECTED :
+			       XE_UC_FIRMWARE_NOT_SUPPORTED);
+
+	if (!xe_uc_fw_is_supported(uc_fw))
+		return 0;
+
+	uc_fw_override(uc_fw);
+
+	/* an empty path means the firmware is disabled */
+	if (!xe_device_uc_enabled(xe) || !(*uc_fw->path)) {
+		xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_DISABLED);
+		drm_dbg(&xe->drm, "%s disabled\n", xe_uc_fw_type_repr(uc_fw->type));
+		return 0;
+	}
+
+	err = request_firmware(&fw, uc_fw->path, dev);
+	if (err)
+		goto fail;
+
+	err = parse_headers(uc_fw, fw);
+	if (err)
+		goto fail;
+
+	print_uc_fw_version(&p,
+			    &uc_fw->versions.found[XE_UC_FW_VER_RELEASE],
+			    "Using %s firmware from %s",
+			    xe_uc_fw_type_repr(uc_fw->type), uc_fw->path);
+
+	/* for GSC FW we want the compatibility version, which we query after load */
+	if (uc_fw->type != XE_UC_FW_TYPE_GSC) {
+		err = xe_uc_fw_check_version_requirements(uc_fw);
+		if (err)
+			goto fail;
+	}
+
+	*firmware_p = fw;
+
+	return 0;
+
+fail:
+	xe_uc_fw_change_status(uc_fw, err == -ENOENT ?
+			       XE_UC_FIRMWARE_MISSING : XE_UC_FIRMWARE_ERROR);
+
+	drm_notice(&xe->drm, "%s firmware %s: fetch failed with error %d\n",
+		   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
+	drm_info(&xe->drm, "%s firmware(s) can be downloaded from %s\n",
+		 xe_uc_fw_type_repr(uc_fw->type), XE_UC_FIRMWARE_URL);
+
+	release_firmware(fw);		/* OK even if fw is NULL */
+
+	return err;
+}
+
+static void uc_fw_release(const struct firmware *fw)
+{
+	release_firmware(fw);
+}
+
+static int uc_fw_copy(struct xe_uc_fw *uc_fw, const void *data, size_t size, u32 flags)
+{
+	struct xe_device *xe = uc_fw_to_xe(uc_fw);
+	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_bo *obj;
+	int err;
+
+	obj = xe_managed_bo_create_from_data(xe, tile, data, size, flags);
+	if (IS_ERR(obj)) {
+		drm_notice(&xe->drm, "%s firmware %s: failed to create / populate bo\n",
+			   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path);
+		err = PTR_ERR(obj);
+		goto fail;
+	}
+
+	uc_fw->bo = obj;
+	uc_fw->size = size;
+
+	xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_AVAILABLE);
+
+	err = drmm_add_action_or_reset(&xe->drm, uc_fw_fini, uc_fw);
+	if (err)
+		goto fail;
+
+	return 0;
+
+fail:
+	xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_ERROR);
+	drm_notice(&xe->drm, "%s firmware %s: copy failed with error %d\n",
+		   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
+
+	return err;
+}
+
+int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
+{
+	const struct firmware *fw = NULL;
+	int err;
+
+	err = uc_fw_request(uc_fw, &fw);
+	if (err)
+		return err;
+
+	/* no error and no firmware means nothing to copy */
+	if (!fw)
+		return 0;
+
+	err = uc_fw_copy(uc_fw, fw->data, fw->size,
+			 XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_GGTT_BIT);
+
+	uc_fw_release(fw);
+
+	return err;
+}
+
+static u32 uc_fw_ggtt_offset(struct xe_uc_fw *uc_fw)
+{
+	return xe_bo_ggtt_addr(uc_fw->bo);
+}
+
+static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
+{
+	struct xe_device *xe = uc_fw_to_xe(uc_fw);
+	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
+	u32 src_offset, dma_ctrl;
+	int ret;
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+	/* Set the source address for the uCode */
+	src_offset = uc_fw_ggtt_offset(uc_fw) + uc_fw->css_offset;
+	xe_mmio_write32(gt, DMA_ADDR_0_LOW, lower_32_bits(src_offset));
+	xe_mmio_write32(gt, DMA_ADDR_0_HIGH,
+			upper_32_bits(src_offset) | DMA_ADDRESS_SPACE_GGTT);
+
+	/* Set the DMA destination */
+	xe_mmio_write32(gt, DMA_ADDR_1_LOW, offset);
+	xe_mmio_write32(gt, DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM);
+
+	/*
+	 * Set the transfer size. The header plus uCode will be copied to WOPCM
+	 * via DMA, excluding any other components
+	 */
+	xe_mmio_write32(gt, DMA_COPY_SIZE,
+			sizeof(struct uc_css_header) + uc_fw->ucode_size);
+
+	/* Start the DMA */
+	xe_mmio_write32(gt, DMA_CTRL,
+			_MASKED_BIT_ENABLE(dma_flags | START_DMA));
+
+	/* Wait for DMA to finish */
+	ret = xe_mmio_wait32(gt, DMA_CTRL, START_DMA, 0, 100000, &dma_ctrl,
+			     false);
+	if (ret)
+		drm_err(&xe->drm, "DMA for %s fw failed, DMA_CTRL=%u\n",
+			xe_uc_fw_type_repr(uc_fw->type), dma_ctrl);
+
+	/* Disable the bits once DMA is over */
+	xe_mmio_write32(gt, DMA_CTRL, _MASKED_BIT_DISABLE(dma_flags));
+
+	return ret;
+}
+
+int xe_uc_fw_upload(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
+{
+	struct xe_device *xe = uc_fw_to_xe(uc_fw);
+	int err;
+
+	/* make sure the status was cleared the last time we reset the uc */
+	xe_assert(xe, !xe_uc_fw_is_loaded(uc_fw));
+
+	if (!xe_uc_fw_is_loadable(uc_fw))
+		return -ENOEXEC;
+
+	/* Call custom loader */
+	err = uc_fw_xfer(uc_fw, offset, dma_flags);
+	if (err)
+		goto fail;
+
+	xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_TRANSFERRED);
+	return 0;
+
+fail:
+	drm_err(&xe->drm, "Failed to load %s firmware %s (%d)\n",
+		xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+		err);
+	xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_LOAD_FAIL);
+	return err;
+}
+
+static const char *version_type_repr(enum xe_uc_fw_version_types type)
+{
+	/* Map a version type to the label used in printed output */
+	if (type == XE_UC_FW_VER_RELEASE)
+		return "release";
+
+	if (type == XE_UC_FW_VER_COMPATIBILITY)
+		return "compatibility";
+
+	return "Unknown version type";
+}
+
+void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p)
+{
+	int i;
+
+	drm_printf(p, "%s firmware: %s\n",
+		   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path);
+	drm_printf(p, "\tstatus: %s\n",
+		   xe_uc_fw_status_repr(uc_fw->status));
+
+	print_uc_fw_version(p, &uc_fw->versions.wanted, "\twanted %s",
+			    version_type_repr(uc_fw->versions.wanted_type));
+
+	for (i = 0; i < XE_UC_FW_VER_TYPE_COUNT; i++) {
+		struct xe_uc_fw_version *ver = &uc_fw->versions.found[i];
+
+		if (ver->major)
+			print_uc_fw_version(p, ver, "\tfound %s",
+					    version_type_repr(i));
+	}
+
+	if (uc_fw->ucode_size)
+		drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size);
+	if (uc_fw->rsa_size)
+		drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size);
+}
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h
new file mode 100644
index 000000000000..85c20795d1f8
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_fw.h
@@ -0,0 +1,184 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_FW_H_
+#define _XE_UC_FW_H_
+
+#include <linux/errno.h>
+
+#include "xe_macros.h"
+#include "xe_uc_fw_abi.h"
+#include "xe_uc_fw_types.h"
+
+struct drm_printer;
+
+int xe_uc_fw_init(struct xe_uc_fw *uc_fw);
+size_t xe_uc_fw_copy_rsa(struct xe_uc_fw *uc_fw, void *dst, u32 max_len);
+int xe_uc_fw_upload(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags);
+int xe_uc_fw_check_version_requirements(struct xe_uc_fw *uc_fw);
+void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p);
+
+static inline u32 xe_uc_fw_rsa_offset(struct xe_uc_fw *uc_fw)
+{
+	return sizeof(struct uc_css_header) + uc_fw->ucode_size + uc_fw->css_offset;
+}
+
+static inline void xe_uc_fw_change_status(struct xe_uc_fw *uc_fw,
+					  enum xe_uc_fw_status status)
+{
+	uc_fw->__status = status;
+}
+
+static inline
+const char *xe_uc_fw_status_repr(enum xe_uc_fw_status status)
+{
+	switch (status) {
+	case XE_UC_FIRMWARE_NOT_SUPPORTED:
+		return "N/A";
+	case XE_UC_FIRMWARE_UNINITIALIZED:
+		return "UNINITIALIZED";
+	case XE_UC_FIRMWARE_DISABLED:
+		return "DISABLED";
+	case XE_UC_FIRMWARE_SELECTED:
+		return "SELECTED";
+	case XE_UC_FIRMWARE_MISSING:
+		return "MISSING";
+	case XE_UC_FIRMWARE_ERROR:
+		return "ERROR";
+	case XE_UC_FIRMWARE_AVAILABLE:
+		return "AVAILABLE";
+	case XE_UC_FIRMWARE_INIT_FAIL:
+		return "INIT FAIL";
+	case XE_UC_FIRMWARE_LOADABLE:
+		return "LOADABLE";
+	case XE_UC_FIRMWARE_LOAD_FAIL:
+		return "LOAD FAIL";
+	case XE_UC_FIRMWARE_TRANSFERRED:
+		return "TRANSFERRED";
+	case XE_UC_FIRMWARE_RUNNING:
+		return "RUNNING";
+	}
+	return "<invalid>";
+}
+
+static inline int xe_uc_fw_status_to_error(enum xe_uc_fw_status status)
+{
+	switch (status) {
+	case XE_UC_FIRMWARE_NOT_SUPPORTED:
+		return -ENODEV;
+	case XE_UC_FIRMWARE_UNINITIALIZED:
+		return -EACCES;
+	case XE_UC_FIRMWARE_DISABLED:
+		return -EPERM;
+	case XE_UC_FIRMWARE_MISSING:
+		return -ENOENT;
+	case XE_UC_FIRMWARE_ERROR:
+		return -ENOEXEC;
+	case XE_UC_FIRMWARE_INIT_FAIL:
+	case XE_UC_FIRMWARE_LOAD_FAIL:
+		return -EIO;
+	case XE_UC_FIRMWARE_SELECTED:
+		return -ESTALE;
+	case XE_UC_FIRMWARE_AVAILABLE:
+	case XE_UC_FIRMWARE_LOADABLE:
+	case XE_UC_FIRMWARE_TRANSFERRED:
+	case XE_UC_FIRMWARE_RUNNING:
+		return 0;
+	}
+	return -EINVAL;
+}
+
+static inline const char *xe_uc_fw_type_repr(enum xe_uc_fw_type type)
+{
+	switch (type) {
+	case XE_UC_FW_TYPE_GUC:
+		return "GuC";
+	case XE_UC_FW_TYPE_HUC:
+		return "HuC";
+	case XE_UC_FW_TYPE_GSC:
+		return "GSC";
+	default:
+		return "uC";
+	}
+}
+
+static inline enum xe_uc_fw_status
+__xe_uc_fw_status(struct xe_uc_fw *uc_fw)
+{
+	/* shouldn't call this before checking hw/blob availability */
+	XE_WARN_ON(uc_fw->status == XE_UC_FIRMWARE_UNINITIALIZED);
+	return uc_fw->status;
+}
+
+static inline bool xe_uc_fw_is_supported(struct xe_uc_fw *uc_fw)
+{
+	return __xe_uc_fw_status(uc_fw) != XE_UC_FIRMWARE_NOT_SUPPORTED;
+}
+
+static inline bool xe_uc_fw_is_enabled(struct xe_uc_fw *uc_fw)
+{
+	return __xe_uc_fw_status(uc_fw) > XE_UC_FIRMWARE_DISABLED;
+}
+
+static inline bool xe_uc_fw_is_disabled(struct xe_uc_fw *uc_fw)
+{
+	return __xe_uc_fw_status(uc_fw) == XE_UC_FIRMWARE_DISABLED;
+}
+
+static inline bool xe_uc_fw_is_available(struct xe_uc_fw *uc_fw)
+{
+	return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_AVAILABLE;
+}
+
+static inline bool xe_uc_fw_is_loadable(struct xe_uc_fw *uc_fw)
+{
+	return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_LOADABLE;
+}
+
+static inline bool xe_uc_fw_is_loaded(struct xe_uc_fw *uc_fw)
+{
+	return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_TRANSFERRED;
+}
+
+static inline bool xe_uc_fw_is_running(struct xe_uc_fw *uc_fw)
+{
+	return __xe_uc_fw_status(uc_fw) == XE_UC_FIRMWARE_RUNNING;
+}
+
+static inline bool xe_uc_fw_is_overridden(const struct xe_uc_fw *uc_fw)
+{
+	return uc_fw->user_overridden;
+}
+
+static inline void xe_uc_fw_sanitize(struct xe_uc_fw *uc_fw)
+{
+	if (xe_uc_fw_is_loaded(uc_fw))
+		xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_LOADABLE);
+}
+
+static inline u32 __xe_uc_fw_get_upload_size(struct xe_uc_fw *uc_fw)
+{
+	return sizeof(struct uc_css_header) + uc_fw->ucode_size;
+}
+
+/**
+ * xe_uc_fw_get_upload_size() - Get size of firmware needed to be uploaded.
+ * @uc_fw: uC firmware.
+ *
+ * Get the size of the firmware and header that will be uploaded to WOPCM.
+ *
+ * Return: Upload firmware size, or zero on firmware fetch failure.
+ */
+static inline u32 xe_uc_fw_get_upload_size(struct xe_uc_fw *uc_fw)
+{
+	if (!xe_uc_fw_is_available(uc_fw))
+		return 0;
+
+	return __xe_uc_fw_get_upload_size(uc_fw);
+}
+
+#define XE_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git"
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_abi.h b/drivers/gpu/drm/xe/xe_uc_fw_abi.h
new file mode 100644
index 000000000000..87ade41209d0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_fw_abi.h
@@ -0,0 +1,321 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_FW_ABI_H
+#define _XE_UC_FW_ABI_H
+
+#include <linux/build_bug.h>
+#include <linux/types.h>
+
+/**
+ * DOC: CSS-based Firmware Layout
+ *
+ * The CSS-based firmware structure is used for GuC releases on all platforms
+ * and for HuC releases up to DG1. Starting from DG2/MTL the HuC uses the GSC
+ * layout instead.
+ * The CSS firmware layout looks like this::
+ *
+ *      +======================================================================+
+ *      |  Firmware blob                                                       |
+ *      +===============+===============+============+============+============+
+ *      |  CSS header   |     uCode     |  RSA key   |  modulus   |  exponent  |
+ *      +===============+===============+============+============+============+
+ *       <-header size->                 <---header size continued ----------->
+ *       <--- size ----------------------------------------------------------->
+ *                                       <-key size->
+ *                                                    <-mod size->
+ *                                                                 <-exp size->
+ *
+ * The firmware may or may not have modulus key and exponent data. The header,
+ * uCode and RSA signature are must-have components that will be used by the
+ * driver. The length of each component, all in dwords, can be found in the
+ * header. If the modulus and exponent are not present in the fw (a.k.a. a
+ * truncated image), their length values still appear in the header.
+ *
+ * Driver will do some basic fw size validation based on the following rules:
+ *
+ * 1. Header, uCode and RSA are must-have components.
+ * 2. All firmware components, if present, are in the sequence illustrated
+ *    in the layout table above.
+ * 3. Length info of each component can be found in header, in dwords.
+ * 4. Modulus and exponent key are not required by driver. They may not appear
+ *    in fw. So driver will load a truncated firmware in this case.
+ */
+
+struct uc_css_header {
+	u32 module_type;
+	/*
+	 * header_size_dw covers all non-uCode bits: the css_header itself,
+	 * the rsa key, the modulus key and the exponent data.
+	 */
+	u32 header_size_dw;
+	u32 header_version;
+	u32 module_id;
+	u32 module_vendor;
+	u32 date;
+#define CSS_DATE_DAY			(0xFF << 0)
+#define CSS_DATE_MONTH			(0xFF << 8)
+#define CSS_DATE_YEAR			(0xFFFFU << 16) /* unsigned: mask reaches bit 31 */
+	u32 size_dw; /* uCode plus header_size_dw */
+	u32 key_size_dw;
+	u32 modulus_size_dw;
+	u32 exponent_size_dw;
+	u32 time; /* note: the MIN/SEC masks below carry a CSS_DATE_ prefix */
+#define CSS_TIME_HOUR			(0xFF << 0)
+#define CSS_DATE_MIN			(0xFF << 8)
+#define CSS_DATE_SEC			(0xFFFFU << 16) /* unsigned: mask reaches bit 31 */
+	char username[8];
+	char buildnumber[12];
+	u32 sw_version;
+#define CSS_SW_VERSION_UC_MAJOR		(0xFF << 16)
+#define CSS_SW_VERSION_UC_MINOR		(0xFF << 8)
+#define CSS_SW_VERSION_UC_PATCH		(0xFF << 0)
+	union {
+		u32 submission_version; /* only applies to GuC */
+		u32 reserved2;
+	};
+	u32 reserved0[12];
+	union {
+		u32 private_data_size; /* only applies to GuC */
+		u32 reserved1;
+	};
+	u32 header_info;
+} __packed;
+static_assert(sizeof(struct uc_css_header) == 128);
+
+/**
+ * DOC: GSC-based Firmware Layout
+ *
+ * The GSC-based firmware structure is used for GSC releases on all platforms
+ * and for HuC releases starting from DG2/MTL. Older HuC releases use the
+ * CSS-based layout instead. Differently from the CSS headers, the GSC headers
+ * use a directory + entries structure (i.e., there is an array of addresses
+ * pointing to specific header extensions identified by a name). Although the
+ * header structures are the same, some of the entries are specific to GSC while
+ * others are specific to HuC. The manifest header entry, which includes basic
+ * information about the binary (like the version) is always present, but it is
+ * named differently based on the binary type.
+ *
+ * The HuC binary starts with a Code Partition Directory (CPD) header. The
+ * entries we're interested in for use in the driver are:
+ *
+ * 1. "HUCP.man": points to the manifest header for the HuC.
+ * 2. "huc_fw": points to the FW code. On platforms that support load via DMA
+ *    and 2-step HuC authentication (i.e. MTL+) this is a full CSS-based binary,
+ *    while if the GSC is the one doing the load (which only happens on DG2)
+ *    this section only contains the uCode.
+ *
+ * The GSC-based HuC firmware layout looks like this::
+ *
+ *	+================================================+
+ *	|  CPD Header                                    |
+ *	+================================================+
+ *	|  CPD entries[]                                 |
+ *	|      entry1                                    |
+ *	|      ...                                       |
+ *	|      entryX                                    |
+ *	|          "HUCP.man"                            |
+ *	|           ...                                  |
+ *	|           offset  >----------------------------|------o
+ *	|      ...                                       |      |
+ *	|      entryY                                    |      |
+ *	|          "huc_fw"                              |      |
+ *	|           ...                                  |      |
+ *	|           offset  >----------------------------|----------o
+ *	+================================================+      |   |
+ *	                                                        |   |
+ *	+================================================+      |   |
+ *	|  Manifest Header                               |<-----o   |
+ *	|      ...                                       |          |
+ *	|      FW version                                |          |
+ *	|      ...                                       |          |
+ *	+================================================+          |
+ *	                                                            |
+ *	+================================================+          |
+ *	|  FW binary                                     |<---------o
+ *	|      CSS (MTL+ only)                           |
+ *	|      uCode                                     |
+ *	|      RSA Key (MTL+ only)                       |
+ *	|      ...                                       |
+ *	+================================================+
+ *
+ * The GSC binary starts instead with a layout header, which contains the
+ * locations of the various partitions of the binary. The one we're interested
+ * in is the boot1 partition, where we can find a BPDT header followed by
+ * entries, one of which points to the RBE sub-section of the partition, which
+ * contains the CPD. The GSC blob does not contain a CSS-based binary, so we
+ * only need to look for the manifest, which is under the "RBEP.man" CPD entry.
+ * Note that we have no need to find where the actual FW code is inside the
+ * image because the GSC ROM will itself parse the headers to find it and load
+ * it.
+ * The GSC firmware header layout looks like this::
+ *
+ *	+================================================+
+ *	|  Layout Pointers                               |
+ *	|      ...                                       |
+ *	|      Boot1 offset  >---------------------------|------o
+ *	|      ...                                       |      |
+ *	+================================================+      |
+ *	                                                        |
+ *	+================================================+      |
+ *	|  BPDT header                                   |<-----o
+ *	+================================================+
+ *	|  BPDT entries[]                                |
+ *	|      entry1                                    |
+ *	|      ...                                       |
+ *	|      entryX                                    |
+ *	|          type == GSC_RBE                       |
+ *	|          offset  >-----------------------------|------o
+ *	|      ...                                       |      |
+ *	+================================================+      |
+ *	                                                        |
+ *	+================================================+      |
+ *	|  CPD Header                                    |<-----o
+ *	+================================================+
+ *	|  CPD entries[]                                 |
+ *	|      entry1                                    |
+ *	|      ...                                       |
+ *	|      entryX                                    |
+ *	|          "RBEP.man"                            |
+ *	|           ...                                  |
+ *	|           offset  >----------------------------|------o
+ *	|      ...                                       |      |
+ *	+================================================+      |
+ *	                                                        |
+ *	+================================================+      |
+ *	| Manifest Header                                |<-----o
+ *	|  ...                                           |
+ *	|  FW version                                    |
+ *	|  ...                                           |
+ *	|  Security version                              |
+ *	|  ...                                           |
+ *	+================================================+
+ */
+
+struct gsc_version {
+	u16 major;
+	u16 minor;
+	u16 hotfix;
+	u16 build;
+} __packed;
+
+struct gsc_partition {
+	u32 offset;
+	u32 size;
+} __packed;
+
+struct gsc_layout_pointers {
+	u8 rom_bypass_vector[16];
+
+	/* size of this header section, not including ROM bypass vector */
+	u16 size;
+
+	/*
+	 * bit0: Backup copy of layout pointers exists
+	 * bits1-7: reserved
+	 */
+	u8 flags;
+
+	u8 reserved;
+
+	u32 crc32;
+
+	struct gsc_partition datap;
+	struct gsc_partition boot1;
+	struct gsc_partition boot2;
+	struct gsc_partition boot3;
+	struct gsc_partition boot4;
+	struct gsc_partition boot5;
+	struct gsc_partition temp_pages;
+} __packed;
+
+/* Boot partition structures */
+struct gsc_bpdt_header {
+	u32 signature;
+#define GSC_BPDT_HEADER_SIGNATURE 0x000055AA
+
+	u16 descriptor_count; /* num of entries after the header */
+
+	u8 version;
+	u8 configuration;
+
+	u32 crc32;
+
+	u32 build_version;
+	struct gsc_version tool_version;
+} __packed;
+
+struct gsc_bpdt_entry {
+	/*
+	 * Bits 0-15: BPDT entry type
+	 * Bits 16-17: reserved
+	 * Bit 18: code sub-partition
+	 * Bits 19-31: reserved
+	 */
+	u32 type;
+#define GSC_BPDT_ENTRY_TYPE_MASK GENMASK(15, 0)
+#define GSC_BPDT_ENTRY_TYPE_GSC_RBE 0x1
+
+	u32 sub_partition_offset; /* from the base of the BPDT header */
+	u32 sub_partition_size;
+} __packed;
+
+/* Code partition directory (CPD) structures */
+struct gsc_cpd_header_v2 {
+	u32 header_marker;
+#define GSC_CPD_HEADER_MARKER 0x44504324
+
+	u32 num_of_entries;
+	u8 header_version;
+	u8 entry_version;
+	u8 header_length; /* in bytes */
+	u8 flags;
+	u32 partition_name;
+	u32 crc32;
+} __packed;
+
+struct gsc_cpd_entry {
+	u8 name[12];
+
+	/*
+	 * Bits 0-24: offset from the beginning of the code partition
+	 * Bit 25: huffman compressed
+	 * Bits 26-31: reserved
+	 */
+	u32 offset;
+#define GSC_CPD_ENTRY_OFFSET_MASK GENMASK(24, 0)
+#define GSC_CPD_ENTRY_HUFFMAN_COMP BIT(25)
+
+	/*
+	 * Module/Item length, in bytes. For Huffman-compressed modules, this
+	 * refers to the uncompressed size. For software-compressed modules,
+	 * this refers to the compressed size.
+	 */
+	u32 length;
+
+	u8 reserved[4];
+} __packed;
+
+struct gsc_manifest_header {
+	u32 header_type; /* 0x4 for manifest type */
+	u32 header_length; /* in dwords */
+	u32 header_version;
+	u32 flags;
+	u32 vendor;
+	u32 date;
+	u32 size; /* In dwords, size of entire manifest (header + extensions) */
+	u32 header_id;
+	u32 internal_data;
+	struct gsc_version fw_version;
+	u32 security_version;
+	struct gsc_version meu_kit_version;
+	u32 meu_manifest_version;
+	u8 general_data[4];
+	u8 reserved3[56];
+	u32 modulus_size; /* in dwords */
+	u32 exponent_size; /* in dwords */
+} __packed;
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h
new file mode 100644
index 000000000000..ee914a5d8523
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_FW_TYPES_H_
+#define _XE_UC_FW_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_bo;
+
+/*
+ * +------------+---------------------------------------------------+
+ * |   PHASE    |           FIRMWARE STATUS TRANSITIONS             |
+ * +============+===================================================+
+ * |            |               UNINITIALIZED                       |
+ * +------------+-               /   |   \                         -+
+ * |            |   DISABLED <--/    |    \--> NOT_SUPPORTED        |
+ * | init_early |                    V                              |
+ * |            |                 SELECTED                          |
+ * +------------+-               /   |   \                         -+
+ * |            |    MISSING <--/    |    \--> ERROR                |
+ * |   fetch    |                    V                              |
+ * |            |                 AVAILABLE                         |
+ * +------------+-                   |   \                         -+
+ * |            |                    |    \--> INIT FAIL            |
+ * |   init     |                    V                              |
+ * |            |        /------> LOADABLE <----<-----------\       |
+ * +------------+-       \         /    \        \           \     -+
+ * |            |    LOAD FAIL <--<      \--> TRANSFERRED     \     |
+ * |   upload   |                  \           /   \          /     |
+ * |            |                   \---------/     \--> RUNNING    |
+ * +------------+---------------------------------------------------+
+ */
+
+/*
+ * FIXME: Ported from i915, and this state machine is way too complicated.
+ * Circle back and simplify this.
+ */
+enum xe_uc_fw_status {
+	XE_UC_FIRMWARE_NOT_SUPPORTED = -1, /* no uc HW */
+	XE_UC_FIRMWARE_UNINITIALIZED = 0, /* used to catch checks done too early */
+	XE_UC_FIRMWARE_DISABLED, /* disabled */
+	XE_UC_FIRMWARE_SELECTED, /* selected the blob we want to load */
+	XE_UC_FIRMWARE_MISSING, /* blob not found on the system */
+	XE_UC_FIRMWARE_ERROR, /* invalid format or version */
+	XE_UC_FIRMWARE_AVAILABLE, /* blob found and copied in mem */
+	XE_UC_FIRMWARE_INIT_FAIL, /* failed to prepare fw objects for load */
+	XE_UC_FIRMWARE_LOADABLE, /* all fw-required objects are ready */
+	XE_UC_FIRMWARE_LOAD_FAIL, /* failed to xfer or init/auth the fw */
+	XE_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */
+	XE_UC_FIRMWARE_RUNNING /* init/auth done */
+};
+
+enum xe_uc_fw_type {
+	XE_UC_FW_TYPE_GUC = 0,
+	XE_UC_FW_TYPE_HUC,
+	XE_UC_FW_TYPE_GSC,
+	XE_UC_FW_NUM_TYPES
+};
+
+/**
+ * struct xe_uc_fw_version - Version for XE micro controller firmware
+ */
+struct xe_uc_fw_version {
+	/** @major: major version of the FW */
+	u16 major;
+	/** @minor: minor version of the FW */
+	u16 minor;
+	/** @patch: patch version of the FW */
+	u16 patch;
+	/** @build: build version of the FW (not always available) */
+	u16 build;
+};
+
+enum xe_uc_fw_version_types {
+	XE_UC_FW_VER_RELEASE,
+	XE_UC_FW_VER_COMPATIBILITY,
+	XE_UC_FW_VER_TYPE_COUNT
+};
+
+/**
+ * struct xe_uc_fw - XE micro controller firmware
+ */
+struct xe_uc_fw {
+	/** @type: type of uC firmware */
+	enum xe_uc_fw_type type;
+	union {
+		/** @status: firmware load status */
+		const enum xe_uc_fw_status status;
+		/**
+		 * @__status: private firmware load status - only to be used
+		 * by firmware loading code
+		 */
+		enum xe_uc_fw_status __status;
+	};
+	/** @path: path to uC firmware */
+	const char *path;
+	/** @user_overridden: user provided path to uC firmware via modparam */
+	bool user_overridden;
+	/**
+	 * @full_ver_required: driver still under development and not ready
+	 * for backward-compatible firmware. To be used only for **new**
+	 * platforms, i.e. still under require_force_probe protection and not
+	 * supported by i915.
+	 */
+	bool full_ver_required;
+	/** @size: size of uC firmware including css header */
+	size_t size;
+
+	/** @bo: XE BO for uC firmware */
+	struct xe_bo *bo;
+
+	/** @has_gsc_headers: whether the FW image starts with GSC headers */
+	bool has_gsc_headers;
+
+	/*
+	 * The firmware build process will generate a version header file with
+	 * major and minor version defined. The versions are built into CSS
+	 * header of firmware. The xe kernel driver sets the minimal firmware
+	 * version required per platform.
+	 */
+
+	/** @versions: FW versions wanted and found */
+	struct {
+		/** @wanted: firmware version wanted by platform */
+		struct xe_uc_fw_version wanted;
+		/** @wanted_type: type of firmware version wanted (release vs compatibility) */
+		enum xe_uc_fw_version_types wanted_type;
+		/** @found: fw versions found in firmware blob */
+		struct xe_uc_fw_version found[XE_UC_FW_VER_TYPE_COUNT];
+	} versions;
+
+	/** @rsa_size: RSA size */
+	u32 rsa_size;
+	/** @ucode_size: micro kernel size */
+	u32 ucode_size;
+	/** @css_offset: offset within the blob at which the CSS is located */
+	u32 css_offset;
+
+	/** @private_data_size: size of private data found in uC css header */
+	u32 private_data_size;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_types.h b/drivers/gpu/drm/xe/xe_uc_types.h
new file mode 100644
index 000000000000..9924e4484866
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_types.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_TYPES_H_
+#define _XE_UC_TYPES_H_
+
+#include "xe_gsc_types.h"
+#include "xe_guc_types.h"
+#include "xe_huc_types.h"
+#include "xe_wopcm_types.h"
+
+/**
+ * struct xe_uc - XE micro controllers
+ */
+struct xe_uc {
+	/** @guc: Graphics micro controller */
+	struct xe_guc guc;
+	/** @huc: HuC */
+	struct xe_huc huc;
+	/** @gsc: Graphics Security Controller */
+	struct xe_gsc gsc;
+	/** @wopcm: WOPCM */
+	struct xe_wopcm wopcm;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
new file mode 100644
index 000000000000..3b21afe5b488
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -0,0 +1,3279 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_vm.h"
+
+#include <linux/dma-fence-array.h>
+#include <linux/nospec.h>
+
+#include <drm/drm_exec.h>
+#include <drm/drm_print.h>
+#include <drm/ttm/ttm_execbuf_util.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/xe_drm.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+
+#include "xe_assert.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_drm_client.h"
+#include "xe_exec_queue.h"
+#include "xe_gt.h"
+#include "xe_gt_pagefault.h"
+#include "xe_gt_tlb_invalidation.h"
+#include "xe_migrate.h"
+#include "xe_pat.h"
+#include "xe_pm.h"
+#include "xe_preempt_fence.h"
+#include "xe_pt.h"
+#include "xe_res_cursor.h"
+#include "xe_sync.h"
+#include "xe_trace.h"
+#include "generated/xe_wa_oob.h"
+#include "xe_wa.h"
+
+static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
+{
+	return vm->gpuvm.r_obj;
+}
+
+/**
+ * xe_vma_userptr_check_repin() - Advisory check for repin needed
+ * @uvma: The userptr vma
+ *
+ * Check if the userptr vma has been invalidated since last successful
+ * repin. The check is advisory only and the function can be called
+ * without the vm->userptr.notifier_lock held. There is no guarantee that the
+ * vma userptr will remain valid after a lockless check, so typically
+ * the call needs to be followed by a proper check under the notifier_lock.
+ *
+ * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
+ */
+int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
+{
+	return mmu_interval_check_retry(&uvma->userptr.notifier,
+					uvma->userptr.notifier_seq) ?
+		-EAGAIN : 0;
+}
+
+int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
+{
+	struct xe_userptr *userptr = &uvma->userptr;
+	struct xe_vma *vma = &uvma->vma;
+	struct xe_vm *vm = xe_vma_vm(vma);
+	struct xe_device *xe = vm->xe;
+	const unsigned long num_pages = xe_vma_size(vma) >> PAGE_SHIFT;
+	struct page **pages;
+	bool in_kthread = !current->mm;
+	unsigned long notifier_seq;
+	int pinned, ret, i;
+	bool read_only = xe_vma_read_only(vma);
+
+	lockdep_assert_held(&vm->lock);
+	xe_assert(xe, xe_vma_is_userptr(vma));
+retry:
+	if (vma->gpuva.flags & XE_VMA_DESTROYED)
+		return 0;
+
+	notifier_seq = mmu_interval_read_begin(&userptr->notifier);
+	if (notifier_seq == userptr->notifier_seq)
+		return 0;
+
+	pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+
+	if (userptr->sg) {
+		dma_unmap_sgtable(xe->drm.dev,
+				  userptr->sg,
+				  read_only ? DMA_TO_DEVICE :
+				  DMA_BIDIRECTIONAL, 0);
+		sg_free_table(userptr->sg);
+		userptr->sg = NULL;
+	}
+
+	pinned = ret = 0;
+	if (in_kthread) {
+		if (!mmget_not_zero(userptr->notifier.mm)) {
+			ret = -EFAULT;
+			goto mm_closed;
+		}
+		kthread_use_mm(userptr->notifier.mm);
+	}
+
+	while (pinned < num_pages) {
+		ret = get_user_pages_fast(xe_vma_userptr(vma) +
+					  pinned * PAGE_SIZE,
+					  num_pages - pinned,
+					  read_only ? 0 : FOLL_WRITE,
+					  &pages[pinned]);
+		if (ret < 0)
+			break;
+
+		pinned += ret;
+		ret = 0;
+	}
+
+	if (in_kthread) {
+		kthread_unuse_mm(userptr->notifier.mm);
+		mmput(userptr->notifier.mm);
+	}
+mm_closed:
+	if (ret)
+		goto out;
+
+	ret = sg_alloc_table_from_pages_segment(&userptr->sgt, pages,
+						pinned, 0,
+						(u64)pinned << PAGE_SHIFT,
+						xe_sg_segment_size(xe->drm.dev),
+						GFP_KERNEL);
+	if (ret) {
+		userptr->sg = NULL;
+		goto out;
+	}
+	userptr->sg = &userptr->sgt;
+
+	ret = dma_map_sgtable(xe->drm.dev, userptr->sg,
+			      read_only ? DMA_TO_DEVICE :
+			      DMA_BIDIRECTIONAL,
+			      DMA_ATTR_SKIP_CPU_SYNC |
+			      DMA_ATTR_NO_KERNEL_MAPPING);
+	if (ret) {
+		sg_free_table(userptr->sg);
+		userptr->sg = NULL;
+		goto out;
+	}
+
+	for (i = 0; i < pinned; ++i) {
+		if (!read_only) {
+			lock_page(pages[i]);
+			set_page_dirty(pages[i]);
+			unlock_page(pages[i]);
+		}
+
+		mark_page_accessed(pages[i]);
+	}
+
+out:
+	release_pages(pages, pinned);
+	kvfree(pages);
+
+	if (!(ret < 0)) {
+		userptr->notifier_seq = notifier_seq;
+		if (xe_vma_userptr_check_repin(uvma) == -EAGAIN)
+			goto retry;
+	}
+
+	return ret < 0 ? ret : 0;
+}
+
+static bool preempt_fences_waiting(struct xe_vm *vm)
+{
+	struct xe_exec_queue *q;
+
+	lockdep_assert_held(&vm->lock);
+	xe_vm_assert_held(vm);
+
+	/* A queue with no fence, or with signaling enabled on it, counts */
+	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
+		struct dma_fence *pfence = q->compute.pfence;
+
+		if (!pfence || test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &pfence->flags))
+			return true;
+	}
+
+	return false;
+}
+
+static void free_preempt_fences(struct list_head *list)
+{
+	struct list_head *link, *next;
+
+	list_for_each_safe(link, next, list)
+		xe_preempt_fence_free(to_preempt_fence_from_link(link));
+}
+
+static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
+				unsigned int *count)
+{
+	lockdep_assert_held(&vm->lock);
+	xe_vm_assert_held(vm);
+
+	if (*count >= vm->preempt.num_exec_queues)
+		return 0;
+
+	for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
+		struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
+
+		if (IS_ERR(pfence))
+			return PTR_ERR(pfence);
+
+		list_move_tail(xe_preempt_fence_link(pfence), list);
+	}
+
+	return 0;
+}
+
+static int wait_for_existing_preempt_fences(struct xe_vm *vm)
+{
+	struct xe_exec_queue *q;
+
+	xe_vm_assert_held(vm);
+
+	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
+		if (q->compute.pfence) {
+			long timeout = dma_fence_wait(q->compute.pfence, false);
+
+			if (timeout < 0)
+				return -ETIME;
+			dma_fence_put(q->compute.pfence);
+			q->compute.pfence = NULL;
+		}
+	}
+
+	return 0;
+}
+
+static bool xe_vm_is_idle(struct xe_vm *vm)
+{
+	struct xe_exec_queue *q;
+
+	xe_vm_assert_held(vm);
+	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
+		if (!xe_exec_queue_is_idle(q))
+			return false;
+	}
+
+	return true;
+}
+
+static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
+{
+	struct list_head *link;
+	struct xe_exec_queue *q;
+
+	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
+		struct dma_fence *fence;
+
+		link = list->next;
+		xe_assert(vm->xe, link != list);
+
+		fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
+					     q, q->compute.context,
+					     ++q->compute.seqno);
+		dma_fence_put(q->compute.pfence);
+		q->compute.pfence = fence;
+	}
+}
+
+static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
+{
+	struct xe_exec_queue *q;
+	int err;
+
+	if (!vm->preempt.num_exec_queues)
+		return 0;
+
+	err = xe_bo_lock(bo, true);
+	if (err)
+		return err;
+
+	err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
+	if (err)
+		goto out_unlock;
+
+	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
+		if (q->compute.pfence) {
+			dma_resv_add_fence(bo->ttm.base.resv,
+					   q->compute.pfence,
+					   DMA_RESV_USAGE_BOOKKEEP);
+		}
+
+out_unlock:
+	xe_bo_unlock(bo);
+	return err;
+}
+
+static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
+						struct drm_exec *exec)
+{
+	struct xe_exec_queue *q;
+
+	lockdep_assert_held(&vm->lock);
+	xe_vm_assert_held(vm);
+
+	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
+		q->ops->resume(q);
+
+		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->compute.pfence,
+					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
+	}
+}
+
+int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
+{
+	struct drm_gpuvm_exec vm_exec = {
+		.vm = &vm->gpuvm,
+		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
+		.num_fences = 1,
+	};
+	struct drm_exec *exec = &vm_exec.exec;
+	struct dma_fence *pfence;
+	int err;
+	bool wait;
+
+	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
+
+	down_write(&vm->lock);
+	err = drm_gpuvm_exec_lock(&vm_exec);
+	if (err)
+		goto out_up_write;
+
+	pfence = xe_preempt_fence_create(q, q->compute.context, ++q->compute.seqno);
+	if (IS_ERR_OR_NULL(pfence)) {
+		/* Failure may be NULL or an ERR_PTR; never install either */
+		err = pfence ? PTR_ERR(pfence) : -ENOMEM;
+		goto out_fini;
+	}
+
+	list_add(&q->compute.link, &vm->preempt.exec_queues);
+	++vm->preempt.num_exec_queues;
+	q->compute.pfence = pfence;
+
+	down_read(&vm->userptr.notifier_lock);
+
+	drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
+				 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
+
+	/*
+	 * If a preemption is already in flight on the VM, or a userptr
+	 * invalidation is pending, trigger this preempt fence as well so its
+	 * state stays in sync with the other preempt fences on the VM.
+	 */
+	wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
+	if (wait)
+		dma_fence_enable_sw_signaling(pfence);
+
+	up_read(&vm->userptr.notifier_lock);
+
+out_fini:
+	drm_exec_fini(exec);
+out_up_write:
+	up_write(&vm->lock);
+
+	return err;
+}
+
+/**
+ * xe_vm_remove_compute_exec_queue() - Detach a compute exec queue from a VM
+ * @vm: The VM; nothing is done unless it is in preempt-fence mode.
+ * @q: The exec_queue to unlink; its preempt fence, if any, is dropped.
+ */
+void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
+{
+	if (!xe_vm_in_preempt_fence_mode(vm))
+		return;
+
+	down_write(&vm->lock);
+	list_del(&q->compute.link);
+	vm->preempt.num_exec_queues--;
+	/* dma_fence_put() and the reset below are harmless without a fence. */
+	if (q->compute.pfence)
+		dma_fence_enable_sw_signaling(q->compute.pfence);
+	dma_fence_put(q->compute.pfence);
+	q->compute.pfence = NULL;
+	up_write(&vm->lock);
+}
+
+/**
+ * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
+ * that need repinning.
+ * @vm: The VM.
+ *
+ * Looks at both the repin list and the invalidated list of @vm. The caller
+ * must hold userptr.notifier_lock in read mode, which is asserted via
+ * lockdep.
+ *
+ * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
+ */
+int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
+{
+	lockdep_assert_held_read(&vm->userptr.notifier_lock);
+	if (!list_empty(&vm->userptr.repin_list))
+		return -EAGAIN;
+	return list_empty(&vm->userptr.invalidated) ? 0 : -EAGAIN;
+}
+
+#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000	/* retry window, in ms, armed by xe_vm_validate_should_retry() */
+
+/* Ban @vm and kill each exec queue on its preempt list; vm->lock must be held. */
+static void xe_vm_kill(struct xe_vm *vm)
+{
+	struct xe_exec_queue *victim;
+
+	lockdep_assert_held(&vm->lock);
+	xe_vm_lock(vm, false);
+	vm->flags |= XE_VM_FLAG_BANNED;
+	trace_xe_vm_kill(vm);
+
+	list_for_each_entry(victim, &vm->preempt.exec_queues, compute.link)
+		victim->ops->kill(victim);
+
+	xe_vm_unlock(vm);
+	/* TODO: Inform user the VM is banned */
+}
+
+/**
+ * xe_vm_validate_should_retry() - Whether to retry after a validate error.
+ * @exec: The drm_exec object used for locking; currently not accessed here.
+ * @err: The error returned from ttm_bo_validate().
+ * @end: A ktime_t cookie that should be set to 0 before first use and
+ * that should be reused on subsequent calls.
+ *
+ * With multiple active VMs, under memory pressure, it is possible that
+ * ttm_bo_validate() runs into -EDEADLK and in such case returns -ENOMEM.
+ * Until ttm properly handles locking in such scenarios, the best the driver
+ * can do is retry with a timeout. Only -ENOMEM is retried: the first such
+ * call arms a XE_VM_REBIND_RETRY_TIMEOUT_MS deadline in @end, and each
+ * retry within that window sleeps for 20 ms before returning.
+ *
+ * Return: true if a retry after drm_exec_init() is recommended, else false.
+ */
+bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
+{
+	ktime_t now;
+
+	if (err != -ENOMEM)
+		return false;
+
+	now = ktime_get();
+	if (!*end)
+		*end = ktime_add_ms(now, XE_VM_REBIND_RETRY_TIMEOUT_MS);
+	if (ktime_before(now, *end)) {
+		msleep(20);
+		return true;
+	}
+	return false;
+}
+
+/* drm_gpuvm validate hook: queue every VMA of @vm_bo for rebind, then validate its BO. */
+static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
+{
+	struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
+	struct drm_gpuva *va;
+	int err;
+
+	lockdep_assert_held(&vm->lock);
+
+	drm_gpuvm_bo_for_each_va(va, vm_bo)
+		list_move_tail(&gpuva_to_vma(va)->combined_links.rebind, &vm->rebind_list);
+
+	err = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
+	if (!err)
+		vm_bo->evicted = false;
+
+	return err;
+}
+
+static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
+				 bool *done)
+{
+	unsigned int vm_slots;
+	int err;
+
+	/*
+	 * Reserve one slot per preempt fence, plus one per tile for a fence
+	 * from a possible rebind.
+	 */
+	vm_slots = vm->preempt.num_exec_queues + vm->xe->info.tile_count;
+	err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, vm_slots);
+	if (err)
+		return err;
+
+	if (xe_vm_is_idle(vm)) {
+		vm->preempt.rebind_deactivated = true;
+		goto out_done;
+	}
+	if (!preempt_fences_waiting(vm))
+		goto out_done;
+
+	/* The objects get one slot per preempt fence. */
+	err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, vm->preempt.num_exec_queues);
+	if (!err)
+		err = wait_for_existing_preempt_fences(vm);
+	if (!err)
+		err = drm_gpuvm_validate(&vm->gpuvm, exec);
+	return err;
+
+out_done:
+	/* Tell the caller that there is nothing left to do. */
+	*done = true;
+	return 0;
+}
+
+static void preempt_rebind_work_func(struct work_struct *w)
+{
+	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
+	struct drm_exec exec;
+	struct dma_fence *rebind_fence;
+	unsigned int fence_count = 0;
+	LIST_HEAD(preempt_fences);	/* filled by alloc_preempt_fences(), released by free_preempt_fences() */
+	ktime_t end = 0;	/* deadline cookie for xe_vm_validate_should_retry() */
+	int err = 0;
+	long wait;
+	int __maybe_unused tries = 0;	/* only consumed by retry_required() below */
+
+	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
+	trace_xe_vm_rebind_worker_enter(vm);
+
+	down_write(&vm->lock);
+
+	if (xe_vm_is_closed_or_banned(vm)) {	/* nothing to do for a closed or banned VM */
+		up_write(&vm->lock);
+		trace_xe_vm_rebind_worker_exit(vm);
+		return;
+	}
+
+retry:
+	if (xe_vm_userptr_check_repin(vm)) {
+		err = xe_vm_userptr_pin(vm);
+		if (err)
+			goto out_unlock_outer;
+	}
+
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+
+	drm_exec_until_all_locked(&exec) {
+		bool done = false;
+
+		err = xe_preempt_work_begin(&exec, vm, &done);
+		drm_exec_retry_on_contention(&exec);
+		if (err || done) {
+			drm_exec_fini(&exec);	/* finished here, hence the jump past out_unlock */
+			if (err && xe_vm_validate_should_retry(&exec, err, &end))
+				err = -EAGAIN;	/* picked up by the retry check at out_unlock_outer */
+
+			goto out_unlock_outer;
+		}
+	}
+
+	err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
+	if (err)
+		goto out_unlock;
+
+	rebind_fence = xe_vm_rebind(vm, true);
+	if (IS_ERR(rebind_fence)) {
+		err = PTR_ERR(rebind_fence);
+		goto out_unlock;
+	}
+
+	if (rebind_fence) {	/* xe_vm_rebind() returns NULL when nothing was rebound */
+		dma_fence_wait(rebind_fence, false);
+		dma_fence_put(rebind_fence);
+	}
+
+	/* Wait on munmap style VM unbinds */
+	wait = dma_resv_wait_timeout(xe_vm_resv(vm),
+				     DMA_RESV_USAGE_KERNEL,
+				     false, MAX_SCHEDULE_TIMEOUT);
+	if (wait <= 0) {
+		err = -ETIME;
+		goto out_unlock;
+	}
+
+#define retry_required(__tries, __vm) \
+	(IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
+	(!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
+	__xe_vm_userptr_needs_repin(__vm))
+
+	down_read(&vm->userptr.notifier_lock);	/* held until the fences are re-installed */
+	if (retry_required(tries, vm)) {
+		up_read(&vm->userptr.notifier_lock);
+		err = -EAGAIN;
+		goto out_unlock;
+	}
+
+#undef retry_required
+
+	spin_lock(&vm->xe->ttm.lru_lock);
+	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
+	spin_unlock(&vm->xe->ttm.lru_lock);
+
+	/* Point of no return. */
+	arm_preempt_fences(vm, &preempt_fences);
+	resume_and_reinstall_preempt_fences(vm, &exec);
+	up_read(&vm->userptr.notifier_lock);
+
+out_unlock:
+	drm_exec_fini(&exec);
+out_unlock_outer:
+	if (err == -EAGAIN) {	/* start over with vm->lock still held */
+		trace_xe_vm_rebind_worker_retry(vm);
+		goto retry;
+	}
+
+	if (err) {	/* any other failure bans the VM via xe_vm_kill() */
+		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
+		xe_vm_kill(vm);
+	}
+	up_write(&vm->lock);
+
+	free_preempt_fences(&preempt_fences);
+
+	trace_xe_vm_rebind_worker_exit(vm);
+}
+
+static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
+				   const struct mmu_notifier_range *range,
+				   unsigned long cur_seq)
+{
+	struct xe_userptr *userptr = container_of(mni, typeof(*userptr), notifier);
+	struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr);
+	struct xe_vma *vma = &uvma->vma;
+	struct xe_vm *vm = xe_vma_vm(vma);
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
+	long err;
+
+	xe_assert(vm->xe, xe_vma_is_userptr(vma));
+	trace_xe_vma_userptr_invalidate(vma);
+
+	if (!mmu_notifier_range_blockable(range))	/* the code below takes a rwsem and waits on fences */
+		return false;
+
+	down_write(&vm->userptr.notifier_lock);
+	mmu_interval_set_seq(mni, cur_seq);
+
+	/* No need to stop gpu access if the userptr is not yet bound. */
+	if (!userptr->initial_bind) {
+		up_write(&vm->userptr.notifier_lock);
+		return true;
+	}
+
+	/*
+	 * Tell exec and rebind worker they need to repin and rebind this
+	 * userptr.
+	 */
+	if (!xe_vm_in_fault_mode(vm) &&
+	    !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) {
+		spin_lock(&vm->userptr.invalidated_lock);
+		list_move_tail(&userptr->invalidate_link,
+			       &vm->userptr.invalidated);
+		spin_unlock(&vm->userptr.invalidated_lock);
+	}
+
+	up_write(&vm->userptr.notifier_lock);
+
+	/*
+	 * Preempt fences turn into schedule disables, pipeline these.
+	 * Note that even in fault mode, we need to wait for binds and
+	 * unbinds to complete, and those are attached as BOOKKEEP fences
+	 * to the vm.
+	 */
+	dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
+			    DMA_RESV_USAGE_BOOKKEEP);
+	dma_resv_for_each_fence_unlocked(&cursor, fence)
+		dma_fence_enable_sw_signaling(fence);
+	dma_resv_iter_end(&cursor);
+
+	err = dma_resv_wait_timeout(xe_vm_resv(vm),
+				    DMA_RESV_USAGE_BOOKKEEP,
+				    false, MAX_SCHEDULE_TIMEOUT);
+	XE_WARN_ON(err <= 0);	/* an uninterruptible wait without deadline is not expected to fail */
+
+	if (xe_vm_in_fault_mode(vm)) {
+		err = xe_vm_invalidate_vma(vma);
+		XE_WARN_ON(err);
+	}
+
+	trace_xe_vma_userptr_invalidate_complete(vma);
+
+	return true;
+}
+
+static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
+	.invalidate = vma_userptr_invalidate,	/* registered per userptr VMA in xe_vma_create() */
+};
+
+/**
+ * xe_vm_userptr_pin() - Repin the VM's invalidated userptrs
+ * @vm: The VM.
+ *
+ * Return: 0 on success, negative error code if pinning the pages failed.
+ */
+int xe_vm_userptr_pin(struct xe_vm *vm)
+{
+	struct xe_userptr_vma *uvma, *next;
+	int err;
+
+	lockdep_assert_held_write(&vm->lock);
+	/* Collect invalidated userptrs */
+	spin_lock(&vm->userptr.invalidated_lock);
+	list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated, userptr.invalidate_link) {
+		list_del_init(&uvma->userptr.invalidate_link);
+		list_move_tail(&uvma->userptr.repin_link, &vm->userptr.repin_list);
+	}
+	spin_unlock(&vm->userptr.invalidated_lock);
+
+	/* Pin the pages and queue each VMA for rebind; failures stay on the repin list */
+	list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, userptr.repin_link) {
+		err = xe_vma_userptr_pin_pages(uvma);
+		if (err < 0)
+			return err;
+		list_del_init(&uvma->userptr.repin_link);
+		list_move_tail(&uvma->vma.combined_links.rebind, &vm->rebind_list);
+	}
+
+	return 0;
+}
+
+/**
+ * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
+ * that need repinning.
+ * @vm: The VM.
+ *
+ * Advisory only: both lists are sampled with list_empty_careful() and no
+ * lock is taken here.
+ *
+ * Return: 0 if nothing indicates userptrs needing repinning, else -EAGAIN.
+ */
+int xe_vm_userptr_check_repin(struct xe_vm *vm)
+{
+	if (!list_empty_careful(&vm->userptr.repin_list))
+		return -EAGAIN;
+	return list_empty_careful(&vm->userptr.invalidated) ? 0 : -EAGAIN;
+}
+
+static struct dma_fence *
+xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
+	       struct xe_sync_entry *syncs, u32 num_syncs,
+	       bool first_op, bool last_op);
+
+/* Rebind everything on vm->rebind_list; returns the last bind fence, NULL or an ERR_PTR. */
+struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
+{
+	struct dma_fence *last = NULL;
+	struct xe_vma *vma, *tmp;
+
+	lockdep_assert_held(&vm->lock);
+	if (xe_vm_in_lr_mode(vm) && !rebind_worker)
+		return NULL;
+
+	xe_vm_assert_held(vm);
+	list_for_each_entry_safe(vma, tmp, &vm->rebind_list, combined_links.rebind) {
+		xe_assert(vm->xe, vma->tile_present);
+		list_del_init(&vma->combined_links.rebind);
+		/* Only the most recent fence is kept; drop the previous one. */
+		dma_fence_put(last);
+		if (rebind_worker)
+			trace_xe_vma_rebind_worker(vma);
+		else
+			trace_xe_vma_rebind_exec(vma);
+		last = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
+		if (IS_ERR(last))
+			break;
+	}
+
+	return last;
+}
+
+/* Free @vma through the container type that xe_vma_is_userptr() reports for it. */
+static void xe_vma_free(struct xe_vma *vma)
+{
+	void *alloc = xe_vma_is_userptr(vma) ? (void *)to_userptr_vma(vma) : (void *)vma;
+
+	kfree(alloc);
+}
+
+#define VMA_CREATE_FLAG_READ_ONLY	BIT(0)	/* xe_vma_create() sets XE_VMA_READ_ONLY */
+#define VMA_CREATE_FLAG_IS_NULL		BIT(1)	/* xe_vma_create() sets DRM_GPUVA_SPARSE */
+
+static struct xe_vma *xe_vma_create(struct xe_vm *vm,
+				    struct xe_bo *bo,
+				    u64 bo_offset_or_userptr,
+				    u64 start, u64 end,
+				    u16 pat_index, unsigned int flags)
+{
+	struct xe_vma *vma;
+	struct xe_tile *tile;
+	u8 id;
+	bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
+	bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
+
+	xe_assert(vm->xe, start < end);
+	xe_assert(vm->xe, end < vm->size);
+
+	/*
+	 * Allocate and ensure that the xe_vma_is_userptr() return
+	 * matches what was allocated.
+	 */
+	if (!bo && !is_null) {	/* neither BO nor NULL binding: userptr */
+		struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL);
+
+		if (!uvma)
+			return ERR_PTR(-ENOMEM);
+
+		vma = &uvma->vma;
+	} else {
+		vma = kzalloc(sizeof(*vma), GFP_KERNEL);
+		if (!vma)
+			return ERR_PTR(-ENOMEM);
+
+		if (is_null)
+			vma->gpuva.flags |= DRM_GPUVA_SPARSE;
+		if (bo)
+			vma->gpuva.gem.obj = &bo->ttm.base;
+	}
+
+	INIT_LIST_HEAD(&vma->combined_links.rebind);
+
+	INIT_LIST_HEAD(&vma->gpuva.gem.entry);
+	vma->gpuva.vm = &vm->gpuvm;
+	vma->gpuva.va.addr = start;
+	vma->gpuva.va.range = end - start + 1;	/* @end is inclusive */
+	if (read_only)
+		vma->gpuva.flags |= XE_VMA_READ_ONLY;
+
+	for_each_tile(tile, vm->xe, id)	/* one tile_mask bit per tile visited */
+		vma->tile_mask |= 0x1 << id;
+
+	if (GRAPHICS_VER(vm->xe) >= 20 || vm->xe->info.platform == XE_PVC)
+		vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
+
+	vma->pat_index = pat_index;
+
+	if (bo) {
+		struct drm_gpuvm_bo *vm_bo;
+
+		xe_bo_assert_held(bo);
+
+		vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
+		if (IS_ERR(vm_bo)) {
+			xe_vma_free(vma);
+			return ERR_CAST(vm_bo);
+		}
+
+		drm_gpuvm_bo_extobj_add(vm_bo);
+		drm_gem_object_get(&bo->ttm.base);	/* presumably paired with xe_bo_put() in xe_vma_destroy_late() */
+		vma->gpuva.gem.offset = bo_offset_or_userptr;
+		drm_gpuva_link(&vma->gpuva, vm_bo);
+		drm_gpuvm_bo_put(vm_bo);	/* drops the reference taken by drm_gpuvm_bo_obtain() above */
+	} else /* userptr or null */ {
+		if (!is_null) {
+			struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
+			u64 size = end - start + 1;
+			int err;
+
+			INIT_LIST_HEAD(&userptr->invalidate_link);
+			INIT_LIST_HEAD(&userptr->repin_link);
+			vma->gpuva.gem.offset = bo_offset_or_userptr;
+
+			err = mmu_interval_notifier_insert(&userptr->notifier,
+							   current->mm,
+							   xe_vma_userptr(vma), size,
+							   &vma_userptr_notifier_ops);
+			if (err) {
+				xe_vma_free(vma);
+				return ERR_PTR(err);
+			}
+
+			userptr->notifier_seq = LONG_MAX;
+		}
+
+		xe_vm_get(vm);	/* dropped again by xe_vm_put() in xe_vma_destroy_late() */
+	}
+
+	return vma;
+}
+
+/* Final teardown of @vma: drop ufence, userptr mapping/notifier and VM or BO reference. */
+static void xe_vma_destroy_late(struct xe_vma *vma)
+{
+	struct xe_vm *vm = xe_vma_vm(vma);
+	struct device *dev = vm->xe->drm.dev;
+	enum dma_data_direction dir =
+		xe_vma_read_only(vma) ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL;
+
+	/* Drop the user fence reference, if one is still attached. */
+	if (vma->ufence) {
+		xe_sync_ufence_put(vma->ufence);
+		vma->ufence = NULL;
+	}
+
+	if (xe_vma_is_userptr(vma)) {
+		struct xe_userptr *uptr = &to_userptr_vma(vma)->userptr;
+
+		if (uptr->sg) {
+			dma_unmap_sgtable(dev, uptr->sg, dir, 0);
+			sg_free_table(uptr->sg);
+			uptr->sg = NULL;
+		}
+
+		/*
+		 * Since userptr pages are not pinned, we can't remove
+		 * the notifier until we're sure the GPU is not accessing
+		 * them anymore
+		 */
+		mmu_interval_notifier_remove(&uptr->notifier);
+		xe_vm_put(vm);
+	} else if (xe_vma_is_null(vma)) {
+		xe_vm_put(vm);
+	} else {
+		xe_bo_put(xe_vma_bo(vma));
+	}
+
+	xe_vma_free(vma);
+}
+
+/*
+ * Work item queued by vma_destroy_cb(): finish destroying the VMA embedding @w.
+ */
+static void vma_destroy_work_func(struct work_struct *w)
+{
+	xe_vma_destroy_late(container_of(w, struct xe_vma, destroy_work));
+}
+
+/* Fence callback: defer the rest of the VMA teardown to the unbound workqueue. */
+static void vma_destroy_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+	struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
+
+	INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
+	queue_work(system_unbound_wq, &vma->destroy_work);
+}
+
+/* Unlink @vma and destroy it, deferring the final teardown until @fence signals. */
+static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
+{
+	struct xe_vm *vm = xe_vma_vm(vma);
+	int ret;
+
+	lockdep_assert_held_write(&vm->lock);
+	xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
+
+	if (xe_vma_is_userptr(vma)) {
+		struct xe_userptr *uptr = &to_userptr_vma(vma)->userptr;
+
+		xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
+		spin_lock(&vm->userptr.invalidated_lock);
+		list_del(&uptr->invalidate_link);
+		spin_unlock(&vm->userptr.invalidated_lock);
+	} else if (!xe_vma_is_null(vma)) {
+		xe_bo_assert_held(xe_vma_bo(vma));
+		drm_gpuva_unlink(&vma->gpuva);
+	}
+
+	xe_vm_assert_held(vm);
+	if (fence) {
+		ret = dma_fence_add_callback(fence, &vma->destroy_cb, vma_destroy_cb);
+		if (!ret)
+			return;
+		/* -ENOENT means @fence already signaled; anything else is unexpected. */
+		XE_WARN_ON(ret != -ENOENT);
+	}
+
+	xe_vma_destroy_late(vma);
+}
+
+/**
+ * xe_vm_prepare_vma() - drm_exec utility to lock a vma
+ * @exec: The drm_exec object we're currently locking for.
+ * @vma: The vma for which we want to lock the vm resv and any attached
+ * object's resv.
+ * @num_shared: The number of dma-fence slots to pre-allocate in the
+ * objects' reservation objects. If zero, the objects are only locked.
+ *
+ * The attached object is only handled separately when its bo->vm is unset.
+ *
+ * Return: 0 on success, negative error code on error. In particular
+ * may return -EDEADLK on WW transaction contention and -EINTR if
+ * an interruptible wait is terminated by a signal.
+ */
+int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
+		      unsigned int num_shared)
+{
+	struct xe_vm *vm = xe_vma_vm(vma);
+	struct xe_bo *bo = xe_vma_bo(vma);
+	int err;
+
+	XE_WARN_ON(!vm);
+	err = num_shared ? drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared) :
+			   drm_exec_lock_obj(exec, xe_vm_obj(vm));
+	/* Stop on error, or when there is no BO that needs a lock of its own. */
+	if (err || !bo || bo->vm)
+		return err;
+
+	if (!num_shared)
+		return drm_exec_lock_obj(exec, &bo->ttm.base);
+
+	return drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared);
+}
+
+/* Lock the VM (and BO, if needed) with a private drm_exec, then destroy @vma right away. */
+static void xe_vma_destroy_unlocked(struct xe_vma *vma)
+{
+	struct drm_exec exec;
+
+	drm_exec_init(&exec, 0, 0);
+	drm_exec_until_all_locked(&exec) {
+		int err = xe_vm_prepare_vma(&exec, vma, 0);
+
+		drm_exec_retry_on_contention(&exec);
+		if (XE_WARN_ON(err))
+			break;
+	}
+	/* No fence is passed, so xe_vma_destroy() tears the VMA down synchronously. */
+	xe_vma_destroy(vma, NULL);
+	drm_exec_fini(&exec);
+}
+
+/* First VMA found in [@start, @start + @range); NULL if none or the VM is closed/banned. */
+struct xe_vma *
+xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
+{
+	struct drm_gpuva *hit;
+
+	lockdep_assert_held(&vm->lock);
+	if (xe_vm_is_closed_or_banned(vm))
+		return NULL;
+
+	xe_assert(vm->xe, start + range <= vm->size);
+	hit = drm_gpuva_find_first(&vm->gpuvm, start, range);
+	if (!hit)
+		return NULL;
+	return gpuva_to_vma(hit);
+}
+
+/* Add @vma to the VM's GPU VA space; a failure is unexpected and triggers a warning. */
+static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
+{
+	int err;
+
+	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
+	lockdep_assert_held(&vm->lock);
+	err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
+	XE_WARN_ON(err);	/* Shouldn't be possible */
+
+	return err;
+}
+
+/* Take @vma out of the GPU VA space and forget it as the cached last-fault VMA. */
+static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
+{
+	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
+	lockdep_assert_held(&vm->lock);
+	drm_gpuva_remove(&vma->gpuva);
+	if (vma == vm->usm.last_fault_vma)
+		vm->usm.last_fault_vma = NULL;
+}
+
+/*
+ * drm_gpuvm op_alloc hook: returns the drm_gpuva_op embedded in a new, zeroed xe_vma_op.
+ */
+static struct drm_gpuva_op *xe_vm_op_alloc(void)
+{
+	struct xe_vma_op *op = kzalloc(sizeof(*op), GFP_KERNEL);
+
+	if (likely(op))
+		return &op->base;
+	return NULL;
+}
+
+static void xe_vm_free(struct drm_gpuvm *gpuvm);
+
+static struct drm_gpuvm_ops gpuvm_ops = {	/* passed to drm_gpuvm_init() in xe_vm_create() */
+	.op_alloc = xe_vm_op_alloc,
+	.vm_bo_validate = xe_gpuvm_validate,
+	.vm_free = xe_vm_free,	/* only queues vm->destroy_work */
+};
+
+/*
+ * Translate bits 0-1 of @pat_index into the matching PAT bits of a PDE.
+ * Higher index bits are ignored and @xe is currently unused.
+ */
+static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index)
+{
+	u64 pat_bits = 0;
+
+	pat_bits |= (pat_index & BIT(0)) ? XE_PPGTT_PTE_PAT0 : 0;
+	pat_bits |= (pat_index & BIT(1)) ? XE_PPGTT_PTE_PAT1 : 0;
+	return pat_bits;
+}
+
+/*
+ * Translate the low five bits of @pat_index into PAT bits of a page-table
+ * entry. Only bit 2 depends on the level: a non-zero @pt_level selects
+ * XE_PPGTT_PDE_PDPE_PAT2, level 0 selects XE_PPGTT_PTE_PAT2. @xe is
+ * currently unused.
+ */
+static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index,
+				u32 pt_level)
+{
+	const u64 pat2 = pt_level ? XE_PPGTT_PDE_PDPE_PAT2 : XE_PPGTT_PTE_PAT2;
+	u64 pte = 0;
+
+	if (pat_index & BIT(0))
+		pte |= XE_PPGTT_PTE_PAT0;
+	if (pat_index & BIT(1))
+		pte |= XE_PPGTT_PTE_PAT1;
+	/* Bit 2 is the only level-dependent one. */
+	if (pat_index & BIT(2))
+		pte |= pat2;
+	if (pat_index & BIT(3))
+		pte |= XELPG_PPGTT_PTE_PAT3;
+	if (pat_index & BIT(4))
+		pte |= XE2_PPGTT_PTE_PAT4;
+
+	return pte;
+}
+
+/*
+ * Page-size bits for @pt_level: XE_PDE_PS_2M at 1, XE_PDPE_PS_1G at 2, else 0.
+ */
+static u64 pte_encode_ps(u32 pt_level)
+{
+	XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
+
+	if (pt_level == 2)
+		return XE_PDPE_PS_1G;
+	return pt_level == 1 ? XE_PDE_PS_2M : 0;
+}
+
+/* Build a present, writable PDE pointing at @bo_offset within @bo. */
+static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
+			      const u16 pat_index)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	u64 addr = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
+	u64 attrs = XE_PAGE_PRESENT | XE_PAGE_RW;
+
+	attrs |= pde_encode_pat_index(xe, pat_index);
+
+	return addr | attrs;
+}
+
+/* Build a present, writable PTE for @bo_offset within @bo at page-table level @pt_level. */
+static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
+			      u16 pat_index, u32 pt_level)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	u64 entry = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
+
+	entry |= XE_PAGE_PRESENT | XE_PAGE_RW;
+	entry |= pte_encode_pat_index(xe, pat_index, pt_level);
+	entry |= pte_encode_ps(pt_level);
+	/* VRAM and stolen device memory are flagged with the DM bit. */
+	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
+		entry |= XE_PPGTT_PTE_DM;
+
+	return entry;
+}
+
+/*
+ * OR the attribute bits for @vma into @pte: always present, writable unless
+ * the VMA is read-only, PAT and page-size bits, and the NULL bit for null VMAs.
+ */
+static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
+			       u16 pat_index, u32 pt_level)
+{
+	struct xe_device *xe = xe_vma_vm(vma)->xe;
+	u64 attrs = XE_PAGE_PRESENT;
+
+	if (likely(!xe_vma_read_only(vma)))
+		attrs |= XE_PAGE_RW;
+	attrs |= pte_encode_pat_index(xe, pat_index, pt_level);
+	attrs |= pte_encode_ps(pt_level);
+	if (unlikely(xe_vma_is_null(vma)))
+		attrs |= XE_PTE_NULL;
+	return pte | attrs;
+}
+
+/*
+ * Build a present, writable PTE for the raw address @addr at @pt_level.
+ * @devmem adds the DM bit; @flags is OR-ed in verbatim and is asserted to
+ * contain nothing but XE_PTE_PS64.
+ */
+static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, u16 pat_index,
+				u32 pt_level, bool devmem, u64 flags)
+{
+	u64 pte = addr | XE_PAGE_PRESENT | XE_PAGE_RW;
+
+	/* Avoid passing random bits directly as flags */
+	xe_assert(xe, !(flags & ~XE_PTE_PS64));
+
+	pte |= pte_encode_pat_index(xe, pat_index, pt_level);
+	pte |= pte_encode_ps(pt_level);
+
+	if (devmem)
+		pte |= XE_PPGTT_PTE_DM;
+
+	return pte | flags;
+}
+
+static const struct xe_pt_ops xelp_pt_ops = {	/* installed as vm->pt_ops in xe_vm_create() */
+	.pte_encode_bo = xelp_pte_encode_bo,
+	.pte_encode_vma = xelp_pte_encode_vma,
+	.pte_encode_addr = xelp_pte_encode_addr,
+	.pde_encode_bo = xelp_pde_encode_bo,
+};
+
+static void vm_destroy_work_func(struct work_struct *w);
+
+/**
+ * xe_vm_create_scratch() - Setup a scratch pagetable tree for a tile and vm.
+ * @xe: xe device.
+ * @tile: tile to set up for.
+ * @vm: vm to set up for.
+ *
+ * Sets up a pagetable tree with one page-table per level and a single leaf
+ * PTE. All pagetable entries point to the single page-table or, for
+ * MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and writes become
+ * NOPs. On failure no error pointer is left behind in vm->scratch_pt.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
+				struct xe_vm *vm)
+{
+	u8 id = tile->id;
+	int i;
+
+	for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
+		struct xe_pt *pt = xe_pt_create(vm, tile, i);
+
+		if (IS_ERR(pt))
+			return PTR_ERR(pt);
+		vm->scratch_pt[id][i] = pt;
+		xe_pt_populate_empty(tile, vm, pt);
+	}
+
+	return 0;
+}
+
+/* Destroy the scratch page tables of every tile that has a root page table. */
+static void xe_vm_free_scratch(struct xe_vm *vm)
+{
+	struct xe_tile *tile;
+	u32 level;
+	u8 id;
+
+	if (!xe_vm_has_scratch(vm))
+		return;
+
+	for_each_tile(tile, vm->xe, id) {
+		if (!vm->pt_root[id])
+			continue;
+		for (level = MAX_HUGEPTE_LEVEL; level < vm->pt_root[id]->level; ++level) {
+			if (vm->scratch_pt[id][level])
+				xe_pt_destroy(vm->scratch_pt[id][level], vm->flags, NULL);
+		}
+	}
+}
+
+/*
+ * Create a VM on @xe according to the XE_VM_FLAG_* bits in @flags.
+ * Returns the new VM, or an ERR_PTR() on failure.
+ */
+struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
+{
+	struct drm_gem_object *vm_resv_obj;
+	struct xe_vm *vm;
+	int err, number_tiles = 0;
+	struct xe_tile *tile;
+	u8 id;
+
+	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
+	if (!vm)
+		return ERR_PTR(-ENOMEM);
+
+	vm->xe = xe;
+	vm->size = 1ull << xe->info.va_bits;
+	vm->flags = flags;
+
+	init_rwsem(&vm->lock);
+
+	INIT_LIST_HEAD(&vm->rebind_list);
+	INIT_LIST_HEAD(&vm->userptr.repin_list);
+	INIT_LIST_HEAD(&vm->userptr.invalidated);
+	init_rwsem(&vm->userptr.notifier_lock);
+	spin_lock_init(&vm->userptr.invalidated_lock);
+
+	INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
+	/* xe_vm_close_and_put() may flush this work, also on the error paths below. */
+	if (flags & XE_VM_FLAG_LR_MODE)
+		INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
+
+	INIT_LIST_HEAD(&vm->preempt.exec_queues);
+	vm->preempt.min_run_period_ms = 10;	/* FIXME: Wire up to uAPI */
+
+	for_each_tile(tile, xe, id)
+		xe_range_fence_tree_init(&vm->rftree[id]);
+
+	vm->pt_ops = &xelp_pt_ops;
+
+	if (!(flags & XE_VM_FLAG_MIGRATION))
+		xe_device_mem_access_get(xe);
+
+	vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
+	if (!vm_resv_obj) {
+		err = -ENOMEM;
+		goto err_no_resv;
+	}
+
+	drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
+		       vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);
+
+	drm_gem_object_put(vm_resv_obj);
+
+	/* Counted before any path into xe_vm_close_and_put(), which always decrements. */
+	mutex_lock(&xe->usm.lock);
+	if (flags & XE_VM_FLAG_FAULT_MODE)
+		xe->usm.num_vm_in_fault_mode++;
+	else if (!(flags & XE_VM_FLAG_MIGRATION))
+		xe->usm.num_vm_in_non_fault_mode++;
+	mutex_unlock(&xe->usm.lock);
+
+	err = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
+	if (err)
+		goto err_close;
+
+	if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
+		vm->flags |= XE_VM_FLAG_64K;
+
+	for_each_tile(tile, xe, id) {
+		if (flags & XE_VM_FLAG_MIGRATION &&
+		    tile->id != XE_VM_FLAG_TILE_ID(flags))
+			continue;
+
+		vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level);
+		if (IS_ERR(vm->pt_root[id])) {
+			err = PTR_ERR(vm->pt_root[id]);
+			vm->pt_root[id] = NULL;
+			goto err_unlock_close;
+		}
+	}
+
+	if (xe_vm_has_scratch(vm)) {
+		for_each_tile(tile, xe, id) {
+			if (!vm->pt_root[id])
+				continue;
+
+			err = xe_vm_create_scratch(xe, tile, vm);
+			if (err)
+				goto err_unlock_close;
+		}
+		vm->batch_invalidate_tlb = true;
+	}
+
+	if (flags & XE_VM_FLAG_LR_MODE)
+		vm->batch_invalidate_tlb = false;
+
+	/* Fill pt_root after allocating scratch tables */
+	for_each_tile(tile, xe, id) {
+		if (!vm->pt_root[id])
+			continue;
+
+		xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
+	}
+	dma_resv_unlock(xe_vm_resv(vm));
+
+	/* Kernel migration VM shouldn't have a circular loop.. */
+	if (!(flags & XE_VM_FLAG_MIGRATION)) {
+		for_each_tile(tile, xe, id) {
+			struct xe_gt *gt = tile->primary_gt;
+			struct xe_vm *migrate_vm;
+			struct xe_exec_queue *q;
+
+			if (!vm->pt_root[id])
+				continue;
+
+			migrate_vm = xe_migrate_get_vm(tile->migrate);
+			q = xe_exec_queue_create_class(xe, gt, migrate_vm,
+						       XE_ENGINE_CLASS_COPY, EXEC_QUEUE_FLAG_VM);
+			xe_vm_put(migrate_vm);
+			if (IS_ERR(q)) {
+				err = PTR_ERR(q);
+				goto err_close;
+			}
+			vm->q[id] = q;
+			number_tiles++;
+		}
+	}
+
+	if (number_tiles > 1)
+		vm->composite_fence_ctx = dma_fence_context_alloc(1);
+
+	trace_xe_vm_create(vm);
+
+	return vm;
+
+err_unlock_close:
+	dma_resv_unlock(xe_vm_resv(vm));
+err_close:
+	xe_vm_close_and_put(vm);
+	return ERR_PTR(err);
+
+err_no_resv:
+	for_each_tile(tile, xe, id)
+		xe_range_fence_tree_fini(&vm->rftree[id]);
+	kfree(vm);
+	if (!(flags & XE_VM_FLAG_MIGRATION))
+		xe_device_mem_access_put(xe);
+	return ERR_PTR(err);
+}
+
+static void xe_vm_close(struct xe_vm *vm)
+{
+	down_write(&vm->lock);
+	vm->size = 0;	/* vm_destroy_work_func() asserts on a zero size before freeing */
+	up_write(&vm->lock);
+}
+
+void xe_vm_close_and_put(struct xe_vm *vm)
+{
+	LIST_HEAD(contested);	/* VMAs whose BO is not private to this VM; destroyed last */
+	struct xe_device *xe = vm->xe;
+	struct xe_tile *tile;
+	struct xe_vma *vma, *next_vma;
+	struct drm_gpuva *gpuva, *next;
+	u8 id;
+
+	xe_assert(xe, !vm->preempt.num_exec_queues);
+
+	xe_vm_close(vm);	/* sets vm->size to 0 */
+	if (xe_vm_in_preempt_fence_mode(vm))
+		flush_work(&vm->preempt.rebind_work);
+
+	down_write(&vm->lock);
+	for_each_tile(tile, xe, id) {
+		if (vm->q[id])
+			xe_exec_queue_last_fence_put(vm->q[id], vm);
+	}
+	up_write(&vm->lock);
+
+	for_each_tile(tile, xe, id) {	/* kill and release the per-tile queues outside vm->lock */
+		if (vm->q[id]) {
+			xe_exec_queue_kill(vm->q[id]);
+			xe_exec_queue_put(vm->q[id]);
+			vm->q[id] = NULL;
+		}
+	}
+
+	down_write(&vm->lock);
+	xe_vm_lock(vm, false);
+	drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
+		vma = gpuva_to_vma(gpuva);
+
+		if (xe_vma_has_no_bo(vma)) {
+			down_read(&vm->userptr.notifier_lock);
+			vma->gpuva.flags |= XE_VMA_DESTROYED;
+			up_read(&vm->userptr.notifier_lock);
+		}
+
+		xe_vm_remove_vma(vm, vma);
+
+		/* easy case, remove from VMA? */
+		if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
+			list_del_init(&vma->combined_links.rebind);
+			xe_vma_destroy(vma, NULL);
+			continue;
+		}
+
+		list_move_tail(&vma->combined_links.destroy, &contested);	/* handled after xe_vm_unlock() */
+		vma->gpuva.flags |= XE_VMA_DESTROYED;
+	}
+
+	/*
+	 * All vm operations will add shared fences to resv.
+	 * The only exception is eviction for a shared object,
+	 * but even so, the unbind when evicted would still
+	 * install a fence to resv. Hence it's safe to
+	 * destroy the pagetables immediately.
+	 */
+	xe_vm_free_scratch(vm);
+
+	for_each_tile(tile, xe, id) {
+		if (vm->pt_root[id]) {
+			xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
+			vm->pt_root[id] = NULL;	/* vm_destroy_work_func() warns on a leftover root */
+		}
+	}
+	xe_vm_unlock(vm);
+
+	/*
+	 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
+	 * Since we hold a refcount to the bo, we can remove and free
+	 * the members safely without locking.
+	 */
+	list_for_each_entry_safe(vma, next_vma, &contested,
+				 combined_links.destroy) {
+		list_del_init(&vma->combined_links.destroy);
+		xe_vma_destroy_unlocked(vma);	/* takes the VM/BO locks through its own drm_exec */
+	}
+
+	up_write(&vm->lock);
+
+	mutex_lock(&xe->usm.lock);	/* mirrors the mode accounting done in xe_vm_create() */
+	if (vm->flags & XE_VM_FLAG_FAULT_MODE)
+		xe->usm.num_vm_in_fault_mode--;
+	else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
+		xe->usm.num_vm_in_non_fault_mode--;
+	mutex_unlock(&xe->usm.lock);
+
+	for_each_tile(tile, xe, id)
+		xe_range_fence_tree_fini(&vm->rftree[id]);
+
+	xe_vm_put(vm);
+}
+
+/* Deferred final free of a VM, queued by xe_vm_free() so that it may sleep. */
+static void vm_destroy_work_func(struct work_struct *w)
+{
+	struct xe_vm *vm = container_of(w, struct xe_vm, destroy_work);
+	struct xe_device *xe = vm->xe;
+	struct xe_tile *tile;
+	void *lookup;
+	u8 id;
+
+	/* xe_vm_close_and_put was not called? */
+	xe_assert(xe, !vm->size);
+
+	if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
+		xe_device_mem_access_put(xe);
+
+		if (xe->info.has_asid && vm->usm.asid) {
+			mutex_lock(&xe->usm.lock);
+			lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
+			xe_assert(xe, lookup == vm);
+			mutex_unlock(&xe->usm.lock);
+		}
+	}
+
+	for_each_tile(tile, xe, id)
+		XE_WARN_ON(vm->pt_root[id]);
+
+	trace_xe_vm_free(vm);
+	dma_fence_put(vm->rebind_fence);
+	kfree(vm);
+}
+
+/*
+ * drm_gpuvm vm_free hook. Destroying the VM needs to be able to sleep, so
+ * the work is pushed to vm_destroy_work_func() on the unbound workqueue.
+ */
+static void xe_vm_free(struct drm_gpuvm *gpuvm)
+{
+	queue_work(system_unbound_wq, &container_of(gpuvm, struct xe_vm, gpuvm)->destroy_work);
+}
+
+/* Look up VM @id of @xef; on a hit xe_vm_get() is called on it, on a miss NULL is returned. */
+struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
+{
+	struct xe_vm *found;
+
+	mutex_lock(&xef->vm.lock);
+	found = xa_load(&xef->vm.xa, id);
+	if (found)
+		xe_vm_get(found);
+	mutex_unlock(&xef->vm.lock);
+	return found;
+}
+
+u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
+{	/* PDE for offset 0 of @tile's root page-table BO, using the XE_CACHE_WB PAT index */
+	return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0,
+					 tile_to_xe(tile)->pat.idx[XE_CACHE_WB]);
+}
+
+/* Use @q when given, otherwise fall back to the VM's queue in slot 0. */
+static struct xe_exec_queue *to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
+{
+	return q ?: vm->q[0];
+}
+
+static struct dma_fence *
+xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
+		 struct xe_sync_entry *syncs, u32 num_syncs,
+		 bool first_op, bool last_op)
+{
+	struct xe_vm *vm = xe_vma_vm(vma);
+	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
+	struct xe_tile *tile;
+	struct dma_fence *fence = NULL;
+	struct dma_fence **fences = NULL;
+	struct dma_fence_array *cf = NULL;
+	int cur_fence = 0, i;
+	int number_tiles = hweight8(vma->tile_present);	/* tiles the VMA is currently bound on */
+	int err;
+	u8 id;
+
+	trace_xe_vma_unbind(vma);
+
+	if (vma->ufence) {
+		struct xe_user_fence * const f = vma->ufence;
+
+		if (!xe_sync_ufence_get_status(f))	/* presumably: user fence not signalled yet */
+			return ERR_PTR(-EBUSY);
+
+		vma->ufence = NULL;
+		xe_sync_ufence_put(f);
+	}
+
+	if (number_tiles > 1) {	/* several tiles: collect the per-tile fences for a fence array */
+		fences = kmalloc_array(number_tiles, sizeof(*fences),
+				       GFP_KERNEL);
+		if (!fences)
+			return ERR_PTR(-ENOMEM);
+	}
+
+	for_each_tile(tile, vm->xe, id) {
+		if (!(vma->tile_present & BIT(id)))
+			goto next;
+
+		fence = __xe_pt_unbind_vma(tile, vma, q ? q : vm->q[id],
+					   first_op ? syncs : NULL,
+					   first_op ? num_syncs : 0);
+		if (IS_ERR(fence)) {
+			err = PTR_ERR(fence);
+			goto err_fences;
+		}
+
+		if (fences)
+			fences[cur_fence++] = fence;
+
+next:
+		if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list))	/* step to the next queue on q's multi_gt_list */
+			q = list_next_entry(q, multi_gt_list);
+	}
+
+	if (fences) {
+		cf = dma_fence_array_create(number_tiles, fences,
+					    vm->composite_fence_ctx,
+					    vm->composite_fence_seqno++,
+					    false);
+		if (!cf) {
+			--vm->composite_fence_seqno;	/* undo the seqno consumed above */
+			err = -ENOMEM;
+			goto err_fences;
+		}
+	}
+
+	fence = cf ? &cf->base : !fence ?	/* array fence, else the single fence, else the queue's last fence */
+		xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence;
+	if (last_op) {	/* the syncs are only signalled when last_op is set */
+		for (i = 0; i < num_syncs; i++)
+			xe_sync_entry_signal(&syncs[i], NULL, fence);
+	}
+
+	return fence;
+
+err_fences:
+	if (fences) {	/* drop the per-tile fences collected so far */
+		while (cur_fence)
+			dma_fence_put(fences[--cur_fence]);
+		kfree(fences);
+	}
+
+	return ERR_PTR(err);
+}
+
+static struct dma_fence *
+xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
+	       struct xe_sync_entry *syncs, u32 num_syncs,
+	       bool first_op, bool last_op)
+{
+	struct xe_tile *tile;
+	struct dma_fence *fence;	/* NOTE(review): stays unset if no tile matches tile_mask - confirm the mask is never 0 */
+	struct dma_fence **fences = NULL;
+	struct dma_fence_array *cf = NULL;
+	struct xe_vm *vm = xe_vma_vm(vma);
+	int cur_fence = 0, i;
+	int number_tiles = hweight8(vma->tile_mask);
+	int err;
+	u8 id;
+	/* Bind @vma on each tile set in vma->tile_mask; returns a fence or an ERR_PTR. */
+	trace_xe_vma_bind(vma);
+
+	if (number_tiles > 1) {	/* a fence array is only needed for more than one tile */
+		fences = kmalloc_array(number_tiles, sizeof(*fences),
+				       GFP_KERNEL);
+		if (!fences)
+			return ERR_PTR(-ENOMEM);
+	}
+
+	for_each_tile(tile, vm->xe, id) {
+		if (!(vma->tile_mask & BIT(id)))
+			goto next;
+		/* Syncs go to the first operation only; the last argument is whether this tile is already in tile_present. */
+		fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id],
+					 first_op ? syncs : NULL,
+					 first_op ? num_syncs : 0,
+					 vma->tile_present & BIT(id));
+		if (IS_ERR(fence)) {
+			err = PTR_ERR(fence);
+			goto err_fences;
+		}
+
+		if (fences)
+			fences[cur_fence++] = fence;
+
+next:
+		if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list))
+			q = list_next_entry(q, multi_gt_list);	/* move to the next queue on multi_gt_list */
+	}
+	/* Wrap the collected per-tile fences in one dma_fence_array. */
+	if (fences) {
+		cf = dma_fence_array_create(number_tiles, fences,
+					    vm->composite_fence_ctx,
+					    vm->composite_fence_seqno++,
+					    false);
+		if (!cf) {
+			--vm->composite_fence_seqno;	/* undo the increment made in the call above */
+			err = -ENOMEM;
+			goto err_fences;
+		}
+	}
+
+	if (last_op) {	/* only the last operation signals the syncs */
+		for (i = 0; i < num_syncs; i++)
+			xe_sync_entry_signal(&syncs[i], NULL,
+					     cf ? &cf->base : fence);
+	}
+
+	return cf ? &cf->base : fence;
+
+err_fences:
+	if (fences) {
+		while (cur_fence)	/* drop every fence stored in the array so far */
+			dma_fence_put(fences[--cur_fence]);
+		kfree(fences);
+	}
+
+	return ERR_PTR(err);
+}
+
+static struct xe_user_fence *
+find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
+{
+	u32 idx;
+	/* The first entry that xe_sync_is_ufence() accepts decides the result. */
+	for (idx = 0; idx < num_syncs; idx++) {
+		struct xe_sync_entry *entry = syncs + idx;
+
+		if (!xe_sync_is_ufence(entry))
+			continue;
+		return xe_sync_ufence_get(entry);
+	}
+	return NULL;	/* no user-fence entry among the syncs */
+}
+
+static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
+			struct xe_exec_queue *q, struct xe_sync_entry *syncs,
+			u32 num_syncs, bool immediate, bool first_op,
+			bool last_op)
+{
+	struct dma_fence *fence;
+	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
+	struct xe_user_fence *ufence;
+	/* Bind @vma when @immediate, otherwise only forward the wait queue's last fence. Returns 0 or -errno. */
+	xe_vm_assert_held(vm);
+
+	ufence = find_ufence_get(syncs, num_syncs);
+	if (vma->ufence && ufence)
+		xe_sync_ufence_put(vma->ufence);	/* drop the user fence that is about to be replaced */
+
+	vma->ufence = ufence ?: vma->ufence;
+
+	if (immediate) {
+		fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op,
+				       last_op);
+		if (IS_ERR(fence))
+			return PTR_ERR(fence);
+	} else {
+		int i;
+		/* No bind is issued on this path; it is asserted to be used by fault-mode VMs only. */
+		xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
+
+		fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm);
+		if (last_op) {
+			for (i = 0; i < num_syncs; i++)
+				xe_sync_entry_signal(&syncs[i], NULL, fence);
+		}
+	}
+	/* The last operation's fence is stored as the wait queue's last fence. */
+	if (last_op)
+		xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
+	dma_fence_put(fence);
+
+	return 0;
+}
+
+static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q,
+		      struct xe_bo *bo, struct xe_sync_entry *syncs,
+		      u32 num_syncs, bool immediate, bool first_op,
+		      bool last_op)
+{
+	int err;
+	/* Validate @bo when binding immediately, then hand over to __xe_vm_bind(). */
+	xe_vm_assert_held(vm);
+	xe_bo_assert_held(bo);
+
+	if (bo && immediate) {
+		err = xe_bo_validate(bo, vm, true);
+		if (err)
+			return err;
+	}
+
+	return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate, first_op,
+			    last_op);
+}
+
+static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
+			struct xe_exec_queue *q, struct xe_sync_entry *syncs,
+			u32 num_syncs, bool first_op, bool last_op)
+{
+	struct dma_fence *fence;
+	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
+	/* Unbind @vma and then destroy it against the unbind fence. Returns 0 or -errno. */
+	xe_vm_assert_held(vm);
+	xe_bo_assert_held(xe_vma_bo(vma));
+
+	fence = xe_vm_unbind_vma(vma, q, syncs, num_syncs, first_op, last_op);
+	if (IS_ERR(fence))
+		return PTR_ERR(fence);	/* the VMA is left alone when the unbind failed */
+
+	xe_vma_destroy(vma, fence);
+	if (last_op)	/* the last operation's fence becomes the wait queue's last fence */
+		xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
+	dma_fence_put(fence);
+
+	return 0;
+}
+
+#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
+				    DRM_XE_VM_CREATE_FLAG_LR_MODE | \
+				    DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
+
+/*
+ * Create a VM for @file; returns 0 with the new id in args->vm_id, else -errno.
+ */
+int xe_vm_create_ioctl(struct drm_device *dev, void *data,
+		       struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_vm_create *args = data;
+	struct xe_tile *tile;
+	struct xe_vm *vm;
+	u32 id, asid;
+	int err;
+	u32 flags = 0;
+
+	if (XE_IOCTL_DBG(xe, args->extensions))
+		return -EINVAL;
+
+	if (XE_WA(xe_root_mmio_gt(xe), 14016763929))
+		args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
+
+	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
+			 !xe->info.has_usm))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
+			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
+			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
+			 xe_device_in_non_fault_mode(xe)))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) &&
+			 xe_device_in_fault_mode(xe)))
+		return -EINVAL;
+
+	if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
+		flags |= XE_VM_FLAG_SCRATCH_PAGE;
+	if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
+		flags |= XE_VM_FLAG_LR_MODE;
+	if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
+		flags |= XE_VM_FLAG_FAULT_MODE;
+
+	vm = xe_vm_create(xe, flags);
+	if (IS_ERR(vm))
+		return PTR_ERR(vm);
+
+	if (xe->info.has_asid) {
+		mutex_lock(&xe->usm.lock);
+		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
+				      XA_LIMIT(1, XE_MAX_ASID - 1),
+				      &xe->usm.next_asid, GFP_KERNEL);
+		mutex_unlock(&xe->usm.lock);
+		if (err < 0)
+			goto err_close_and_put;
+
+		vm->usm.asid = asid;
+	}
+
+	vm->xef = xef;
+
+	/* Record BO memory for VM pagetable created against client */
+	for_each_tile(tile, xe, id)
+		if (vm->pt_root[id])
+			xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo);
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
+	/* Warning: Security issue - never enable by default */
+	args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
+#endif
+
+	/*
+	 * Publish the id last: once it is in xef->vm.xa a concurrent
+	 * xe_vm_destroy_ioctl() may drop the VM, so @vm is not touched afterwards.
+	 */
+	mutex_lock(&xef->vm.lock);
+	err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
+	mutex_unlock(&xef->vm.lock);
+	if (err)
+		goto err_close_and_put;
+
+	args->vm_id = id;
+	return 0;
+
+err_close_and_put:
+	xe_vm_close_and_put(vm);
+
+	return err;
+}
+
+int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_vm_destroy *args = data;
+	struct xe_vm *vm;
+	int err = 0;
+	/* Remove the VM named by args->vm_id from this file's table and close it. */
+	if (XE_IOCTL_DBG(xe, args->pad) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
+	mutex_lock(&xef->vm.lock);
+	vm = xa_load(&xef->vm.xa, args->vm_id);
+	if (XE_IOCTL_DBG(xe, !vm))
+		err = -ENOENT;
+	else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
+		err = -EBUSY;	/* refused while vm->preempt.num_exec_queues is non-zero */
+	else
+		xa_erase(&xef->vm.xa, args->vm_id);
+	mutex_unlock(&xef->vm.lock);
+	/* The close runs outside xef->vm.lock and only when the id was erased above. */
+	if (!err)
+		xe_vm_close_and_put(vm);
+
+	return err;
+}
+
+static const u32 region_to_mem_type[] = {
+	XE_PL_TT,
+	XE_PL_VRAM0,
+	XE_PL_VRAM1,
+};
+
+static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
+			  struct xe_exec_queue *q, u32 region,
+			  struct xe_sync_entry *syncs, u32 num_syncs,
+			  bool first_op, bool last_op)
+{
+	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
+	int err;
+	/* @region indexes region_to_mem_type[], so it must be strictly below ARRAY_SIZE. */
+	xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type));
+
+	if (!xe_vma_has_no_bo(vma)) {
+		err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]);
+		if (err)
+			return err;
+	}
+	/* Bind again only if some tile in tile_mask is not present or has been invalidated. */
+	if (vma->tile_mask != (vma->tile_present & ~vma->usm.tile_invalidated)) {
+		return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs,
+				  true, first_op, last_op);
+	} else {
+		int i;
+
+		/* Nothing to do, signal fences now */
+		if (last_op) {
+			for (i = 0; i < num_syncs; i++) {
+				struct dma_fence *fence =
+					xe_exec_queue_last_fence_get(wait_exec_queue, vm);
+
+				xe_sync_entry_signal(&syncs[i], NULL, fence);
+				dma_fence_put(fence);
+			}
+		}
+
+		return 0;
+	}
+}
+
+static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
+			     bool post_commit)
+{
+	down_read(&vm->userptr.notifier_lock);
+	vma->gpuva.flags |= XE_VMA_DESTROYED;	/* flag is set with userptr.notifier_lock held for read */
+	up_read(&vm->userptr.notifier_lock);
+	if (post_commit)	/* callers pass true once the VMA is part of the VM: take it out again */
+		xe_vm_remove_vma(vm, vma);
+}
+
+#undef ULL
+#define ULL	unsigned long long	/* cast target for the %llx arguments in this file */
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
+static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
+{
+	struct xe_vma *vma;
+	/* Log address and range of one GPUVA op via vm_dbg(); an empty stub is built without CONFIG_DRM_XE_DEBUG_VM. */
+	switch (op->op) {
+	case DRM_GPUVA_OP_MAP:
+		vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
+		       (ULL)op->map.va.addr, (ULL)op->map.va.range);
+		break;
+	case DRM_GPUVA_OP_REMAP:
+		vma = gpuva_to_vma(op->remap.unmap->va);
+		vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
+		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
+		       op->remap.unmap->keep ? 1 : 0);
+		if (op->remap.prev)
+			vm_dbg(&xe->drm,
+			       "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
+			       (ULL)op->remap.prev->va.addr,
+			       (ULL)op->remap.prev->va.range);
+		if (op->remap.next)
+			vm_dbg(&xe->drm,
+			       "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
+			       (ULL)op->remap.next->va.addr,
+			       (ULL)op->remap.next->va.range);
+		break;
+	case DRM_GPUVA_OP_UNMAP:
+		vma = gpuva_to_vma(op->unmap.va);
+		vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
+		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
+		       op->unmap.keep ? 1 : 0);
+		break;
+	case DRM_GPUVA_OP_PREFETCH:
+		vma = gpuva_to_vma(op->prefetch.va);
+		vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
+		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
+		break;
+	default:
+		drm_warn(&xe->drm, "NOT POSSIBLE");
+	}
+}
+#else
+static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
+{
+}
+#endif
+
+/*
+ * Build the GPUVA operations list for one bind op and copy the IOCTL arguments
+ * into each op, so parse and commit no longer need them. Returns ERR_PTR on failure.
+ */
+static struct drm_gpuva_ops *
+vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
+			 u64 bo_offset_or_userptr, u64 addr, u64 range,
+			 u32 operation, u32 flags,
+			 u32 prefetch_region, u16 pat_index)
+{
+	struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
+	struct drm_gpuva_ops *ops;
+	struct drm_gpuva_op *__op;
+	struct drm_gpuvm_bo *vm_bo;
+	int err;
+
+	lockdep_assert_held_write(&vm->lock);
+
+	vm_dbg(&vm->xe->drm,
+	       "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
+	       operation, (ULL)addr, (ULL)range,
+	       (ULL)bo_offset_or_userptr);
+	/* Each bind operation maps onto one drm_gpuvm "ops create" helper. */
+	switch (operation) {
+	case DRM_XE_VM_BIND_OP_MAP:
+	case DRM_XE_VM_BIND_OP_MAP_USERPTR:
+		ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range,
+						  obj, bo_offset_or_userptr);
+		break;
+	case DRM_XE_VM_BIND_OP_UNMAP:
+		ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
+		break;
+	case DRM_XE_VM_BIND_OP_PREFETCH:
+		ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
+		break;
+	case DRM_XE_VM_BIND_OP_UNMAP_ALL:
+		xe_assert(vm->xe, bo);
+		/* The BO stays locked while its vm_bo is obtained and the unmap ops are created. */
+		err = xe_bo_lock(bo, true);
+		if (err)
+			return ERR_PTR(err);
+
+		vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
+		if (IS_ERR(vm_bo)) {
+			xe_bo_unlock(bo);
+			return ERR_CAST(vm_bo);
+		}
+
+		ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
+		drm_gpuvm_bo_put(vm_bo);
+		xe_bo_unlock(bo);
+		break;
+	default:
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+		ops = ERR_PTR(-EINVAL);
+	}
+	if (IS_ERR(ops))
+		return ops;
+	/* Store the per-op IOCTL arguments in the xe_vma_op that wraps each GPUVA op. */
+	drm_gpuva_for_each_op(__op, ops) {
+		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
+
+		if (__op->op == DRM_GPUVA_OP_MAP) {
+			op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
+			op->map.pat_index = pat_index;
+		} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
+			op->prefetch.region = prefetch_region;
+		}
+
+		print_op(vm->xe, __op);
+	}
+
+	return ops;
+}
+
+static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
+			      u16 pat_index, unsigned int flags)
+{
+	struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
+	struct drm_exec exec;
+	struct xe_vma *vma;
+	int err = 0;
+
+	lockdep_assert_held_write(&vm->lock);
+
+	if (bo) {
+		drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+		drm_exec_until_all_locked(&exec) {
+			err = 0;
+			if (!bo->vm) {
+				err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
+				drm_exec_retry_on_contention(&exec);
+			}
+			if (!err) {
+				err = drm_exec_lock_obj(&exec, &bo->ttm.base);
+				drm_exec_retry_on_contention(&exec);
+			}
+			if (err) {
+				drm_exec_fini(&exec);
+				return ERR_PTR(err);
+			}
+		}
+	}
+	vma = xe_vma_create(vm, bo, op->gem.offset,
+			    op->va.addr, op->va.addr +
+			    op->va.range - 1, pat_index, flags);
+	if (bo)
+		drm_exec_fini(&exec);
+
+	/*
+	 * Hand a failed xe_vma_create() result straight back; never dereference it.
+	 */
+	if (IS_ERR(vma))
+		return vma;
+
+	if (xe_vma_is_userptr(vma))
+		err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
+	else if (!xe_vma_has_no_bo(vma) && !bo->vm)
+		err = add_preempt_fences(vm, bo);
+	if (err) {
+		prep_vma_destroy(vm, vma, false);
+		xe_vma_destroy_unlocked(vma);
+		return ERR_PTR(err);
+	}
+
+	return vma;
+}
+
+static u64 xe_vma_max_pte_size(struct xe_vma *vma)
+{
+	/* Checked from largest to smallest; XE_VMA_PTE_COMPACT counts as 2M. */
+	if (vma->gpuva.flags & XE_VMA_PTE_1G)
+		return SZ_1G;
+	if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
+		return SZ_2M;
+	if (vma->gpuva.flags & XE_VMA_PTE_64K)
+		return SZ_64K;
+	if (vma->gpuva.flags & XE_VMA_PTE_4K)
+		return SZ_4K;
+	return SZ_1G;	/* Uninitialized, used max size */
+}
+
+static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
+{
+	/*
+	 * Record @size as the matching XE_VMA_PTE_* flag on the VMA's gpuva.
+	 * Flags are only ever added here, never cleared.
+	 * Any size other than SZ_1G, SZ_2M, SZ_64K or SZ_4K is ignored and
+	 * leaves the flags untouched.
+	 */
+	if (size == SZ_1G)
+		vma->gpuva.flags |= XE_VMA_PTE_1G;
+	else if (size == SZ_2M)
+		vma->gpuva.flags |= XE_VMA_PTE_2M;
+	else if (size == SZ_64K)
+		vma->gpuva.flags |= XE_VMA_PTE_64K;
+	else if (size == SZ_4K)
+		vma->gpuva.flags |= XE_VMA_PTE_4K;
+}
+
+static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
+{
+	int err = 0;
+	/* Apply @op to the VM (insert new VMAs, retire replaced ones) and note progress in op->flags. */
+	lockdep_assert_held_write(&vm->lock);
+
+	switch (op->base.op) {
+	case DRM_GPUVA_OP_MAP:
+		err |= xe_vm_insert_vma(vm, op->map.vma);
+		if (!err)
+			op->flags |= XE_VMA_OP_COMMITTED;
+		break;
+	case DRM_GPUVA_OP_REMAP:
+	{
+		u8 tile_present =
+			gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
+
+		prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
+				 true);
+		op->flags |= XE_VMA_OP_COMMITTED;
+		/* A skipped prev/next inherits the old VMA's tile_present and is cleared from @op. */
+		if (op->remap.prev) {
+			err |= xe_vm_insert_vma(vm, op->remap.prev);
+			if (!err)
+				op->flags |= XE_VMA_OP_PREV_COMMITTED;
+			if (!err && op->remap.skip_prev) {
+				op->remap.prev->tile_present =
+					tile_present;
+				op->remap.prev = NULL;
+			}
+		}
+		if (op->remap.next) {
+			err |= xe_vm_insert_vma(vm, op->remap.next);	/* NOTE(review): |= merges two errnos if both inserts fail */
+			if (!err)
+				op->flags |= XE_VMA_OP_NEXT_COMMITTED;
+			if (!err && op->remap.skip_next) {
+				op->remap.next->tile_present =
+					tile_present;
+				op->remap.next = NULL;
+			}
+		}
+
+		/* Adjust for partial unbind after removing VMA from VM */
+		if (!err) {
+			op->base.remap.unmap->va->va.addr = op->remap.start;
+			op->base.remap.unmap->va->va.range = op->remap.range;
+		}
+		break;
+	}
+	case DRM_GPUVA_OP_UNMAP:
+		prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
+		op->flags |= XE_VMA_OP_COMMITTED;
+		break;
+	case DRM_GPUVA_OP_PREFETCH:
+		op->flags |= XE_VMA_OP_COMMITTED;
+		break;
+	default:
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+	}
+
+	return err;
+}
+
+
+static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
+				   struct drm_gpuva_ops *ops,
+				   struct xe_sync_entry *syncs, u32 num_syncs,
+				   struct list_head *ops_list, bool last)
+{
+	struct xe_vma_op *last_op = NULL;
+	struct drm_gpuva_op *__op;
+	int err = 0;
+	/* Queue every op of @ops on @ops_list, create the VMAs it needs and commit it. */
+	lockdep_assert_held_write(&vm->lock);
+
+	drm_gpuva_for_each_op(__op, ops) {
+		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
+		struct xe_vma *vma;
+		bool first = list_empty(ops_list);
+		unsigned int flags = 0;
+
+		INIT_LIST_HEAD(&op->link);
+		list_add_tail(&op->link, ops_list);
+		/* The very first op on @ops_list carries the syncs. */
+		if (first) {
+			op->flags |= XE_VMA_OP_FIRST;
+			op->num_syncs = num_syncs;
+			op->syncs = syncs;
+		}
+
+		op->q = q;
+
+		switch (op->base.op) {
+		case DRM_GPUVA_OP_MAP:
+		{
+			flags |= op->map.is_null ?
+				VMA_CREATE_FLAG_IS_NULL : 0;
+
+			vma = new_vma(vm, &op->base.map, op->map.pat_index,
+				      flags);
+			if (IS_ERR(vma))
+				return PTR_ERR(vma);
+
+			op->map.vma = vma;
+			break;
+		}
+		case DRM_GPUVA_OP_REMAP:
+		{
+			struct xe_vma *old =
+				gpuva_to_vma(op->base.remap.unmap->va);
+			/* Start from the whole old VMA; skipped prev/next parts shrink the range below. */
+			op->remap.start = xe_vma_start(old);
+			op->remap.range = xe_vma_size(old);
+
+			if (op->base.remap.prev) {
+				flags |= op->base.remap.unmap->va->flags &
+					XE_VMA_READ_ONLY ?
+					VMA_CREATE_FLAG_READ_ONLY : 0;
+				flags |= op->base.remap.unmap->va->flags &
+					DRM_GPUVA_SPARSE ?
+					VMA_CREATE_FLAG_IS_NULL : 0;
+
+				vma = new_vma(vm, op->base.remap.prev,
+					      old->pat_index, flags);
+				if (IS_ERR(vma))
+					return PTR_ERR(vma);
+
+				op->remap.prev = vma;
+
+				/*
+				 * Userptr creates a new SG mapping so
+				 * we must also rebind.
+				 */
+				op->remap.skip_prev = !xe_vma_is_userptr(old) &&
+					IS_ALIGNED(xe_vma_end(vma),
+						   xe_vma_max_pte_size(old));
+				if (op->remap.skip_prev) {
+					xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
+					op->remap.range -=
+						xe_vma_end(vma) -
+						xe_vma_start(old);
+					op->remap.start = xe_vma_end(vma);
+				}
+			}
+
+			if (op->base.remap.next) {
+				flags |= op->base.remap.unmap->va->flags &
+					XE_VMA_READ_ONLY ?
+					VMA_CREATE_FLAG_READ_ONLY : 0;
+				flags |= op->base.remap.unmap->va->flags &
+					DRM_GPUVA_SPARSE ?
+					VMA_CREATE_FLAG_IS_NULL : 0;
+
+				vma = new_vma(vm, op->base.remap.next,
+					      old->pat_index, flags);
+				if (IS_ERR(vma))
+					return PTR_ERR(vma);
+
+				op->remap.next = vma;
+
+				/*
+				 * Userptr creates a new SG mapping so
+				 * we must also rebind.
+				 */
+				op->remap.skip_next = !xe_vma_is_userptr(old) &&
+					IS_ALIGNED(xe_vma_start(vma),
+						   xe_vma_max_pte_size(old));
+				if (op->remap.skip_next) {
+					xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
+					op->remap.range -=
+						xe_vma_end(old) -
+						xe_vma_start(vma);
+				}
+			}
+			break;
+		}
+		case DRM_GPUVA_OP_UNMAP:
+		case DRM_GPUVA_OP_PREFETCH:
+			/* Nothing to do */
+			break;
+		default:
+			drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+		}
+
+		last_op = op;
+		/* Each op is committed right after it has been parsed. */
+		err = xe_vma_op_commit(vm, op);
+		if (err)
+			return err;
+	}
+
+	/* FIXME: Unhandled corner case */
+	XE_WARN_ON(!last_op && last && !list_empty(ops_list));
+
+	if (!last_op)
+		return 0;
+	/* The final op of this list carries @ops and, for the last bind, the syncs as well. */
+	last_op->ops = ops;
+	if (last) {
+		last_op->flags |= XE_VMA_OP_LAST;
+		last_op->num_syncs = num_syncs;
+		last_op->syncs = syncs;
+	}
+
+	return 0;
+}
+
+static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
+		      struct xe_vma *vma, struct xe_vma_op *op)
+{
+	int err;
+	/* Prepare @vma through @exec, then run the bind/unbind/prefetch that @op describes. */
+	lockdep_assert_held_write(&vm->lock);
+
+	err = xe_vm_prepare_vma(exec, vma, 1);
+	if (err)
+		return err;
+
+	xe_vm_assert_held(vm);
+	xe_bo_assert_held(xe_vma_bo(vma));
+
+	switch (op->base.op) {
+	case DRM_GPUVA_OP_MAP:
+		err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
+				 op->syncs, op->num_syncs,
+				 !xe_vm_in_fault_mode(vm),
+				 op->flags & XE_VMA_OP_FIRST,
+				 op->flags & XE_VMA_OP_LAST);
+		break;
+	case DRM_GPUVA_OP_REMAP:
+	{
+		bool prev = !!op->remap.prev;
+		bool next = !!op->remap.next;
+		/* Progress is kept in @op (unmap_done, prev/next cleared) so a retry resumes at the failed step. */
+		if (!op->remap.unmap_done) {
+			if (prev || next)
+				vma->gpuva.flags |= XE_VMA_FIRST_REBIND;
+			err = xe_vm_unbind(vm, vma, op->q, op->syncs,
+					   op->num_syncs,
+					   op->flags & XE_VMA_OP_FIRST,
+					   op->flags & XE_VMA_OP_LAST &&
+					   !prev && !next);
+			if (err)
+				break;
+			op->remap.unmap_done = true;
+		}
+
+		if (prev) {
+			op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND;
+			err = xe_vm_bind(vm, op->remap.prev, op->q,
+					 xe_vma_bo(op->remap.prev), op->syncs,
+					 op->num_syncs, true, false,
+					 op->flags & XE_VMA_OP_LAST && !next);
+			op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND;
+			if (err)
+				break;
+			op->remap.prev = NULL;
+		}
+
+		if (next) {
+			op->remap.next->gpuva.flags |= XE_VMA_LAST_REBIND;
+			err = xe_vm_bind(vm, op->remap.next, op->q,
+					 xe_vma_bo(op->remap.next),
+					 op->syncs, op->num_syncs,
+					 true, false,
+					 op->flags & XE_VMA_OP_LAST);
+			op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND;
+			if (err)
+				break;
+			op->remap.next = NULL;
+		}
+
+		break;
+	}
+	case DRM_GPUVA_OP_UNMAP:
+		err = xe_vm_unbind(vm, vma, op->q, op->syncs,
+				   op->num_syncs, op->flags & XE_VMA_OP_FIRST,
+				   op->flags & XE_VMA_OP_LAST);
+		break;
+	case DRM_GPUVA_OP_PREFETCH:
+		err = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region,
+				     op->syncs, op->num_syncs,
+				     op->flags & XE_VMA_OP_FIRST,
+				     op->flags & XE_VMA_OP_LAST);
+		break;
+	default:
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+	}
+
+	if (err)
+		trace_xe_vma_fail(vma);
+
+	return err;
+}
+
+static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
+			       struct xe_vma_op *op)
+{
+	struct drm_exec exec;
+	int err;
+	/* Run @op inside a drm_exec section; on -EAGAIN re-pin a userptr VMA and start over. */
+retry_userptr:
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+	drm_exec_until_all_locked(&exec) {
+		err = op_execute(&exec, vm, vma, op);
+		drm_exec_retry_on_contention(&exec);
+		if (err)
+			break;
+	}
+	drm_exec_fini(&exec);
+
+	if (err == -EAGAIN) {
+		lockdep_assert_held_write(&vm->lock);
+		/* For a REMAP, pick the VMA of the step that has not completed yet. */
+		if (op->base.op == DRM_GPUVA_OP_REMAP) {
+			if (!op->remap.unmap_done)
+				vma = gpuva_to_vma(op->base.remap.unmap->va);
+			else if (op->remap.prev)
+				vma = op->remap.prev;
+			else
+				vma = op->remap.next;
+		}
+
+		if (xe_vma_is_userptr(vma)) {
+			err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
+			if (!err)
+				goto retry_userptr;
+
+			trace_xe_vma_fail(vma);
+		}
+	}
+
+	return err;
+}
+
+static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
+{
+	int ret = 0;
+	/* Select the VMA that @op currently acts on and execute the op against it. */
+	lockdep_assert_held_write(&vm->lock);
+
+	switch (op->base.op) {
+	case DRM_GPUVA_OP_MAP:
+		ret = __xe_vma_op_execute(vm, op->map.vma, op);
+		break;
+	case DRM_GPUVA_OP_REMAP:
+	{
+		struct xe_vma *vma;
+		/* Order of steps: unmap of the old VMA, then prev, then next. */
+		if (!op->remap.unmap_done)
+			vma = gpuva_to_vma(op->base.remap.unmap->va);
+		else if (op->remap.prev)
+			vma = op->remap.prev;
+		else
+			vma = op->remap.next;
+
+		ret = __xe_vma_op_execute(vm, vma, op);
+		break;
+	}
+	case DRM_GPUVA_OP_UNMAP:
+		ret = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va),
+					  op);
+		break;
+	case DRM_GPUVA_OP_PREFETCH:
+		ret = __xe_vma_op_execute(vm,
+					  gpuva_to_vma(op->base.prefetch.va),
+					  op);
+		break;
+	default:
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+	}
+
+	return ret;
+}
+
+static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op)
+{
+	bool last = op->flags & XE_VMA_OP_LAST;
+	/* The op flagged LAST releases the syncs, the exec queue reference and a VM reference. */
+	if (last) {
+		while (op->num_syncs--)
+			xe_sync_entry_cleanup(&op->syncs[op->num_syncs]);
+		kfree(op->syncs);
+		if (op->q)
+			xe_exec_queue_put(op->q);
+	}
+	if (!list_empty(&op->link))
+		list_del(&op->link);
+	if (op->ops)	/* set on the final op of each ops list */
+		drm_gpuva_ops_free(&vm->gpuvm, op->ops);
+	if (last)
+		xe_vm_put(vm);
+}
+
+static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
+			     bool post_commit, bool prev_post_commit,
+			     bool next_post_commit)
+{
+	lockdep_assert_held_write(&vm->lock);
+	/* Undo parse/commit for @op: destroy VMAs it created, revive VMAs it marked destroyed. */
+	switch (op->base.op) {
+	case DRM_GPUVA_OP_MAP:
+		if (op->map.vma) {
+			prep_vma_destroy(vm, op->map.vma, post_commit);
+			xe_vma_destroy_unlocked(op->map.vma);
+		}
+		break;
+	case DRM_GPUVA_OP_UNMAP:
+	{
+		struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
+
+		if (vma) {
+			down_read(&vm->userptr.notifier_lock);
+			vma->gpuva.flags &= ~XE_VMA_DESTROYED;
+			up_read(&vm->userptr.notifier_lock);
+			if (post_commit)	/* commit removed it from the VM: put it back */
+				xe_vm_insert_vma(vm, vma);
+		}
+		break;
+	}
+	case DRM_GPUVA_OP_REMAP:
+	{
+		struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
+		/* Drop the replacement VMAs first, then restore the VMA they were replacing. */
+		if (op->remap.prev) {
+			prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
+			xe_vma_destroy_unlocked(op->remap.prev);
+		}
+		if (op->remap.next) {
+			prep_vma_destroy(vm, op->remap.next, next_post_commit);
+			xe_vma_destroy_unlocked(op->remap.next);
+		}
+		if (vma) {
+			down_read(&vm->userptr.notifier_lock);
+			vma->gpuva.flags &= ~XE_VMA_DESTROYED;
+			up_read(&vm->userptr.notifier_lock);
+			if (post_commit)
+				xe_vm_insert_vma(vm, vma);
+		}
+		break;
+	}
+	case DRM_GPUVA_OP_PREFETCH:
+		/* Nothing to do */
+		break;
+	default:
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+	}
+}
+
+static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
+				     struct drm_gpuva_ops **ops,
+				     int num_ops_list)
+{
+	int i;
+	/* Walk the lists, and the ops inside each list, in reverse order; then free each list. */
+	for (i = num_ops_list - 1; i >= 0; --i) {
+		struct drm_gpuva_ops *__ops = ops[i];
+		struct drm_gpuva_op *__op;
+
+		if (!__ops)
+			continue;
+
+		drm_gpuva_for_each_op_reverse(__op, __ops) {
+			struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
+			/* The COMMITTED flags tell the unwind how far the commit got. */
+			xe_vma_op_unwind(vm, op,
+					 op->flags & XE_VMA_OP_COMMITTED,
+					 op->flags & XE_VMA_OP_PREV_COMMITTED,
+					 op->flags & XE_VMA_OP_NEXT_COMMITTED);
+		}
+
+		drm_gpuva_ops_free(&vm->gpuvm, __ops);
+	}
+}
+
+static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
+				     struct list_head *ops_list)
+{
+	struct xe_vma_op *op, *next;
+	int err;
+
+	lockdep_assert_held_write(&vm->lock);
+	/* Ops run in list order; the first failure kills the VM and is reported as -ENOSPC. */
+	list_for_each_entry_safe(op, next, ops_list, link) {
+		err = xe_vma_op_execute(vm, op);
+		if (err) {
+			drm_warn(&vm->xe->drm, "VM op(%d) failed with %d",
+				 op->base.op, err);
+			/*
+			 * FIXME: Killing VM rather than proper error handling
+			 */
+			xe_vm_kill(vm);
+			return -ENOSPC;
+		}
+		xe_vma_op_cleanup(vm, op);	/* unlinks @op, hence the _safe iterator */
+	}
+
+	return 0;
+}
+
+#define SUPPORTED_FLAGS	(DRM_XE_VM_BIND_FLAG_NULL | \
+	 DRM_XE_VM_BIND_FLAG_DUMPABLE)
+#define XE_64K_PAGE_MASK 0xffffull
+#define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
+
+static int vm_bind_ioctl_check_args(struct xe_device *xe,
+				    struct drm_xe_vm_bind *args,
+				    struct drm_xe_vm_bind_op **bind_ops)
+{
+	int err;
+	int i;
+	/* Validate the bind arguments; on success *bind_ops points at the array of ops. */
+	if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->extensions))
+		return -EINVAL;
+	/* More than one bind: the ops live in a user array that is copied into a kernel buffer. */
+	if (args->num_binds > 1) {
+		u64 __user *bind_user =
+			u64_to_user_ptr(args->vector_of_binds);
+
+		*bind_ops = kvmalloc_array(args->num_binds,
+					   sizeof(struct drm_xe_vm_bind_op),
+					   GFP_KERNEL | __GFP_ACCOUNT);
+		if (!*bind_ops)
+			return -ENOMEM;
+		/* copy_from_user() checks the user range itself; nothing here has called access_ok(). */
+		err = copy_from_user(*bind_ops, bind_user,
+				     sizeof(struct drm_xe_vm_bind_op) *
+				     args->num_binds);
+		if (XE_IOCTL_DBG(xe, err)) {
+			err = -EFAULT;
+			goto free_bind_ops;
+		}
+	} else {
+		*bind_ops = &args->bind;
+	}
+
+	for (i = 0; i < args->num_binds; ++i) {
+		u64 range = (*bind_ops)[i].range;
+		u64 addr = (*bind_ops)[i].addr;
+		u32 op = (*bind_ops)[i].op;
+		u32 flags = (*bind_ops)[i].flags;
+		u32 obj = (*bind_ops)[i].obj;
+		u64 obj_offset = (*bind_ops)[i].obj_offset;
+		u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
+		bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
+		u16 pat_index = (*bind_ops)[i].pat_index;
+		u16 coh_mode;
+
+		if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+		/* Clamp the index under speculation before it is used and stored back. */
+		pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
+		(*bind_ops)[i].pat_index = pat_index;
+		coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
+		if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+
+		if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+		/* The width guard on prefetch_region keeps BIT() from shifting out of range. */
+		if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
+		    XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
+		    XE_IOCTL_DBG(xe, obj && is_null) ||
+		    XE_IOCTL_DBG(xe, obj_offset && is_null) ||
+		    XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
+				 is_null) ||
+		    XE_IOCTL_DBG(xe, !obj &&
+				 op == DRM_XE_VM_BIND_OP_MAP &&
+				 !is_null) ||
+		    XE_IOCTL_DBG(xe, !obj &&
+				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
+		    XE_IOCTL_DBG(xe, addr &&
+				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
+		    XE_IOCTL_DBG(xe, range &&
+				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
+		    XE_IOCTL_DBG(xe, obj &&
+				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
+		    XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
+				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
+		    XE_IOCTL_DBG(xe, obj &&
+				 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
+		    XE_IOCTL_DBG(xe, prefetch_region &&
+				 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
+		    XE_IOCTL_DBG(xe, prefetch_region >= sizeof(xe->info.mem_region_mask) * 8 ||
+				 !(BIT(prefetch_region) & xe->info.mem_region_mask)) ||
+		    XE_IOCTL_DBG(xe, obj &&
+				 op == DRM_XE_VM_BIND_OP_UNMAP)) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+
+		if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
+		    XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
+		    XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
+		    XE_IOCTL_DBG(xe, !range &&
+				 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+	}
+
+	return 0;
+
+free_bind_ops:
+	if (args->num_binds > 1)
+		kvfree(*bind_ops);
+	return err;
+}
+
+static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
+				       struct xe_exec_queue *q,
+				       struct xe_sync_entry *syncs,
+				       int num_syncs)
+{
+	struct xe_exec_queue *wait_q = to_wait_exec_queue(vm, q);
+	struct dma_fence *in_fence;
+	int idx;
+
+	in_fence = xe_sync_in_fence_get(syncs, num_syncs, wait_q, vm);
+	if (IS_ERR(in_fence))
+		return PTR_ERR(in_fence);
+
+	/* Signal every sync entry with the fence obtained above ... */
+	for (idx = 0; idx < num_syncs; idx++)
+		xe_sync_entry_signal(&syncs[idx], NULL, in_fence);
+	/* ... and store it as the wait queue's last fence. */
+	xe_exec_queue_last_fence_set(wait_q, vm, in_fence);
+	dma_fence_put(in_fence);
+
+	return 0;
+}
+/* VM bind ioctl: validate the request, resolve BOs and syncs, run the ops. */
+int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_vm_bind *args = data;
+	struct drm_xe_sync __user *syncs_user;
+	struct xe_bo **bos = NULL;
+	struct drm_gpuva_ops **ops = NULL;
+	struct xe_vm *vm;
+	struct xe_exec_queue *q = NULL;
+	u32 num_syncs, num_ufence = 0;
+	struct xe_sync_entry *syncs = NULL;
+	struct drm_xe_vm_bind_op *bind_ops;
+	LIST_HEAD(ops_list);
+	int err;
+	int i;
+
+	err = vm_bind_ioctl_check_args(xe, args, &bind_ops);
+	if (err)
+		return err;
+	/* Optional exec queue: it must exist and have EXEC_QUEUE_FLAG_VM set */
+	if (args->exec_queue_id) {
+		q = xe_exec_queue_lookup(xef, args->exec_queue_id);
+		if (XE_IOCTL_DBG(xe, !q)) {
+			err = -ENOENT;
+			goto free_objs;
+		}
+
+		if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
+			err = -EINVAL;
+			goto put_exec_queue;
+		}
+	}
+
+	vm = xe_vm_lookup(xef, args->vm_id);
+	if (XE_IOCTL_DBG(xe, !vm)) {
+		err = -EINVAL;
+		goto put_exec_queue;
+	}
+	/* Hold vm->lock for write while validating, building and running ops */
+	err = down_write_killable(&vm->lock);
+	if (err)
+		goto put_vm;
+
+	if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
+		err = -ENOENT;
+		goto release_vm_lock;
+	}
+	/* Overflow-safe form of: addr + range <= vm->size, for every op */
+	for (i = 0; i < args->num_binds; ++i) {
+		u64 range = bind_ops[i].range;
+		u64 addr = bind_ops[i].addr;
+
+		if (XE_IOCTL_DBG(xe, range > vm->size) ||
+		    XE_IOCTL_DBG(xe, addr > vm->size - range)) {
+			err = -EINVAL;
+			goto release_vm_lock;
+		}
+	}
+
+	if (args->num_binds) {
+		bos = kvcalloc(args->num_binds, sizeof(*bos),
+			       GFP_KERNEL | __GFP_ACCOUNT);
+		if (!bos) {
+			err = -ENOMEM;
+			goto release_vm_lock;
+		}
+
+		ops = kvcalloc(args->num_binds, sizeof(*ops),
+			       GFP_KERNEL | __GFP_ACCOUNT);
+		if (!ops) {
+			err = -ENOMEM;
+			goto release_vm_lock;
+		}
+	}
+	/* Resolve each op's GEM handle (if any) and validate the op against it */
+	for (i = 0; i < args->num_binds; ++i) {
+		struct drm_gem_object *gem_obj;
+		u64 range = bind_ops[i].range;
+		u64 addr = bind_ops[i].addr;
+		u32 obj = bind_ops[i].obj;
+		u64 obj_offset = bind_ops[i].obj_offset;
+		u16 pat_index = bind_ops[i].pat_index;
+		u16 coh_mode;
+
+		if (!obj)
+			continue;
+
+		gem_obj = drm_gem_object_lookup(file, obj);
+		if (XE_IOCTL_DBG(xe, !gem_obj)) {
+			err = -ENOENT;
+			goto put_obj;
+		}
+		bos[i] = gem_to_xe_bo(gem_obj);
+		/* Overflow-safe form of: obj_offset + range <= bos[i]->size */
+		if (XE_IOCTL_DBG(xe, range > bos[i]->size) ||
+		    XE_IOCTL_DBG(xe, obj_offset >
+				 bos[i]->size - range)) {
+			err = -EINVAL;
+			goto put_obj;
+		}
+
+		if (bos[i]->flags & XE_BO_INTERNAL_64K) {
+			if (XE_IOCTL_DBG(xe, obj_offset &
+					 XE_64K_PAGE_MASK) ||
+			    XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
+			    XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
+				err = -EINVAL;
+				goto put_obj;
+			}
+		}
+		/* XE_COH_NONE is refused for WB cpu_caching and for unset caching */
+		coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
+		if (bos[i]->cpu_caching) {
+			if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
+					 bos[i]->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
+				err = -EINVAL;
+				goto put_obj;
+			}
+		} else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
+			/*
+			 * Imported dma-buf from a different device should
+			 * require 1way or 2way coherency since we don't know
+			 * how it was mapped on the CPU. Just assume is it
+			 * potentially cached on CPU side.
+			 */
+			err = -EINVAL;
+			goto put_obj;
+		}
+	}
+	/* Parse the user syncs; num_syncs counts the entries parsed so far */
+	if (args->num_syncs) {
+		syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
+		if (!syncs) {
+			err = -ENOMEM;
+			goto put_obj;
+		}
+	}
+
+	syncs_user = u64_to_user_ptr(args->syncs);
+	for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
+		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
+					  &syncs_user[num_syncs],
+					  (xe_vm_in_lr_mode(vm) ?
+					   SYNC_PARSE_FLAG_LR_MODE : 0) |
+					  (!args->num_binds ?
+					   SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
+		if (err)
+			goto free_syncs;
+
+		if (xe_sync_is_ufence(&syncs[num_syncs]))
+			num_ufence++;
+	}
+
+	if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
+		err = -EINVAL;
+		goto free_syncs;
+	}
+	/* No binds: -ENODATA makes free_syncs call vm_bind_ioctl_signal_fences() */
+	if (!args->num_binds) {
+		err = -ENODATA;
+		goto free_syncs;
+	}
+
+	for (i = 0; i < args->num_binds; ++i) {
+		u64 range = bind_ops[i].range;
+		u64 addr = bind_ops[i].addr;
+		u32 op = bind_ops[i].op;
+		u32 flags = bind_ops[i].flags;
+		u64 obj_offset = bind_ops[i].obj_offset;
+		u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
+		u16 pat_index = bind_ops[i].pat_index;
+
+		ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
+						  addr, range, op, flags,
+						  prefetch_region, pat_index);
+		if (IS_ERR(ops[i])) {
+			err = PTR_ERR(ops[i]);
+			ops[i] = NULL;
+			goto unwind_ops;
+		}
+
+		err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs, num_syncs,
+					      &ops_list,
+					      i == args->num_binds - 1);
+		if (err)
+			goto unwind_ops;
+	}
+
+	/* Nothing to do */
+	if (list_empty(&ops_list)) {
+		err = -ENODATA;
+		goto unwind_ops;
+	}
+	/* Extra vm and queue references, held only around the execute call */
+	xe_vm_get(vm);
+	if (q)
+		xe_exec_queue_get(q);
+
+	err = vm_bind_ioctl_ops_execute(vm, &ops_list);
+
+	up_write(&vm->lock);
+
+	if (q)
+		xe_exec_queue_put(q);
+	xe_vm_put(vm);
+	/* NOTE(review): lookup refs and syncs are not released on this path */
+	for (i = 0; bos && i < args->num_binds; ++i)
+		xe_bo_put(bos[i]);
+
+	kvfree(bos);
+	kvfree(ops);
+	if (args->num_binds > 1)
+		kvfree(bind_ops);
+
+	return err;
+
+unwind_ops:
+	vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
+free_syncs:
+	if (err == -ENODATA)
+		err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
+	while (num_syncs--)
+		xe_sync_entry_cleanup(&syncs[num_syncs]);
+
+	kfree(syncs);
+put_obj:
+	for (i = 0; i < args->num_binds; ++i)
+		xe_bo_put(bos[i]);
+release_vm_lock:
+	up_write(&vm->lock);
+put_vm:
+	xe_vm_put(vm);
+put_exec_queue:
+	if (q)
+		xe_exec_queue_put(q);
+free_objs:
+	kvfree(bos);
+	kvfree(ops);
+	if (args->num_binds > 1)
+		kvfree(bind_ops);
+	return err;
+}
+
+/**
+ * xe_vm_lock() - Take the vm's dma_resv lock
+ * @vm: The struct xe_vm whose dma_resv object is to be locked
+ * @intr: Whether the wait for the lock may be interrupted
+ *
+ * Return: 0 on success, -EINTR if @intr is true and the wait for a
+ * contended lock was interrupted. If @intr is false, the function
+ * always returns 0.
+ */
+int xe_vm_lock(struct xe_vm *vm, bool intr)
+{
+	struct dma_resv *resv = xe_vm_resv(vm);
+
+	return intr ? dma_resv_lock_interruptible(resv, NULL) :
+		      dma_resv_lock(resv, NULL);
+}
+
+/**
+ * xe_vm_unlock() - Unlock the vm's dma_resv object
+ * @vm: The struct xe_vm whose lock is to be released.
+ *
+ * Releases the vm's dma_resv lock, e.g. one taken by xe_vm_lock().
+ */
+void xe_vm_unlock(struct xe_vm *vm)
+{
+	dma_resv_unlock(xe_vm_resv(vm));
+}
+
+/**
+ * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
+ * @vma: VMA to invalidate
+ *
+ * For each tile, zaps the page-table entries owned by this VMA and, where
+ * anything was zapped, issues a TLB invalidation on the tile's primary GT.
+ * Then blocks until every invalidation that was issued has completed.
+ *
+ * Return: 0 for success, negative error code otherwise.
+ */
+int xe_vm_invalidate_vma(struct xe_vma *vma)
+{
+	struct xe_device *xe = xe_vma_vm(vma)->xe;
+	struct xe_tile *tile;
+	u32 tile_needs_invalidate = 0;
+	int seqno[XE_MAX_TILES_PER_DEVICE];
+	u8 id;
+	int ret;
+
+	xe_assert(xe, xe_vm_in_fault_mode(xe_vma_vm(vma)));
+	xe_assert(xe, !xe_vma_is_null(vma));
+	trace_xe_vma_usm_invalidate(vma);
+
+	/* Check that we don't race with page-table updates */
+	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
+		if (xe_vma_is_userptr(vma)) {
+			WARN_ON_ONCE(!mmu_interval_check_retry
+				     (&to_userptr_vma(vma)->userptr.notifier,
+				      to_userptr_vma(vma)->userptr.notifier_seq));
+			WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
+							     DMA_RESV_USAGE_BOOKKEEP));
+
+		} else {
+			xe_bo_assert_held(xe_vma_bo(vma));
+		}
+	}
+	/* Zap PTEs tile by tile; an invalidation error returns immediately */
+	for_each_tile(tile, xe, id) {
+		if (xe_pt_zap_ptes(tile, vma)) {
+			tile_needs_invalidate |= BIT(id);
+			xe_device_wmb(xe);
+			/*
+			 * FIXME: We potentially need to invalidate multiple
+			 * GTs within the tile
+			 */
+			seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma);
+			if (seqno[id] < 0)
+				return seqno[id];
+		}
+	}
+	/* Wait for each invalidation started above; seqno[] is valid only there */
+	for_each_tile(tile, xe, id) {
+		if (tile_needs_invalidate & BIT(id)) {
+			ret = xe_gt_tlb_invalidation_wait(tile->primary_gt, seqno[id]);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+	vma->usm.tile_invalidated = vma->tile_mask;
+
+	return 0;
+}
+
+/* Print the VM's page-table root for @gt_id and every VMA range to @p. */
+int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
+{
+	struct drm_gpuva *gpuva;
+	bool is_vram;
+	uint64_t addr;
+
+	/* Best effort: skip the dump (still returning 0) if vm->lock is busy */
+	if (!down_read_trylock(&vm->lock)) {
+		drm_printf(p, " Failed to acquire VM lock to dump capture\n");
+		return 0;
+	}
+	if (vm->pt_root[gt_id]) {
+		addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, XE_PAGE_SIZE);
+		is_vram = xe_bo_is_vram(vm->pt_root[gt_id]->bo);
+		drm_printf(p, " VM root: A:0x%llx %s\n", addr,
+			   is_vram ? "VRAM" : "SYS");
+	}
+
+	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
+		struct xe_vma *vma = gpuva_to_vma(gpuva);
+		bool is_userptr = xe_vma_is_userptr(vma);
+		bool is_null = xe_vma_is_null(vma);
+
+		/* addr stays 0 for NULL VMAs and userptrs without an sg table */
+		addr = 0;
+		if (!is_null && is_userptr) {
+			struct sg_table *sg = to_userptr_vma(vma)->userptr.sg;
+			struct xe_res_cursor cur;
+
+			if (sg) {
+				xe_res_first_sg(sg, 0, XE_PAGE_SIZE, &cur);
+				addr = xe_res_dma(&cur);
+			}
+		} else if (!is_null) {
+			addr = __xe_bo_addr(xe_vma_bo(vma), 0, XE_PAGE_SIZE);
+			is_vram = xe_bo_is_vram(xe_vma_bo(vma));
+		}
+		drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
+			   xe_vma_start(vma), xe_vma_end(vma) - 1,
+			   xe_vma_size(vma),
+			   addr, is_null ? "NULL" : is_userptr ? "USR" :
+			   is_vram ? "VRAM" : "SYS");
+	}
+	up_read(&vm->lock);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
new file mode 100644
index 000000000000..9654a0612fc2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -0,0 +1,275 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_VM_H_
+#define _XE_VM_H_
+
+#include "xe_bo_types.h"
+#include "xe_macros.h"
+#include "xe_map.h"
+#include "xe_vm_types.h"
+
+struct drm_device;
+struct drm_printer;
+struct drm_file;
+
+struct ttm_buffer_object;
+struct ttm_validate_buffer;
+
+struct xe_exec_queue;
+struct xe_file;
+struct xe_sync_entry;
+struct drm_exec;
+
+struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags);
+
+struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id);
+int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node);
+
+static inline struct xe_vm *xe_vm_get(struct xe_vm *vm)
+{
+	drm_gpuvm_get(&vm->gpuvm);
+	return vm;
+}
+
+static inline void xe_vm_put(struct xe_vm *vm)
+{
+	drm_gpuvm_put(&vm->gpuvm);
+}
+
+int xe_vm_lock(struct xe_vm *vm, bool intr);
+
+void xe_vm_unlock(struct xe_vm *vm);
+
+static inline bool xe_vm_is_closed(struct xe_vm *vm)
+{
+	/* A zero vm->size means closed; only stable while vm->lock is held */
+	return vm->size == 0;
+}
+
+static inline bool xe_vm_is_banned(struct xe_vm *vm)
+{
+	return vm->flags & XE_VM_FLAG_BANNED;
+}
+
+static inline bool xe_vm_is_closed_or_banned(struct xe_vm *vm)
+{
+	lockdep_assert_held(&vm->lock);
+	return xe_vm_is_closed(vm) || xe_vm_is_banned(vm);
+}
+
+struct xe_vma *
+xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range);
+
+/**
+ * xe_vm_has_scratch() - Whether the vm is configured for scratch PTEs
+ * @vm: The vm
+ *
+ * Return: whether the vm populates unmapped areas with scratch PTEs
+ */
+static inline bool xe_vm_has_scratch(const struct xe_vm *vm)
+{
+	return vm->flags & XE_VM_FLAG_SCRATCH_PAGE;
+}
+
+/**
+ * gpuvm_to_vm() - Return the embedding xe_vm from a struct drm_gpuvm pointer
+ * @gpuvm: The struct drm_gpuvm pointer
+ *
+ * Return: Pointer to the embedding struct xe_vm.
+ */
+static inline struct xe_vm *gpuvm_to_vm(struct drm_gpuvm *gpuvm)
+{
+	return container_of(gpuvm, struct xe_vm, gpuvm);
+}
+
+static inline struct xe_vm *gpuva_to_vm(struct drm_gpuva *gpuva)
+{
+	return gpuvm_to_vm(gpuva->vm);
+}
+
+static inline struct xe_vma *gpuva_to_vma(struct drm_gpuva *gpuva)
+{
+	return container_of(gpuva, struct xe_vma, gpuva);
+}
+
+static inline struct xe_vma_op *gpuva_op_to_vma_op(struct drm_gpuva_op *op)
+{
+	return container_of(op, struct xe_vma_op, base);
+}
+
+/**
+ * DOC: Provide accessors for vma members to facilitate easy change of
+ * implementation.
+ */
+static inline u64 xe_vma_start(struct xe_vma *vma)
+{
+	return vma->gpuva.va.addr;
+}
+
+static inline u64 xe_vma_size(struct xe_vma *vma)
+{
+	return vma->gpuva.va.range;
+}
+
+static inline u64 xe_vma_end(struct xe_vma *vma)
+{
+	return xe_vma_start(vma) + xe_vma_size(vma);
+}
+
+static inline u64 xe_vma_bo_offset(struct xe_vma *vma)
+{
+	return vma->gpuva.gem.offset;
+}
+
+static inline struct xe_bo *xe_vma_bo(struct xe_vma *vma)
+{
+	return vma->gpuva.gem.obj ? /* NULL when the VMA has no GEM object */
+		container_of(vma->gpuva.gem.obj, struct xe_bo, ttm.base) : NULL;
+}
+
+static inline struct xe_vm *xe_vma_vm(struct xe_vma *vma)
+{
+	return gpuvm_to_vm(vma->gpuva.vm); /* the xe_vm embedding this gpuvm */
+}
+
+static inline bool xe_vma_read_only(struct xe_vma *vma)
+{
+	return vma->gpuva.flags & XE_VMA_READ_ONLY;
+}
+
+static inline u64 xe_vma_userptr(struct xe_vma *vma)
+{
+	return vma->gpuva.gem.offset;
+}
+
+static inline bool xe_vma_is_null(struct xe_vma *vma)
+{
+	return vma->gpuva.flags & DRM_GPUVA_SPARSE;
+}
+
+static inline bool xe_vma_has_no_bo(struct xe_vma *vma)
+{
+	return !xe_vma_bo(vma);
+}
+
+static inline bool xe_vma_is_userptr(struct xe_vma *vma)
+{
+	return !(xe_vma_bo(vma) || xe_vma_is_null(vma)); /* no BO, not NULL */
+}
+
+/**
+ * to_userptr_vma() - Return a pointer to an embedding userptr vma
+ * @vma: Pointer to the embedded struct xe_vma
+ *
+ * Return: Pointer to the embedding userptr vma
+ */
+static inline struct xe_userptr_vma *to_userptr_vma(struct xe_vma *vma)
+{
+	xe_assert(xe_vma_vm(vma)->xe, xe_vma_is_userptr(vma));
+	return container_of(vma, struct xe_userptr_vma, vma);
+}
+
+u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile);
+
+int xe_vm_create_ioctl(struct drm_device *dev, void *data,
+		       struct drm_file *file);
+int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file);
+int xe_vm_bind_ioctl(struct drm_device *dev, void *data,
+		     struct drm_file *file);
+
+void xe_vm_close_and_put(struct xe_vm *vm);
+
+static inline bool xe_vm_in_fault_mode(struct xe_vm *vm)
+{
+	return vm->flags & XE_VM_FLAG_FAULT_MODE;
+}
+
+static inline bool xe_vm_in_lr_mode(struct xe_vm *vm)
+{
+	return vm->flags & XE_VM_FLAG_LR_MODE;
+}
+
+static inline bool xe_vm_in_preempt_fence_mode(struct xe_vm *vm)
+{
+	return xe_vm_in_lr_mode(vm) && !xe_vm_in_fault_mode(vm);
+}
+
+int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q);
+void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q);
+
+int xe_vm_userptr_pin(struct xe_vm *vm);
+
+int __xe_vm_userptr_needs_repin(struct xe_vm *vm);
+
+int xe_vm_userptr_check_repin(struct xe_vm *vm);
+
+struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
+
+int xe_vm_invalidate_vma(struct xe_vma *vma);
+
+extern struct ttm_device_funcs xe_ttm_funcs;
+
+static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm)
+{
+	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
+	queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
+}
+
+/**
+ * xe_vm_reactivate_rebind() - Re-enable rebinding on a preempt-fence mode vm
+ * that had it deactivated.
+ * @vm: The vm.
+ *
+ * Does nothing unless the vm is in preempt-fence mode and
+ * vm->preempt.rebind_deactivated is set; otherwise clears that flag and queues
+ * the rebind worker. Should be called after submitting a batch to such a vm.
+ */
+static inline void xe_vm_reactivate_rebind(struct xe_vm *vm)
+{
+	if (!xe_vm_in_preempt_fence_mode(vm) || !vm->preempt.rebind_deactivated)
+		return;
+	vm->preempt.rebind_deactivated = false;
+	xe_vm_queue_rebind_worker(vm);
+}
+
+int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma);
+
+int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma);
+
+bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end);
+
+int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id);
+
+int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
+		      unsigned int num_shared);
+
+/**
+ * xe_vm_resv() - Return the vm's reservation object
+ * @vm: The vm
+ *
+ * Return: Pointer to the vm's reservation object.
+ */
+static inline struct dma_resv *xe_vm_resv(struct xe_vm *vm)
+{
+	return drm_gpuvm_resv(&vm->gpuvm);
+}
+
+/**
+ * xe_vm_assert_held(vm) - Assert that the vm's reservation object is held.
+ * @vm: The vm
+ */
+#define xe_vm_assert_held(vm) dma_resv_assert_held(xe_vm_resv(vm))
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
+#define vm_dbg drm_dbg
+#else
+__printf(2, 3)
+static inline void vm_dbg(const struct drm_device *dev,
+			  const char *format, ...)
+{ /* noop */ }
+#endif
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h
new file mode 100644
index 000000000000..bdc6659891a5
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm_doc.h
@@ -0,0 +1,555 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_VM_DOC_H_
+#define _XE_VM_DOC_H_
+
+/**
+ * DOC: XE VM (user address space)
+ *
+ * VM creation
+ * ===========
+ *
+ * Allocate a physical page for root of the page table structure, create default
+ * bind engine, and return a handle to the user.
+ *
+ * Scratch page
+ * ------------
+ *
+ * If the VM is created with the flag DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE, the
+ * entire page table structure defaults to pointing at a blank page allocated
+ * by the VM. Invalid accesses read / write this page instead of faulting.
+ *
+ * VM bind (create GPU mapping for a BO or userptr)
+ * ================================================
+ *
+ * Creates GPU mappings for a BO or userptr within a VM. VM binds use the same
+ * in / out fence interface (struct drm_xe_sync) as execs which allows users to
+ * think of binds and execs as more or less the same operation.
+ *
+ * Operations
+ * ----------
+ *
+ * DRM_XE_VM_BIND_OP_MAP		- Create mapping for a BO
+ * DRM_XE_VM_BIND_OP_UNMAP		- Destroy mapping for a BO / userptr
+ * DRM_XE_VM_BIND_OP_MAP_USERPTR	- Create mapping for userptr
+ *
+ * Implementation details
+ * ~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * All bind operations are implemented via a hybrid approach of using the CPU
+ * and GPU to modify page tables. If a new physical page is allocated in the
+ * page table structure we populate that page via the CPU and insert that new
+ * page into the existing page table structure via a GPU job. Also any existing
+ * pages in the page table structure that need to be modified also are updated
+ * via the GPU job. As the root physical page is preallocated on VM creation,
+ * our GPU job will always have at least 1 update. The in / out fences are
+ * passed to this job so again this is conceptually the same as an exec.
+ *
+ * Very simple example of few binds on an empty VM with 48 bits of address space
+ * and the resulting operations:
+ *
+ * .. code-block::
+ *
+ *	bind BO0 0x0-0x1000
+ *	alloc page level 3a, program PTE[0] to BO0 phys address (CPU)
+ *	alloc page level 2, program PDE[0] page level 3a phys address (CPU)
+ *	alloc page level 1, program PDE[0] page level 2 phys address (CPU)
+ *	update root PDE[0] to page level 1 phys address (GPU)
+ *
+ *	bind BO1 0x201000-0x202000
+ *	alloc page level 3b, program PTE[1] to BO1 phys address (CPU)
+ *	update page level 2 PDE[1] to page level 3b phys address (GPU)
+ *
+ *	bind BO2 0x1ff000-0x201000
+ *	update page level 3a PTE[511] to BO2 phys address (GPU)
+ *	update page level 3b PTE[0] to BO2 phys address + 0x1000 (GPU)
+ *
+ * GPU bypass
+ * ~~~~~~~~~~
+ *
+ * In the above example the steps using the GPU can be converted to CPU if the
+ * bind can be done immediately (all in-fences satisfied, VM dma-resv kernel
+ * slot is idle).
+ *
+ * Address space
+ * -------------
+ *
+ * Depending on platform either 48 or 57 bits of address space is supported.
+ *
+ * Page sizes
+ * ----------
+ *
+ * The minimum page size is either 4k or 64k depending on platform and memory
+ * placement (sysmem vs. VRAM). We enforce that binds must be aligned to the
+ * minimum page size.
+ *
+ * Larger pages (2M or 1GB) can be used for BOs in VRAM, the BO physical address
+ * is aligned to the larger pages size, and VA is aligned to the larger page
+ * size. Larger pages for userptrs / BOs in sysmem should be possible but is not
+ * yet implemented.
+ *
+ * Sync error handling mode
+ * ------------------------
+ *
+ * In both modes during the bind IOCTL the user input is validated. In sync
+ * error handling mode the newly bound BO is validated (potentially moved back
+ * to a region of memory where it can be used), page tables are updated by the
+ * CPU and the job to do the GPU binds is created in the IOCTL itself. This step
+ * can fail due to memory pressure. The user can recover by freeing memory and
+ * trying this operation again.
+ *
+ * Async error handling mode
+ * -------------------------
+ *
+ * In async error handling the step of validating the BO, updating page tables,
+ * and generating a job are deferred to an async worker. As this step can now
+ * fail after the IOCTL has reported success we need an error handling flow for
+ * which the user can recover from.
+ *
+ * The solution is for a user to register a user address with the VM which the
+ * VM uses to report errors to. The ufence wait interface can be used to wait on
+ * a VM going into an error state. Once an error is reported the VM's async
+ * worker is paused. While the VM's async worker is paused sync,
+ * DRM_XE_VM_BIND_OP_UNMAP operations are allowed (this can free memory). Once the
+ * user believes the error state is fixed, the async worker can be resumed via
+ * XE_VM_BIND_OP_RESTART operation. When VM async bind work is restarted, the
+ * first operation processed is the operation that caused the original error.
+ *
+ * Bind queues / engines
+ * ---------------------
+ *
+ * Think of the case where we have two bind operations A + B and are submitted
+ * in that order. A has in fences while B has none. If using a single bind
+ * queue, B is now blocked on A's in fences even though it is ready to run. This
+ * example is a real use case for VK sparse binding. We work around this
+ * limitation by implementing bind engines.
+ *
+ * In the bind IOCTL the user can optionally pass in an engine ID which must map
+ * to an engine which is of the special class DRM_XE_ENGINE_CLASS_VM_BIND.
+ * Underneath this is really a virtual engine that can run on any of the copy
+ * hardware engines. The job(s) created each IOCTL are inserted into this
+ * engine's ring. In the example above if A and B have different bind engines B
+ * is free to pass A. If the engine ID field is omitted, the default bind queue
+ * for the VM is used.
+ *
+ * TODO: Explain race in issue 41 and how we solve it
+ *
+ * Array of bind operations
+ * ------------------------
+ *
+ * The uAPI allows multiple binds operations to be passed in via a user array,
+ * of struct drm_xe_vm_bind_op, in a single VM bind IOCTL. This interface
+ * matches the VK sparse binding API. The implementation is rather simple, parse
+ * the array into a list of operations, pass the in fences to the first operation,
+ * and pass the out fences to the last operation. The ordered nature of a bind
+ * engine makes this possible.
+ *
+ * Munmap semantics for unbinds
+ * ----------------------------
+ *
+ * Munmap allows things like:
+ *
+ * .. code-block::
+ *
+ *	0x0000-0x2000 and 0x3000-0x5000 have mappings
+ *	Munmap 0x1000-0x4000, results in mappings 0x0000-0x1000 and 0x4000-0x5000
+ *
+ * To support this semantic in the above example we decompose the above example
+ * into 4 operations:
+ *
+ * .. code-block::
+ *
+ *	unbind 0x0000-0x2000
+ *	unbind 0x3000-0x5000
+ *	rebind 0x0000-0x1000
+ *	rebind 0x4000-0x5000
+ *
+ * Why not just do a partial unbind of 0x1000-0x2000 and 0x3000-0x4000? This
+ * falls apart when using large pages at the edges and the unbind forces us to
+ * use a smaller page size. For simplicity we always issue a set of unbinds
+ * unmapping anything in the range and at most 2 rebinds on the edges.
+ *
+ * Similar to an array of binds, in fences are passed to the first operation and
+ * out fences are signaled on the last operation.
+ *
+ * In this example there is a window of time where 0x0000-0x1000 and
+ * 0x4000-0x5000 are invalid but the user didn't ask for these addresses to be
+ * removed from the mapping. To work around this we treat any munmap style
+ * unbinds which require a rebind as kernel operations (BO eviction or userptr
+ * invalidation). The first operation waits on the VM's
+ * DMA_RESV_USAGE_PREEMPT_FENCE slots (waits for all pending jobs on VM to
+ * complete / triggers preempt fences) and the last operation is installed in
+ * the VM's DMA_RESV_USAGE_KERNEL slot (blocks future jobs / resume compute mode
+ * VM). The caveat is all dma-resv slots must be updated atomically with respect
+ * to execs and compute mode rebind worker. To accomplish this, hold the
+ * vm->lock in write mode from the first operation until the last.
+ *
+ * Deferred binds in fault mode
+ * ----------------------------
+ *
+ * If a VM is in fault mode (TODO: link to fault mode), new bind operations that
+ * create mappings are by default deferred to the page fault handler (first
+ * use). This behavior can be overridden by setting the flag
+ * DRM_XE_VM_BIND_FLAG_IMMEDIATE, which requests that the mapping be created
+ * immediately.
+ *
+ * User pointer
+ * ============
+ *
+ * User pointers are user allocated memory (malloc'd, mmap'd, etc..) for which the
+ * user wants to create a GPU mapping. Typically in other DRM drivers a dummy BO
+ * was created and then a binding was created. We bypass creating a dummy BO in
+ * XE and simply create a binding directly from the userptr.
+ *
+ * Invalidation
+ * ------------
+ *
+ * Since this is core kernel managed memory the kernel can move this memory
+ * whenever it wants. We register an invalidation MMU notifier to alert XE when
+ * a user pointer is about to move. The invalidation notifier needs to block
+ * until all pending users (jobs or compute mode engines) of the userptr are
+ * idle to ensure no faults. This is done by waiting on all VM dma-resv slots.
+ *
+ * Rebinds
+ * -------
+ *
+ * Either the next exec (non-compute) or rebind worker (compute mode) will
+ * rebind the userptr. The invalidation MMU notifier kicks the rebind worker
+ * after the VM dma-resv wait if the VM is in compute mode.
+ *
+ * Compute mode
+ * ============
+ *
+ * A VM in compute mode enables long running workloads and ultra low latency
+ * submission (ULLS). ULLS is implemented via a continuously running batch +
+ * semaphores. This enables the user to insert jump to new batch commands
+ * into the continuously running batch. In both cases these batches exceed the
+ * time a dma fence is allowed to exist for before signaling, as such dma fences
+ * are not used when a VM is in compute mode. User fences (TODO: link user fence
+ * doc) are used instead to signal operation's completion.
+ *
+ * Preempt fences
+ * --------------
+ *
+ * If the kernel decides to move memory around (either userptr invalidate, BO
+ * eviction, or munmap style unbind which results in a rebind) and a batch is
+ * running on an engine, that batch can fault or cause a memory corruption as
+ * page tables for the moved memory are no longer valid. To work around this we
+ * introduce the concept of preempt fences. When sw signaling is enabled on a
+ * preempt fence it tells the submission backend to kick that engine off the
+ * hardware and the preempt fence signals when the engine is off the hardware.
+ * Once all preempt fences are signaled for a VM the kernel can safely move the
+ * memory and kick the rebind worker which resumes all the engines execution.
+ *
+ * A preempt fence, for every engine using the VM, is installed in the VM's
+ * dma-resv DMA_RESV_USAGE_PREEMPT_FENCE slot. The same preempt fence, for every
+ * engine using the VM, is also installed into the same dma-resv slot of every
+ * external BO mapped in the VM.
+ *
+ * Rebind worker
+ * -------------
+ *
+ * The rebind worker is very similar to an exec. It is responsible for rebinding
+ * evicted BOs or userptrs, waiting on those operations, installing new preempt
+ * fences, and finally resuming executing of engines in the VM.
+ *
+ * Flow
+ * ~~~~
+ *
+ * .. code-block::
+ *
+ *	<----------------------------------------------------------------------|
+ *	Check if VM is closed, if so bail out                                  |
+ *	Lock VM global lock in read mode                                       |
+ *	Pin userptrs (also finds userptr invalidated since last rebind worker) |
+ *	Lock VM dma-resv and external BOs dma-resv                             |
+ *	Validate BOs that have been evicted                                    |
+ *	Wait on and allocate new preempt fences for every engine using the VM  |
+ *	Rebind invalidated userptrs + evicted BOs                              |
+ *	Wait on last rebind fence                                              |
+ *	Wait VM's DMA_RESV_USAGE_KERNEL dma-resv slot                          |
+ *	Install preempt fences and issue resume for every engine using the VM  |
+ *	Check if any userptrs invalidated since pin                            |
+ *		Squash resume for all engines                                  |
+ *		Unlock all                                                     |
+ *		Wait all VM's dma-resv slots                                   |
+ *		Retry ----------------------------------------------------------
+ *	Release all engines waiting to resume
+ *	Unlock all
+ *
+ * Timeslicing
+ * -----------
+ *
+ * In order to prevent an engine from continuously being kicked off the hardware
+ * and making no forward progress, an engine has a period of time it is allowed
+ * to run after resume before it can be kicked off again. This effectively gives
+ * each engine a timeslice.
+ *
+ * Handling multiple GTs
+ * =====================
+ *
+ * If a GT has slower access to some regions and the page table structure is in
+ * the slow region, the performance on that GT could adversely be affected. To
+ * work around this we allow a VM page tables to be shadowed in multiple GTs.
+ * When VM is created, a default bind engine and PT table structure are created
+ * on each GT.
+ *
+ * Binds can optionally pass in a mask of GTs where a mapping should be created,
+ * if this mask is zero then default to all the GTs where the VM has page
+ * tables.
+ *
+ * The implementation for this breaks down into a bunch of for_each_gt loops in
+ * various places plus exporting a composite fence for multi-GT binds to the
+ * user.
+ *
+ * Fault mode (unified shared memory)
+ * ==================================
+ *
+ * A VM in fault mode can be enabled on devices that support page faults. If
+ * page faults are enabled, using dma fences can potentially induce a deadlock:
+ * A pending page fault can hold up the GPU work which holds up the dma fence
+ * signaling, and memory allocation is usually required to resolve a page
+ * fault, but memory allocation is not allowed to gate dma fence signaling. As
+ * such, dma fences are not allowed when VM is in fault mode. Because dma-fences
+ * are not allowed, long running workloads and ULLS are enabled on a faulting
+ * VM.
+ *
+ * Deferred VM binds
+ * -----------------
+ *
+ * By default, on a faulting VM binds just allocate the VMA and the actual
+ * updating of the page tables is deferred to the page fault handler. This
+ * behavior can be overridden by setting the flag DRM_XE_VM_BIND_FLAG_IMMEDIATE in
+ * the VM bind which will then do the bind immediately.
+ *
+ * Page fault handler
+ * ------------------
+ *
+ * Page faults are received in the G2H worker under the CT lock which is in the
+ * path of dma fences (no memory allocations are allowed, faults require memory
+ * allocations) thus we cannot process faults under the CT lock. Another issue
+ * is faults issue TLB invalidations which require G2H credits and we cannot
+ * allocate G2H credits in the G2H handlers without deadlocking. Lastly, we do
+ * not want the CT lock to be an outer lock of the VM global lock (VM global
+ * lock is required for fault processing).
+ *
+ * To work around the above issue with processing faults in the G2H worker, we
+ * sink faults to a buffer which is large enough to sink all possible faults on
+ * the GT (1 per hardware engine) and kick a worker to process the faults. Since
+ * the page faults G2H are already received in a worker, kicking another worker
+ * adds more latency to a critical performance path. We add a fast path in the
+ * G2H irq handler which looks at the first G2H and if it is a page fault we
+ * sink the fault to the buffer and kick the worker to process the fault. TLB
+ * invalidation responses are also in the critical path so these can also be
+ * processed in this fast path.
+ *
+ * Multiple buffers and workers are used and hashed over based on the ASID so
+ * faults from different VMs can be processed in parallel.
+ *
+ * The page fault handler itself is rather simple, flow is below.
+ *
+ * .. code-block::
+ *
+ *	Lookup VM from ASID in page fault G2H
+ *	Lock VM global lock in read mode
+ *	Lookup VMA from address in page fault G2H
+ *	Check if VMA is valid, if not bail
+ *	Check if VMA's BO has backing store, if not allocate
+ *	<----------------------------------------------------------------------|
+ *	If userptr, pin pages                                                  |
+ *	Lock VM & BO dma-resv locks                                            |
+ *	If atomic fault, migrate to VRAM, else validate BO location            |
+ *	Issue rebind                                                           |
+ *	Wait on rebind to complete                                             |
+ *	Check if userptr invalidated since pin                                 |
+ *		Drop VM & BO dma-resv locks                                    |
+ *		Retry ----------------------------------------------------------
+ *	Unlock all
+ *	Issue blocking TLB invalidation
+ *	Send page fault response to GuC
+ *
+ * Access counters
+ * ---------------
+ *
+ * Access counters can be configured to trigger a G2H indicating the device is
+ * accessing VMAs in system memory frequently as hint to migrate those VMAs to
+ * VRAM.
+ *
+ * Same as the page fault handler, access counters G2H cannot be processed in
+ * the G2H worker under the CT lock. Again we use a buffer to sink access
+ * counter G2H. Unlike page faults there is no upper bound so if the buffer is
+ * full we simply drop the G2H. Access counters are a best-effort optimization
+ * and it is safe to drop these unlike page faults.
+ *
+ * The access counter handler itself is rather simple, flow is below.
+ *
+ * .. code-block::
+ *
+ *	Lookup VM from ASID in access counter G2H
+ *	Lock VM global lock in read mode
+ *	Lookup VMA from address in access counter G2H
+ *	If userptr, bail nothing to do
+ *	Lock VM & BO dma-resv locks
+ *	Issue migration to VRAM
+ *	Unlock all
+ *
+ * Notice no rebind is issued in the access counter handler as the rebind will
+ * be issued on next page fault.
+ *
+ * Caveats with eviction / user pointer invalidation
+ * -------------------------------------------------
+ *
+ * In the case of eviction and user pointer invalidation on a faulting VM, there
+ * is no need to issue a rebind rather we just need to blow away the page tables
+ * for the VMAs and the page fault handler will rebind the VMAs when they fault.
+ * The caveat is to update / read the page table structure the VM global lock is
+ * needed. In both the case of eviction and user pointer invalidation locks are
+ * held which make acquiring the VM global lock impossible. To work around this
+ * every VMA maintains a list of leaf page table entries which should be written
+ * to zero to blow away the VMA's page tables. After writing zero to these
+ * entries a blocking TLB invalidate is issued. At this point it is safe for the
+ * kernel to move the VMA's memory around. This is a necessary lockless
+ * algorithm and is safe as leafs cannot be changed while either an eviction or
+ * userptr invalidation is occurring.
+ *
+ * Locking
+ * =======
+ *
+ * VM locking protects all of the core data paths (bind operations, execs,
+ * evictions, and compute mode rebind worker) in XE.
+ *
+ * Locks
+ * -----
+ *
+ * VM global lock (vm->lock) - rw semaphore lock. Outer most lock which protects
+ * the list of userptrs mapped in the VM, the list of engines using this VM, and
+ * the array of external BOs mapped in the VM. Adding or removing any of the
+ * aforementioned state from the VM requires this lock in write mode. The VM
+ * bind path also acquires this lock in write mode while the exec / compute mode
+ * rebind worker acquire this lock in read mode.
+ *
+ * VM dma-resv lock (vm->ttm.base.resv->lock) - WW lock. Protects VM dma-resv
+ * slots which is shared with any private BO in the VM. Expected to be acquired
+ * during VM binds, execs, and compute mode rebind worker. This lock is also
+ * held when private BOs are being evicted.
+ *
+ * external BO dma-resv lock (bo->ttm.base.resv->lock) - WW lock. Protects
+ * external BO dma-resv slots. Expected to be acquired during VM binds (in
+ * addition to the VM dma-resv lock). All external BO dma-locks within a VM are
+ * expected to be acquired (in addition to the VM dma-resv lock) during execs
+ * and the compute mode rebind worker. This lock is also held when an external
+ * BO is being evicted.
+ *
+ * Putting it all together
+ * -----------------------
+ *
+ * 1. An exec and bind operation with the same VM can't be executing at the same
+ * time (vm->lock).
+ *
+ * 2. A compute mode rebind worker and bind operation with the same VM can't be
+ * executing at the same time (vm->lock).
+ *
+ * 3. We can't add / remove userptrs or external BOs to a VM while an exec with
+ * the same VM is executing (vm->lock).
+ *
+ * 4. We can't add / remove userptrs, external BOs, or engines to a VM while a
+ * compute mode rebind worker with the same VM is executing (vm->lock).
+ *
+ * 5. Evictions within a VM can't happen while an exec with the same VM is
+ * executing (dma-resv locks).
+ *
+ * 6. Evictions within a VM can't happen while a compute mode rebind worker
+ * with the same VM is executing (dma-resv locks).
+ *
+ * dma-resv usage
+ * ==============
+ *
+ * As previously stated to enforce the ordering of kernel ops (eviction, userptr
+ * invalidation, munmap style unbinds which result in a rebind), rebinds during
+ * execs, execs, and resumes in the rebind worker we use both the VMs and
+ * external BOs dma-resv slots. Let's try to make this as clear as possible.
+ *
+ * Slot installation
+ * -----------------
+ *
+ * 1. Jobs from kernel ops install themselves into the DMA_RESV_USAGE_KERNEL
+ * slot of either an external BO or VM (depends on if kernel op is operating on
+ * an external or private BO)
+ *
+ * 2. In non-compute mode, jobs from execs install themselves into the
+ * DMA_RESV_USAGE_BOOKKEEP slot of the VM
+ *
+ * 3. In non-compute mode, jobs from execs install themselves into the
+ * DMA_RESV_USAGE_WRITE slot of all external BOs in the VM
+ *
+ * 4. Jobs from binds install themselves into the DMA_RESV_USAGE_BOOKKEEP slot
+ * of the VM
+ *
+ * 5. Jobs from binds install themselves into the DMA_RESV_USAGE_BOOKKEEP slot
+ * of the external BO (if the bind is to an external BO, in addition to #4)
+ *
+ * 6. Every engine using a compute mode VM has a preempt fence installed into
+ * the DMA_RESV_USAGE_PREEMPT_FENCE slot of the VM
+ *
+ * 7. Every engine using a compute mode VM has a preempt fence installed into
+ * the DMA_RESV_USAGE_PREEMPT_FENCE slot of all the external BOs in the VM
+ *
+ * Slot waiting
+ * ------------
+ *
+ * 1. The execution of all jobs from kernel ops shall wait on all slots
+ * (DMA_RESV_USAGE_PREEMPT_FENCE) of either an external BO or VM (depends on if
+ * kernel op is operating on external or private BO)
+ *
+ * 2. In non-compute mode, the execution of all jobs from rebinds in execs shall
+ * wait on the DMA_RESV_USAGE_KERNEL slot of either an external BO or VM
+ * (depends on if the rebind is operating on an external or private BO)
+ *
+ * 3. In non-compute mode, the execution of all jobs from execs shall wait on
+ * the last rebind job
+ *
+ * 4. In compute mode, the execution of all jobs from rebinds in the rebind
+ * worker shall wait on the DMA_RESV_USAGE_KERNEL slot of either an external BO
+ * or VM (depends on if rebind is operating on external or private BO)
+ *
+ * 5. In compute mode, resumes in rebind worker shall wait on last rebind fence
+ *
+ * 6. In compute mode, resumes in rebind worker shall wait on the
+ * DMA_RESV_USAGE_KERNEL slot of the VM
+ *
+ * Putting it all together
+ * -----------------------
+ *
+ * 1. New jobs from kernel ops are blocked behind any existing jobs from
+ * non-compute mode execs
+ *
+ * 2. New jobs from non-compute mode execs are blocked behind any existing jobs
+ * from kernel ops and rebinds
+ *
+ * 3. New jobs from kernel ops are blocked behind all preempt fences signaling in
+ * compute mode
+ *
+ * 4. Compute mode engine resumes are blocked behind any existing jobs from
+ * kernel ops and rebinds
+ *
+ * Future work
+ * ===========
+ *
+ * Support large pages for sysmem and userptr.
+ *
+ * Update page faults to handle BOs at page level granularity (e.g. part of BO
+ * could be in system memory while another part could be in VRAM).
+ *
+ * Page fault handler can likely be optimized a bit more (e.g. Rebinds always
+ * wait on the dma-resv kernel slots of VM or BO, technically we only have to
+ * wait on the BO moving. If using a job to do the rebind, we could not block in
+ * the page fault handler rather attach a callback to fence of the rebind job to
+ * signal page fault complete. Our handling of short circuiting for atomic
+ * faults for bound VMAs could be better. etc...). We can tune all of this once
+ * we have benchmarks / performance numbers from workloads up and running.
+ */
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
new file mode 100644
index 000000000000..7300eea5394b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -0,0 +1,374 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_VM_TYPES_H_
+#define _XE_VM_TYPES_H_
+
+#include <drm/drm_gpuvm.h>
+
+#include <linux/dma-resv.h>
+#include <linux/kref.h>
+#include <linux/mmu_notifier.h>
+#include <linux/scatterlist.h>
+
+#include "xe_device_types.h"
+#include "xe_pt_types.h"
+#include "xe_range_fence.h"
+
+struct xe_bo;
+struct xe_sync_entry;
+struct xe_user_fence;
+struct xe_vm;
+
+#define XE_VMA_READ_ONLY	DRM_GPUVA_USERBITS
+#define XE_VMA_DESTROYED	(DRM_GPUVA_USERBITS << 1)
+#define XE_VMA_ATOMIC_PTE_BIT	(DRM_GPUVA_USERBITS << 2)
+#define XE_VMA_FIRST_REBIND	(DRM_GPUVA_USERBITS << 3)
+#define XE_VMA_LAST_REBIND	(DRM_GPUVA_USERBITS << 4)
+#define XE_VMA_PTE_4K		(DRM_GPUVA_USERBITS << 5)
+#define XE_VMA_PTE_2M		(DRM_GPUVA_USERBITS << 6)
+#define XE_VMA_PTE_1G		(DRM_GPUVA_USERBITS << 7)
+#define XE_VMA_PTE_64K		(DRM_GPUVA_USERBITS << 8)
+#define XE_VMA_PTE_COMPACT	(DRM_GPUVA_USERBITS << 9)
+
+/** struct xe_userptr - User pointer */
+struct xe_userptr {
+	/** @invalidate_link: Link for the vm::userptr.invalidated list */
+	struct list_head invalidate_link;
+	/** @repin_link: link into VM repin list if userptr. */
+	struct list_head repin_link;
+	/**
+	 * @notifier: MMU notifier for user pointer (invalidation call back)
+	 */
+	struct mmu_interval_notifier notifier;
+	/** @sgt: storage for a scatter gather table */
+	struct sg_table sgt;
+	/** @sg: allocated scatter gather table */
+	struct sg_table *sg;
+	/** @notifier_seq: notifier sequence number */
+	unsigned long notifier_seq;
+	/**
+	 * @initial_bind: user pointer has been bound at least once.
+	 * write: vm->userptr.notifier_lock in read mode and vm->resv held.
+	 * read: vm->userptr.notifier_lock in write mode or vm->resv held.
+	 */
+	bool initial_bind;
+#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
+	u32 divisor;
+#endif
+};
+
+struct xe_vma {
+	/** @gpuva: Base GPUVA object */
+	struct drm_gpuva gpuva;
+
+	/**
+	 * @combined_links: links into lists which are mutually exclusive.
+	 * Locking: vm lock in write mode OR vm lock in read mode and the vm's
+	 * resv.
+	 */
+	union {
+		/** @rebind: link into VM if this VMA needs rebinding. */
+		struct list_head rebind;
+		/** @destroy: link to contested list when VM is being closed. */
+		struct list_head destroy;
+	} combined_links;
+
+	union {
+		/** @destroy_cb: callback to destroy VMA when unbind job is done */
+		struct dma_fence_cb destroy_cb;
+		/** @destroy_work: worker to destroy this VMA */
+		struct work_struct destroy_work;
+	};
+
+	/** @usm: unified shared memory state */
+	struct {
+		/** @tile_invalidated: VMA has been invalidated */
+		u8 tile_invalidated;
+	} usm;
+
+	/** @tile_mask: Tile mask of where to create binding for this VMA */
+	u8 tile_mask;
+
+	/**
+	 * @tile_present: Tile mask of bindings that are present for this VMA.
+	 * protected by vm->lock, vm->resv and for userptrs,
+	 * vm->userptr.notifier_lock for writing. Needs either for reading,
+	 * but if reading is done under the vm->lock only, it needs to be held
+	 * in write mode.
+	 */
+	u8 tile_present;
+
+	/**
+	 * @pat_index: The pat index to use when encoding the PTEs for this vma.
+	 */
+	u16 pat_index;
+
+	/**
+	 * @ufence: The user fence that was provided with MAP.
+	 * Needs to be signalled before UNMAP can be processed.
+	 */
+	struct xe_user_fence *ufence;
+};
+
+/**
+ * struct xe_userptr_vma - A userptr vma subclass
+ * @vma: The vma.
+ * @userptr: Additional userptr information.
+ */
+struct xe_userptr_vma {
+	struct xe_vma vma;
+	struct xe_userptr userptr;
+};
+
+struct xe_device;
+
+struct xe_vm {
+	/** @gpuvm: base GPUVM used to track VMAs */
+	struct drm_gpuvm gpuvm;
+
+	struct xe_device *xe;
+
+	/* exec queue used for (un)binding vma's */
+	struct xe_exec_queue *q[XE_MAX_TILES_PER_DEVICE];
+
+	/** @lru_bulk_move: Bulk LRU move list for this VM's BOs */
+	struct ttm_lru_bulk_move lru_bulk_move;
+
+	u64 size;
+
+	struct xe_pt *pt_root[XE_MAX_TILES_PER_DEVICE];
+	struct xe_pt *scratch_pt[XE_MAX_TILES_PER_DEVICE][XE_VM_MAX_LEVEL];
+
+	/**
+	 * @flags: flags for this VM, statically set up at creation time aside
+	 * from XE_VM_FLAG_BANNED which requires vm->lock to set / read safely
+	 */
+#define XE_VM_FLAG_64K			BIT(0)
+#define XE_VM_FLAG_LR_MODE		BIT(1)
+#define XE_VM_FLAG_MIGRATION		BIT(2)
+#define XE_VM_FLAG_SCRATCH_PAGE		BIT(3)
+#define XE_VM_FLAG_FAULT_MODE		BIT(4)
+#define XE_VM_FLAG_BANNED		BIT(5)
+#define XE_VM_FLAG_TILE_ID(flags)	FIELD_GET(GENMASK(7, 6), flags)
+#define XE_VM_FLAG_SET_TILE_ID(tile)	FIELD_PREP(GENMASK(7, 6), (tile)->id)
+	unsigned long flags;
+
+	/** @composite_fence_ctx: context of the composite fence */
+	u64 composite_fence_ctx;
+	/** @composite_fence_seqno: seqno for composite fence */
+	u32 composite_fence_seqno;
+
+	/**
+	 * @lock: outer most lock, protects objects of anything attached to this
+	 * VM
+	 */
+	struct rw_semaphore lock;
+
+	/**
+	 * @rebind_list: list of VMAs that need rebinding. Protected by the
+	 * vm->lock in write mode, OR (the vm->lock in read mode and the
+	 * vm resv).
+	 */
+	struct list_head rebind_list;
+
+	/** @rebind_fence: rebind fence from execbuf */
+	struct dma_fence *rebind_fence;
+
+	/**
+	 * @destroy_work: worker to destroy VM, needed as a dma_fence signaling
+	 * from an irq context can be last put and the destroy needs to be able
+	 * to sleep.
+	 */
+	struct work_struct destroy_work;
+
+	/**
+	 * @rftree: range fence tree to track updates to page table structure.
+	 * Used to implement conflict tracking between independent bind engines.
+	 */
+	struct xe_range_fence_tree rftree[XE_MAX_TILES_PER_DEVICE];
+
+	/** @async_ops: async VM operations (bind / unbinds) */
+	struct {
+		/** @pending: list of pending async VM ops */
+		struct list_head pending;
+		/** @work: worker to execute async VM ops */
+		struct work_struct work;
+		/** @lock: protects list of pending async VM ops and fences */
+		spinlock_t lock;
+		/** @fence: fence state */
+		struct {
+			/** @context: context of async fence */
+			u64 context;
+			/** @seqno: seqno of async fence */
+			u32 seqno;
+		} fence;
+		/** @error: error state for async VM ops */
+		int error;
+		/**
+		 * @munmap_rebind_inflight: an munmap style VM bind is in the
+		 * middle of a set of ops which requires a rebind at the end.
+		 */
+		bool munmap_rebind_inflight;
+	} async_ops;
+
+	const struct xe_pt_ops *pt_ops;
+
+	/** @userptr: user pointer state */
+	struct {
+		/**
+		 * @userptr.repin_list: list of VMAs which are user pointers,
+		 * and needs repinning. Protected by @lock.
+		 */
+		struct list_head repin_list;
+		/**
+		 * @notifier_lock: protects notifier in write mode and
+		 * submission in read mode.
+		 */
+		struct rw_semaphore notifier_lock;
+		/**
+		 * @userptr.invalidated_lock: Protects the
+		 * @userptr.invalidated list.
+		 */
+		spinlock_t invalidated_lock;
+		/**
+		 * @userptr.invalidated: List of invalidated userptrs, not yet
+		 * picked
+		 * up for revalidation. Protected from access with the
+		 * @invalidated_lock. Removing items from the list
+		 * additionally requires @lock in write mode, and adding
+		 * items to the list requires the @userptr.notifier_lock in
+		 * write mode.
+		 */
+		struct list_head invalidated;
+	} userptr;
+
+	/** @preempt: preempt state */
+	struct {
+		/**
+		 * @min_run_period_ms: The minimum run period before preempting
+		 * an engine again
+		 */
+		s64 min_run_period_ms;
+		/** @exec_queues: list of exec queues attached to this VM */
+		struct list_head exec_queues;
+		/** @num_exec_queues: number of exec queues attached to this VM */
+		int num_exec_queues;
+		/**
+		 * @rebind_deactivated: Whether rebind has been temporarily deactivated
+		 * due to no work available. Protected by the vm resv.
+		 */
+		bool rebind_deactivated;
+		/**
+		 * @rebind_work: worker to rebind invalidated userptrs / evicted
+		 * BOs
+		 */
+		struct work_struct rebind_work;
+	} preempt;
+
+	/** @usm: unified shared memory state */
+	struct {
+		/** @asid: address space ID, unique to each VM */
+		u32 asid;
+		/**
+		 * @last_fault_vma: Last fault VMA, used for fast lookup when we
+		 * get a flood of faults to the same VMA
+		 */
+		struct xe_vma *last_fault_vma;
+	} usm;
+
+	/** @error_capture: allow to track errors */
+	struct {
+		/** @capture_once: capture only one error per VM */
+		bool capture_once;
+	} error_capture;
+
+	/** @batch_invalidate_tlb: Always invalidate TLB before batch start */
+	bool batch_invalidate_tlb;
+	/** @xef: XE file handle for tracking this VM's drm client */
+	struct xe_file *xef;
+};
+
+/** struct xe_vma_op_map - VMA map operation */
+struct xe_vma_op_map {
+	/** @vma: VMA to map */
+	struct xe_vma *vma;
+	/** @is_null: is NULL binding */
+	bool is_null;
+	/** @pat_index: The pat index to use for this operation. */
+	u16 pat_index;
+};
+
+/** struct xe_vma_op_remap - VMA remap operation */
+struct xe_vma_op_remap {
+	/** @prev: VMA preceding part of a split mapping */
+	struct xe_vma *prev;
+	/** @next: VMA subsequent part of a split mapping */
+	struct xe_vma *next;
+	/** @start: start of the VMA unmap */
+	u64 start;
+	/** @range: range of the VMA unmap */
+	u64 range;
+	/** @skip_prev: skip prev rebind */
+	bool skip_prev;
+	/** @skip_next: skip next rebind */
+	bool skip_next;
+	/** @unmap_done: unmap operation is done */
+	bool unmap_done;
+};
+
+/** struct xe_vma_op_prefetch - VMA prefetch operation */
+struct xe_vma_op_prefetch {
+	/** @region: memory region to prefetch to */
+	u32 region;
+};
+
+/** enum xe_vma_op_flags - flags for VMA operation */
+enum xe_vma_op_flags {
+	/** @XE_VMA_OP_FIRST: first VMA operation for a set of syncs */
+	XE_VMA_OP_FIRST			= BIT(0),
+	/** @XE_VMA_OP_LAST: last VMA operation for a set of syncs */
+	XE_VMA_OP_LAST			= BIT(1),
+	/** @XE_VMA_OP_COMMITTED: VMA operation committed */
+	XE_VMA_OP_COMMITTED		= BIT(2),
+	/** @XE_VMA_OP_PREV_COMMITTED: Previous VMA operation committed */
+	XE_VMA_OP_PREV_COMMITTED	= BIT(3),
+	/** @XE_VMA_OP_NEXT_COMMITTED: Next VMA operation committed */
+	XE_VMA_OP_NEXT_COMMITTED	= BIT(4),
+};
+
+/** struct xe_vma_op - VMA operation */
+struct xe_vma_op {
+	/** @base: GPUVA base operation */
+	struct drm_gpuva_op base;
+	/**
+	 * @ops: GPUVA ops, when set call drm_gpuva_ops_free after this
+	 * operation is processed
+	 */
+	struct drm_gpuva_ops *ops;
+	/** @q: exec queue for this operation */
+	struct xe_exec_queue *q;
+	/**
+	 * @syncs: syncs for this operation, only used on first and last
+	 * operation
+	 */
+	struct xe_sync_entry *syncs;
+	/** @num_syncs: number of syncs */
+	u32 num_syncs;
+	/** @link: async operation link */
+	struct list_head link;
+	/** @flags: operation flags */
+	enum xe_vma_op_flags flags;
+
+	union {
+		/** @map: VMA map operation specific data */
+		struct xe_vma_op_map map;
+		/** @remap: VMA remap operation specific data */
+		struct xe_vma_op_remap remap;
+		/** @prefetch: VMA prefetch operation specific data */
+		struct xe_vma_op_prefetch prefetch;
+	};
+};
+#endif
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
new file mode 100644
index 000000000000..5f61dd87c586
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -0,0 +1,895 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_wa.h"
+
+#include <drm/drm_managed.h>
+#include <kunit/visibility.h>
+#include <linux/compiler_types.h>
+
+#include "generated/xe_wa_oob.h"
+#include "regs/xe_engine_regs.h"
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
+#include "xe_device_types.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_hw_engine_types.h"
+#include "xe_mmio.h"
+#include "xe_platform_types.h"
+#include "xe_rtp.h"
+#include "xe_step.h"
+
+/**
+ * DOC: Hardware workarounds
+ *
+ * Hardware workarounds are register programming documented to be executed in
+ * the driver that fall outside of the normal programming sequences for a
+ * platform. There are some basic categories of workarounds, depending on
+ * how/when they are applied:
+ *
+ * - LRC workarounds: workarounds that touch registers that are
+ *   saved/restored to/from the HW context image. The list is emitted (via Load
+ *   Register Immediate commands) once when initializing the device and saved in
+ *   the default context. That default context is then used on every context
+ *   creation to have a "primed golden context", i.e. a context image that
+ *   already contains the changes needed to all the registers.
+ *
+ * - Engine workarounds: the list of these WAs is applied whenever the specific
+ *   engine is reset. It's also possible that a set of engine classes share a
+ *   common power domain and they are reset together. This happens on some
+ *   platforms with render and compute engines. In this case (at least) one of
+ *   them needs to keep the workaround programming: the approach taken in the
+ *   driver is to tie those workarounds to the first compute/render engine that
+ *   is registered.  When executing with GuC submission, engine resets are
+ *   outside of kernel driver control, hence the list of registers involved is
+ *   written once, on engine initialization, and then passed to GuC, that
+ *   saves/restores their values before/after the reset takes place. See
+ *   ``drivers/gpu/drm/xe/xe_guc_ads.c`` for reference.
+ *
+ * - GT workarounds: the list of these WAs is applied whenever these registers
+ *   revert to their default values: on GPU reset, suspend/resume [1]_, etc.
+ *
+ * - Register whitelist: some workarounds need to be implemented in userspace,
+ *   but need to touch privileged registers. The whitelist in the kernel
+ *   instructs the hardware to allow the access to happen. From the kernel side,
+ *   this is just a special case of a MMIO workaround (as we write the list of
+ *   these to-be-whitelisted registers to some special HW registers).
+ *
+ * - Workaround batchbuffers: buffers that get executed automatically by the
+ *   hardware on every HW context restore. These buffers are created and
+ *   programmed in the default context so the hardware always goes through those
+ *   programming sequences when switching contexts. The support for workaround
+ *   batchbuffers is enabled via these hardware mechanisms:
+ *
+ *   #. INDIRECT_CTX: A batchbuffer and an offset are provided in the default
+ *      context, pointing the hardware to jump to that location when that offset
+ *      is reached in the context restore. Workaround batchbuffer in the driver
+ *      currently uses this mechanism for all platforms.
+ *
+ *   #. BB_PER_CTX_PTR: A batchbuffer is provided in the default context,
+ *      pointing the hardware to a buffer to continue executing after the
+ *      engine registers are restored in a context restore sequence. This is
+ *      currently not used in the driver.
+ *
+ * - Other/OOB:  There are WAs that, due to their nature, cannot be applied from
+ *   a central place. Those are peppered around the rest of the code, as needed.
+ *   Workarounds related to the display IP are the main example.
+ *
+ * .. [1] Technically, some registers are powercontext saved & restored, so they
+ *    survive a suspend/resume. In practice, writing them again is not too
+ *    costly and simplifies things, so it's the approach taken in the driver.
+ *
+ * .. note::
+ *    Hardware workarounds in xe work the same way as in i915, with the
+ *    difference of how they are maintained in the code. In xe it uses the
+ *    xe_rtp infrastructure so the workarounds can be kept in tables, following
+ *    a more declarative approach rather than procedural.
+ */
+
+#undef XE_REG_MCR
+#define XE_REG_MCR(...)     XE_REG(__VA_ARGS__, .mcr = 1)
+
+__diag_push();
+__diag_ignore_all("-Woverride-init", "Allow field overrides in table");
+
+static const struct xe_rtp_entry_sr gt_was[] = {
+	{ XE_RTP_NAME("14011060649"),
+	  XE_RTP_RULES(MEDIA_VERSION_RANGE(1200, 1255),
+		       ENGINE_CLASS(VIDEO_DECODE),
+		       FUNC(xe_rtp_match_even_instance)),
+	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)),
+	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+	},
+	{ XE_RTP_NAME("14011059788"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
+	  XE_RTP_ACTIONS(SET(DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE))
+	},
+	{ XE_RTP_NAME("14015795083"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1260)),
+	  XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE))
+	},
+
+	/* DG1 */
+
+	{ XE_RTP_NAME("1409420604"),
+	  XE_RTP_RULES(PLATFORM(DG1)),
+	  XE_RTP_ACTIONS(SET(SUBSLICE_UNIT_LEVEL_CLKGATE2, CPSSUNIT_CLKGATE_DIS))
+	},
+	{ XE_RTP_NAME("1408615072"),
+	  XE_RTP_RULES(PLATFORM(DG1)),
+	  XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE2_DIS))
+	},
+
+	/* DG2 */
+
+	{ XE_RTP_NAME("16010515920"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10),
+		       GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(VIDEO_DECODE)),
+	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F18(0), ALNUNIT_CLKGATE_DIS)),
+	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+	},
+	{ XE_RTP_NAME("22010523718"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10)),
+	  XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE, CG3DDISCFEG_CLKGATE_DIS))
+	},
+	{ XE_RTP_NAME("14011006942"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10)),
+	  XE_RTP_ACTIONS(SET(SUBSLICE_UNIT_LEVEL_CLKGATE, DSS_ROUTER_CLKGATE_DIS))
+	},
+	{ XE_RTP_NAME("14012362059"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB))
+	},
+	{ XE_RTP_NAME("14012362059"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB))
+	},
+	{ XE_RTP_NAME("14010948348"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS))
+	},
+	{ XE_RTP_NAME("14011037102"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(UNSLCGCTL9444, LTCDD_CLKGATE_DIS))
+	},
+	{ XE_RTP_NAME("14011371254"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS))
+	},
+	{ XE_RTP_NAME("14011431319"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(UNSLCGCTL9440,
+			     GAMTLBOACS_CLKGATE_DIS |
+			     GAMTLBVDBOX7_CLKGATE_DIS | GAMTLBVDBOX6_CLKGATE_DIS |
+			     GAMTLBVDBOX5_CLKGATE_DIS | GAMTLBVDBOX4_CLKGATE_DIS |
+			     GAMTLBVDBOX3_CLKGATE_DIS | GAMTLBVDBOX2_CLKGATE_DIS |
+			     GAMTLBVDBOX1_CLKGATE_DIS | GAMTLBVDBOX0_CLKGATE_DIS |
+			     GAMTLBKCR_CLKGATE_DIS | GAMTLBGUC_CLKGATE_DIS |
+			     GAMTLBBLT_CLKGATE_DIS),
+			 SET(UNSLCGCTL9444,
+			     GAMTLBGFXA0_CLKGATE_DIS | GAMTLBGFXA1_CLKGATE_DIS |
+			     GAMTLBCOMPA0_CLKGATE_DIS | GAMTLBCOMPA1_CLKGATE_DIS |
+			     GAMTLBCOMPB0_CLKGATE_DIS | GAMTLBCOMPB1_CLKGATE_DIS |
+			     GAMTLBCOMPC0_CLKGATE_DIS | GAMTLBCOMPC1_CLKGATE_DIS |
+			     GAMTLBCOMPD0_CLKGATE_DIS | GAMTLBCOMPD1_CLKGATE_DIS |
+			     GAMTLBMERT_CLKGATE_DIS |
+			     GAMTLBVEBOX3_CLKGATE_DIS | GAMTLBVEBOX2_CLKGATE_DIS |
+			     GAMTLBVEBOX1_CLKGATE_DIS | GAMTLBVEBOX0_CLKGATE_DIS))
+	},
+	{ XE_RTP_NAME("14010569222"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE, GAMEDIA_CLKGATE_DIS))
+	},
+	{ XE_RTP_NAME("14011028019"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(SSMCGCTL9530, RTFUNIT_CLKGATE_DIS))
+	},
+	{ XE_RTP_NAME("14010680813"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(XEHP_GAMSTLB_CTRL,
+			     CONTROL_BLOCK_CLKGATE_DIS |
+			     EGRESS_BLOCK_CLKGATE_DIS |
+			     TAG_BLOCK_CLKGATE_DIS))
+	},
+	{ XE_RTP_NAME("14014830051"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(CLR(SARB_CHICKEN1, COMP_CKN_IN))
+	},
+	{ XE_RTP_NAME("18018781329"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(SET(RENDER_MOD_CTRL, FORCE_MISS_FTLB),
+			 SET(COMP_MOD_CTRL, FORCE_MISS_FTLB),
+			 SET(XEHP_VDBX_MOD_CTRL, FORCE_MISS_FTLB),
+			 SET(XEHP_VEBX_MOD_CTRL, FORCE_MISS_FTLB))
+	},
+	{ XE_RTP_NAME("1509235366"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(SET(XEHP_GAMCNTRL_CTRL,
+			     INVALIDATION_BROADCAST_MODE_DIS |
+			     GLOBAL_INVALIDATION_MODE))
+	},
+	{ XE_RTP_NAME("14010648519"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(SET(XEHP_L3NODEARBCFG, XEHP_LNESPARE))
+	},
+
+	/* PVC */
+
+	{ XE_RTP_NAME("18018781329"),
+	  XE_RTP_RULES(PLATFORM(PVC)),
+	  XE_RTP_ACTIONS(SET(RENDER_MOD_CTRL, FORCE_MISS_FTLB),
+			 SET(COMP_MOD_CTRL, FORCE_MISS_FTLB),
+			 SET(XEHP_VDBX_MOD_CTRL, FORCE_MISS_FTLB),
+			 SET(XEHP_VEBX_MOD_CTRL, FORCE_MISS_FTLB))
+	},
+	{ XE_RTP_NAME("16016694945"),
+	  XE_RTP_RULES(PLATFORM(PVC)),
+	  XE_RTP_ACTIONS(SET(XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC))
+	},
+
+	/* Xe_LPG */
+
+	{ XE_RTP_NAME("14015795083"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE))
+	},
+	{ XE_RTP_NAME("14018575942"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)),
+	  XE_RTP_ACTIONS(SET(COMP_MOD_CTRL, FORCE_MISS_FTLB))
+	},
+	{ XE_RTP_NAME("22016670082"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)),
+	  XE_RTP_ACTIONS(SET(SQCNT1, ENFORCE_RAR))
+	},
+
+	/* Xe_LPM+ */
+
+	{ XE_RTP_NAME("16021867713"),
+	  XE_RTP_RULES(MEDIA_VERSION(1300),
+		       ENGINE_CLASS(VIDEO_DECODE)),
+	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)),
+	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+	},
+	{ XE_RTP_NAME("22016670082"),
+	  XE_RTP_RULES(MEDIA_VERSION(1300)),
+	  XE_RTP_ACTIONS(SET(XELPMP_SQCNT1, ENFORCE_RAR))
+	},
+
+	/* Xe2_LPG */
+
+	{ XE_RTP_NAME("16020975621"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(XEHP_SLICE_UNIT_LEVEL_CLKGATE, SBEUNIT_CLKGATE_DIS))
+	},
+	{ XE_RTP_NAME("14018157293"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(XEHPC_L3CLOS_MASK(0), ~0),
+			 SET(XEHPC_L3CLOS_MASK(1), ~0),
+			 SET(XEHPC_L3CLOS_MASK(2), ~0),
+			 SET(XEHPC_L3CLOS_MASK(3), ~0))
+	},
+
+	/* Xe2_LPM */
+
+	{ XE_RTP_NAME("14017421178"),
+	  XE_RTP_RULES(MEDIA_VERSION(2000),
+		       ENGINE_CLASS(VIDEO_DECODE)),
+	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)),
+	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+	},
+	{ XE_RTP_NAME("16021867713"),
+	  XE_RTP_RULES(MEDIA_VERSION(2000),
+		       ENGINE_CLASS(VIDEO_DECODE)),
+	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)),
+	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+	},
+	{ XE_RTP_NAME("14019449301"),
+	  XE_RTP_RULES(MEDIA_VERSION(2000), ENGINE_CLASS(VIDEO_DECODE)),
+	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F08(0), CG3DDISHRS_CLKGATE_DIS)),
+	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+	},
+
+	{}
+};
+
+static const struct xe_rtp_entry_sr engine_was[] = {
+	{ XE_RTP_NAME("22010931296, 18011464164, 14010919138"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(FF_THREAD_MODE(RENDER_RING_BASE),
+			     FF_TESSELATION_DOP_GATE_DISABLE))
+	},
+	{ XE_RTP_NAME("1409804808"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(1200),
+		       ENGINE_CLASS(RENDER),
+		       IS_INTEGRATED),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN2, PUSH_CONST_DEREF_HOLD_DIS))
+	},
+	{ XE_RTP_NAME("14010229206, 1409085225"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(1200),
+		       ENGINE_CLASS(RENDER),
+		       IS_INTEGRATED),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH))
+	},
+	{ XE_RTP_NAME("1606931601"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_EARLY_READ))
+	},
+	{ XE_RTP_NAME("14010826681, 1606700617, 22010271021, 18019627453"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1255), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(CS_DEBUG_MODE1(RENDER_RING_BASE),
+			     FF_DOP_CLOCK_GATE_DISABLE))
+	},
+	{ XE_RTP_NAME("1406941453"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(SAMPLER_MODE, ENABLE_SMALLPL))
+	},
+	{ XE_RTP_NAME("FtrPerCtxtPreemptionGranularityControl"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1250), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN1(RENDER_RING_BASE),
+			     FFSC_PERCTX_PREEMPT_CTRL))
+	},
+
+	/* TGL */
+
+	{ XE_RTP_NAME("1607297627, 1607030317, 1607186500"),
+	  XE_RTP_RULES(PLATFORM(TIGERLAKE), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE),
+			     WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
+			     RC_SEMA_IDLE_MSG_DISABLE))
+	},
+
+	/* RKL */
+
+	{ XE_RTP_NAME("1607297627, 1607030317, 1607186500"),
+	  XE_RTP_RULES(PLATFORM(ROCKETLAKE), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE),
+			     WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
+			     RC_SEMA_IDLE_MSG_DISABLE))
+	},
+
+	/* ADL-P */
+
+	{ XE_RTP_NAME("1607297627, 1607030317, 1607186500"),
+	  XE_RTP_RULES(PLATFORM(ALDERLAKE_P), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE),
+			     WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
+			     RC_SEMA_IDLE_MSG_DISABLE))
+	},
+
+	/* DG2 */
+
+	{ XE_RTP_NAME("22013037850"),
+	  XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW,
+			     DISABLE_128B_EVICTION_COMMAND_UDW))
+	},
+	{ XE_RTP_NAME("22014226127"),
+	  XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE))
+	},
+	{ XE_RTP_NAME("18017747507"),
+	  XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(VFG_PREEMPTION_CHICKEN,
+			     POLYGON_TRIFAN_LINELOOP_DISABLE))
+	},
+	{ XE_RTP_NAME("22012826095, 22013059131"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(FIELD_SET(LSC_CHICKEN_BIT_0_UDW,
+				   MAXREQS_PER_BANK,
+				   REG_FIELD_PREP(MAXREQS_PER_BANK, 2)))
+	},
+	{ XE_RTP_NAME("22012826095, 22013059131"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(FIELD_SET(LSC_CHICKEN_BIT_0_UDW,
+				   MAXREQS_PER_BANK,
+				   REG_FIELD_PREP(MAXREQS_PER_BANK, 2)))
+	},
+	{ XE_RTP_NAME("22013059131"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, FORCE_1_SUB_MESSAGE_PER_FRAGMENT))
+	},
+	{ XE_RTP_NAME("22013059131"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, FORCE_1_SUB_MESSAGE_PER_FRAGMENT))
+	},
+	{ XE_RTP_NAME("14010918519"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW,
+			     FORCE_SLM_FENCE_SCOPE_TO_TILE |
+			     FORCE_UGM_FENCE_SCOPE_TO_TILE,
+			     /*
+			      * Ignore read back as it always returns 0 in these
+			      * steps
+			      */
+			     .read_mask = 0))
+	},
+	{ XE_RTP_NAME("14015227452"),
+	  XE_RTP_RULES(PLATFORM(DG2),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE))
+	},
+	{ XE_RTP_NAME("16015675438"),
+	  XE_RTP_RULES(PLATFORM(DG2),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2(RENDER_RING_BASE),
+			     PERF_FIX_BALANCING_CFE_DISABLE))
+	},
+	{ XE_RTP_NAME("18028616096"),
+	  XE_RTP_RULES(PLATFORM(DG2),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3))
+	},
+	{ XE_RTP_NAME("16011620976, 22015475538"),
+	  XE_RTP_RULES(PLATFORM(DG2),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8))
+	},
+	{ XE_RTP_NAME("22012654132"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_PREFETCH_INTO_IC,
+			     /*
+			      * Register can't be read back for verification on
+			      * DG2 due to Wa_14012342262
+			      */
+			     .read_mask = 0))
+	},
+	{ XE_RTP_NAME("22012654132"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_PREFETCH_INTO_IC,
+			     /*
+			      * Register can't be read back for verification on
+			      * DG2 due to Wa_14012342262
+			      */
+			     .read_mask = 0))
+	},
+	{ XE_RTP_NAME("1509727124"),
+	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(SAMPLER_MODE, SC_DISABLE_POWER_OPTIMIZATION_EBB))
+	},
+	{ XE_RTP_NAME("22012856258"),
+	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_READ_SUPPRESSION))
+	},
+	{ XE_RTP_NAME("14013392000"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN2, ENABLE_LARGE_GRF_MODE))
+	},
+	{ XE_RTP_NAME("14012419201"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4,
+			     DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX))
+	},
+	{ XE_RTP_NAME("14012419201"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4,
+			     DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX))
+	},
+	{ XE_RTP_NAME("1308578152"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0),
+		       ENGINE_CLASS(RENDER),
+		       FUNC(xe_rtp_match_first_gslice_fused_off)),
+	  XE_RTP_ACTIONS(CLR(CS_DEBUG_MODE1(RENDER_RING_BASE),
+			     REPLAY_MODE_GRANULARITY))
+	},
+	{ XE_RTP_NAME("22010960976, 14013347512"),
+	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(CLR(XEHP_HDC_CHICKEN0,
+			     LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK))
+	},
+	{ XE_RTP_NAME("1608949956, 14010198302"),
+	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN,
+			     MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE))
+	},
+	{ XE_RTP_NAME("22010430635"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4,
+			     DISABLE_GRF_CLEAR))
+	},
+	{ XE_RTP_NAME("14013202645"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY))
+	},
+	{ XE_RTP_NAME("14013202645"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY))
+	},
+	{ XE_RTP_NAME("22012532006"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7,
+			     DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA))
+	},
+	{ XE_RTP_NAME("22012532006"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7,
+			     DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA))
+	},
+	{ XE_RTP_NAME("14015150844"),
+	  XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(XEHP_HDC_CHICKEN0, DIS_ATOMIC_CHAINING_TYPED_WRITES,
+			     XE_RTP_NOCHECK))
+	},
+
+	/* PVC */
+
+	{ XE_RTP_NAME("22014226127"),
+	  XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE))
+	},
+	{ XE_RTP_NAME("14015227452"),
+	  XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE))
+	},
+	{ XE_RTP_NAME("16015675438"),
+	  XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2(RENDER_RING_BASE),
+			     PERF_FIX_BALANCING_CFE_DISABLE))
+	},
+	{ XE_RTP_NAME("14014999345"),
+	  XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COMPUTE),
+		       GRAPHICS_STEP(B0, C0)),
+	  XE_RTP_ACTIONS(SET(CACHE_MODE_SS, DISABLE_ECC))
+	},
+
+	/* Xe_LPG */
+
+	{ XE_RTP_NAME("14017856879"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN3, DIS_FIX_EOT1_FLUSH))
+	},
+	{ XE_RTP_NAME("14015150844"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(XEHP_HDC_CHICKEN0, DIS_ATOMIC_CHAINING_TYPED_WRITES,
+			     XE_RTP_NOCHECK))
+	},
+
+	/* Xe2_LPG */
+
+	{ XE_RTP_NAME("18032247524"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, SEQUENTIAL_ACCESS_UPGRADE_DISABLE))
+	},
+	{ XE_RTP_NAME("16018712365"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, XE2_ALLOC_DPA_STARVE_FIX_DIS))
+	},
+	{ XE_RTP_NAME("14018957109"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN5, DISABLE_SAMPLE_G_PERFORMANCE))
+	},
+	{ XE_RTP_NAME("16021540221"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH))
+	},
+	{ XE_RTP_NAME("14019322943"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, TGM_WRITE_EOM_FORCE))
+	},
+	{ XE_RTP_NAME("14018471104"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL))
+	},
+	{ XE_RTP_NAME("16018737384"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS))
+	},
+	/*
+	 * These two workarounds are the same, just applying to different
+	 * engines.  Although Wa_18032095049 (for the RCS) isn't required on
+	 * all steppings, disabling these reports has no impact for our
+	 * driver or the GuC, so we go ahead and treat it the same as
+	 * Wa_16021639441 which does apply to all steppings.
+	 */
+	{ XE_RTP_NAME("18032095049, 16021639441"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004)),
+	  XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0),
+			     GHWSP_CSB_REPORT_DIS |
+			     PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS,
+			     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+	},
+
+	{}
+};
+
+static const struct xe_rtp_entry_sr lrc_was[] = {
+	{ XE_RTP_NAME("1409342910, 14010698770, 14010443199, 1408979724, 1409178076, 1409207793, 1409217633, 1409252684, 1409347922, 1409142259"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
+	  XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN3,
+			     DISABLE_CPS_AWARE_COLOR_PIPE))
+	},
+	{ XE_RTP_NAME("WaDisableGPGPUMidThreadPreemption"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
+	  XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1(RENDER_RING_BASE),
+				   PREEMPT_GPGPU_LEVEL_MASK,
+				   PREEMPT_GPGPU_THREAD_GROUP_LEVEL))
+	},
+	{ XE_RTP_NAME("1806527549"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(1200)),
+	  XE_RTP_ACTIONS(SET(HIZ_CHICKEN, HZ_DEPTH_TEST_LE_GE_OPT_DISABLE))
+	},
+	{ XE_RTP_NAME("1606376872"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(1200)),
+	  XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, DISABLE_TDC_LOAD_BALANCING_CALC))
+	},
+
+	/* DG1 */
+
+	{ XE_RTP_NAME("1409044764"),
+	  XE_RTP_RULES(PLATFORM(DG1)),
+	  XE_RTP_ACTIONS(CLR(COMMON_SLICE_CHICKEN3,
+			     DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN))
+	},
+	{ XE_RTP_NAME("22010493298"),
+	  XE_RTP_RULES(PLATFORM(DG1)),
+	  XE_RTP_ACTIONS(SET(HIZ_CHICKEN,
+			     DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE))
+	},
+
+	/* DG2 */
+
+	{ XE_RTP_NAME("16011186671"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(CLR(VFLSKPD, DIS_MULT_MISS_RD_SQUASH),
+			 SET(VFLSKPD, DIS_OVER_FETCH_CACHE))
+	},
+	{ XE_RTP_NAME("14010469329"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN3,
+			     XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE))
+	},
+	{ XE_RTP_NAME("14010698770, 22010613112, 22010465075"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN3,
+			     DISABLE_CPS_AWARE_COLOR_PIPE))
+	},
+	{ XE_RTP_NAME("16013271637"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(SET(XEHP_SLICE_COMMON_ECO_CHICKEN1,
+			     MSC_MSAA_REODER_BUF_BYPASS_DISABLE))
+	},
+	{ XE_RTP_NAME("14014947963"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(FIELD_SET(VF_PREEMPTION,
+				   PREEMPTION_VERTEX_COUNT,
+				   0x4000))
+	},
+	{ XE_RTP_NAME("18018764978"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(SET(XEHP_PSS_MODE2,
+			     SCOREBOARD_STALL_FLUSH_CONTROL))
+	},
+	{ XE_RTP_NAME("18019271663"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE))
+	},
+	{ XE_RTP_NAME("14019877138"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT))
+	},
+
+	/* PVC */
+
+	{ XE_RTP_NAME("16017236439"),
+	  XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COPY),
+		       FUNC(xe_rtp_match_even_instance)),
+	  XE_RTP_ACTIONS(SET(BCS_SWCTRL(0),
+			     BCS_SWCTRL_DISABLE_256B,
+			     XE_RTP_ACTION_FLAG(ENGINE_BASE))),
+	},
+
+	/* Xe_LPG */
+
+	{ XE_RTP_NAME("18019271663"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)),
+	  XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE))
+	},
+
+	/* Xe2_LPG */
+
+	{ XE_RTP_NAME("16020518922"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(FF_MODE,
+			     DIS_TE_AUTOSTRIP |
+			     DIS_MESH_PARTIAL_AUTOSTRIP |
+			     DIS_MESH_AUTOSTRIP),
+			 SET(VFLSKPD,
+			     DIS_PARTIAL_AUTOSTRIP |
+			     DIS_AUTOSTRIP))
+	},
+	{ XE_RTP_NAME("14019386621"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(VF_SCRATCHPAD, XE2_VFG_TED_CREDIT_INTERFACE_DISABLE))
+	},
+	{ XE_RTP_NAME("14019877138"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT))
+	},
+	{ XE_RTP_NAME("14020013138"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS))
+	},
+	{ XE_RTP_NAME("14019988906"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD))
+	},
+
+	{}
+};
+
+static __maybe_unused const struct xe_rtp_entry oob_was[] = {
+#include <generated/xe_wa_oob.c>
+	{}
+};
+
+static_assert(ARRAY_SIZE(oob_was) - 1 == _XE_WA_OOB_COUNT);
+
+__diag_pop();
+
+/**
+ * xe_wa_process_oob - process OOB workaround table
+ * @gt: GT instance to process workarounds for
+ *
+ * Process OOB workaround table for this platform, marking in @gt the
+ * workarounds that are active.
+ */
+void xe_wa_process_oob(struct xe_gt *gt)
+{
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
+
+	xe_rtp_process_ctx_enable_active_tracking(&ctx, gt->wa_active.oob,
+						  ARRAY_SIZE(oob_was));
+	xe_rtp_process(&ctx, oob_was);
+}
+
+/**
+ * xe_wa_process_gt - process GT workaround table
+ * @gt: GT instance to process workarounds for
+ *
+ * Process GT workaround table for this platform, saving in @gt all the
+ * workarounds that need to be applied at the GT level.
+ */
+void xe_wa_process_gt(struct xe_gt *gt)
+{
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
+
+	xe_rtp_process_ctx_enable_active_tracking(&ctx, gt->wa_active.gt,
+						  ARRAY_SIZE(gt_was));
+	xe_rtp_process_to_sr(&ctx, gt_was, &gt->reg_sr);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_wa_process_gt);
+
+/**
+ * xe_wa_process_engine - process engine workaround table
+ * @hwe: engine instance to process workarounds for
+ *
+ * Process engine workaround table for this platform, saving in @hwe all the
+ * workarounds that need to be applied at the engine level that match this
+ * engine.
+ */
+void xe_wa_process_engine(struct xe_hw_engine *hwe)
+{
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
+
+	xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->wa_active.engine,
+						  ARRAY_SIZE(engine_was));
+	xe_rtp_process_to_sr(&ctx, engine_was, &hwe->reg_sr);
+}
+
+/**
+ * xe_wa_process_lrc - process context workaround table
+ * @hwe: engine instance to process workarounds for
+ *
+ * Process context workaround table for this platform, saving in @hwe all the
+ * workarounds that need to be applied on context restore. These are workarounds
+ * touching registers that are part of the HW context image.
+ */
+void xe_wa_process_lrc(struct xe_hw_engine *hwe)
+{
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
+
+	xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->wa_active.lrc,
+						  ARRAY_SIZE(lrc_was));
+	xe_rtp_process_to_sr(&ctx, lrc_was, &hwe->reg_lrc);
+}
+
+/**
+ * xe_wa_init - allocate the per-GT bitmaps tracking active workarounds
+ * @gt: GT instance to initialize
+ *
+ * Return: 0 on success, -ENOMEM if the tracking bitmaps can't be allocated.
+ */
+int xe_wa_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	size_t n_oob, n_lrc, n_engine, n_gt, total;
+	unsigned long *p;
+
+	n_gt = BITS_TO_LONGS(ARRAY_SIZE(gt_was));
+	n_engine = BITS_TO_LONGS(ARRAY_SIZE(engine_was));
+	n_lrc = BITS_TO_LONGS(ARRAY_SIZE(lrc_was));
+	n_oob = BITS_TO_LONGS(ARRAY_SIZE(oob_was));
+	total = n_gt + n_engine + n_lrc + n_oob;
+
+	p = drmm_kcalloc(&xe->drm, total, sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	gt->wa_active.gt = p;
+	p += n_gt;
+	gt->wa_active.engine = p;
+	p += n_engine;
+	gt->wa_active.lrc = p;
+	p += n_lrc;
+	gt->wa_active.oob = p;
+
+	return 0;
+}
+
+void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	size_t idx;
+
+	drm_printf(p, "GT Workarounds\n");
+	for_each_set_bit(idx, gt->wa_active.gt, ARRAY_SIZE(gt_was))
+		drm_printf_indent(p, 1, "%s\n", gt_was[idx].name);
+
+	drm_printf(p, "\nEngine Workarounds\n");
+	for_each_set_bit(idx, gt->wa_active.engine, ARRAY_SIZE(engine_was))
+		drm_printf_indent(p, 1, "%s\n", engine_was[idx].name);
+
+	drm_printf(p, "\nLRC Workarounds\n");
+	for_each_set_bit(idx, gt->wa_active.lrc, ARRAY_SIZE(lrc_was))
+		drm_printf_indent(p, 1, "%s\n", lrc_was[idx].name);
+
+	drm_printf(p, "\nOOB Workarounds\n");
+	for_each_set_bit(idx, gt->wa_active.oob, ARRAY_SIZE(oob_was))
+		if (oob_was[idx].name)
+			drm_printf_indent(p, 1, "%s\n", oob_was[idx].name);
+}
+
+/*
+ * Apply tile (non-GT, non-display) workarounds.  Think very carefully before
+ * adding anything to this function; most workarounds should be implemented
+ * elsewhere.  The programming here is primarily for sgunit/soc workarounds,
+ * which are relatively rare.  Since the registers these workarounds target are
+ * outside the GT, they should only need to be applied once at device
+ * probe/resume; they will not lose their values on any kind of GT or engine
+ * reset.
+ *
+ * TODO:  We may want to move this over to xe_rtp in the future once we have
+ * enough workarounds to justify the work.
+ */
+void xe_wa_apply_tile_workarounds(struct xe_tile *tile)
+{
+	struct xe_gt *mmio = tile->primary_gt;
+
+	if (XE_WA(mmio, 22010954014))
+		xe_mmio_rmw32(mmio, XEHP_CLOCK_GATE_DIS, 0, SGSI_SIDECLK_DIS);
+}
diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h
new file mode 100644
index 000000000000..1b24d66f9d80
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wa.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_WA_
+#define _XE_WA_
+
+struct drm_printer;
+struct xe_gt;
+struct xe_hw_engine;
+struct xe_tile;
+
+int xe_wa_init(struct xe_gt *gt);
+void xe_wa_process_oob(struct xe_gt *gt);
+void xe_wa_process_gt(struct xe_gt *gt);
+void xe_wa_process_engine(struct xe_hw_engine *hwe);
+void xe_wa_process_lrc(struct xe_hw_engine *hwe);
+void xe_wa_apply_tile_workarounds(struct xe_tile *tile);
+
+void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe);
+void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p);
+
+/**
+ * XE_WA - Out-of-band workarounds, that don't fit the lifecycle of any
+ *         other more specific type
+ * @gt__: gt instance
+ * @id__: XE_WA_OOB_<id__>, as generated by build system in generated/xe_wa_oob.h
+ */
+#define XE_WA(gt__, id__) test_bit(XE_WA_OOB_ ## id__, (gt__)->wa_active.oob)
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
new file mode 100644
index 000000000000..727bdc429212
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -0,0 +1,24 @@
+22012773006	GRAPHICS_VERSION_RANGE(1200, 1250)
+16011759253	SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)
+14014475959	GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0)
+		PLATFORM(DG2)
+22011391025	PLATFORM(DG2)
+14012197797	PLATFORM(DG2), GRAPHICS_STEP(A0, B0)
+16011777198	SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0)
+		SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0)
+22012727170	SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0)
+		SUBPLATFORM(DG2, G11)
+22012727685	SUBPLATFORM(DG2, G11)
+16015675438	PLATFORM(PVC)
+		SUBPLATFORM(DG2, G10)
+		SUBPLATFORM(DG2, G12)
+18020744125	PLATFORM(PVC)
+1509372804	PLATFORM(PVC), GRAPHICS_STEP(A0, C0)
+1409600907	GRAPHICS_VERSION_RANGE(1200, 1250)
+14016763929	SUBPLATFORM(DG2, G10)
+		SUBPLATFORM(DG2, G12)
+16017236439	PLATFORM(PVC)
+22010954014	PLATFORM(DG2)
+14019821291	MEDIA_VERSION_RANGE(1300, 2000)
+14015076503	MEDIA_VERSION(1300)
+16020292621	GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)
diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
new file mode 100644
index 000000000000..a75eeba7bfe5
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_wait_user_fence.h"
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/drm_utils.h>
+#include <drm/xe_drm.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_macros.h"
+#include "xe_exec_queue.h"
+
+/* Returns 0 when the masked compare passes, 1 when it fails, -errno on error. */
+static int do_compare(u64 addr, u64 value, u64 mask, u16 op)
+{
+	u64 rvalue, expected = value & mask;
+	bool passed;
+
+	if (copy_from_user(&rvalue, u64_to_user_ptr(addr), sizeof(rvalue)))
+		return -EFAULT;
+
+	rvalue &= mask;
+	switch (op) {
+	case DRM_XE_UFENCE_WAIT_OP_EQ:
+		passed = rvalue == expected;
+		break;
+	case DRM_XE_UFENCE_WAIT_OP_NEQ:
+		passed = rvalue != expected;
+		break;
+	case DRM_XE_UFENCE_WAIT_OP_GT:
+		passed = rvalue > expected;
+		break;
+	case DRM_XE_UFENCE_WAIT_OP_GTE:
+		passed = rvalue >= expected;
+		break;
+	case DRM_XE_UFENCE_WAIT_OP_LT:
+		passed = rvalue < expected;
+		break;
+	case DRM_XE_UFENCE_WAIT_OP_LTE:
+		passed = rvalue <= expected;
+		break;
+	default:
+		XE_WARN_ON("Not possible");
+		return -EINVAL;
+	}
+
+	return passed ? 0 : 1;
+}
+
+#define VALID_FLAGS	DRM_XE_UFENCE_WAIT_FLAG_ABSTIME
+#define MAX_OP		DRM_XE_UFENCE_WAIT_OP_LTE
+
+static long to_jiffies_timeout(struct xe_device *xe,
+			       struct drm_xe_wait_user_fence *args)
+{
+	unsigned long long t;
+	long timeout;
+
+	/*
+	 * For negative timeout we want to wait "forever" by setting
+	 * MAX_SCHEDULE_TIMEOUT. But we have to assign this value also
+	 * to args->timeout to avoid being zeroed on the signal delivery
+	 * (see the arithmetic after the wait in the ioctl).
+	 */
+	if (args->timeout < 0) {
+		args->timeout = MAX_SCHEDULE_TIMEOUT;
+		return MAX_SCHEDULE_TIMEOUT;
+	}
+
+	if (args->timeout == 0)
+		return 0;
+
+	/*
+	 * Save the timeout to a u64 variable because nsecs_to_jiffies
+	 * might return a value that overflows an s32 variable.
+	 */
+	if (args->flags & DRM_XE_UFENCE_WAIT_FLAG_ABSTIME)
+		t = drm_timeout_abs_to_jiffies(args->timeout);
+	else
+		t = nsecs_to_jiffies(args->timeout);
+
+	/*
+	 * Anything greater than MAX_SCHEDULE_TIMEOUT is meaningless,
+	 * also we don't want to cap it at MAX_SCHEDULE_TIMEOUT because
+	 * apparently user doesn't mean to wait forever, otherwise the
+	 * args->timeout should have been set to a negative value.
+	 */
+	if (t > MAX_SCHEDULE_TIMEOUT)
+		timeout = MAX_SCHEDULE_TIMEOUT - 1;
+	else
+		timeout = t;
+
+	return timeout ?: 1;
+}
+
+/* Wait until the u64 at args->addr, masked, compares true against args->value. */
+int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	DEFINE_WAIT_FUNC(w_wait, woken_wake_function);
+	struct drm_xe_wait_user_fence *args = data;
+	struct xe_exec_queue *q = NULL;
+	u64 addr = args->addr;
+	int err = 0;
+	long timeout;
+	ktime_t start;
+
+	if (XE_IOCTL_DBG(xe, args->extensions) || XE_IOCTL_DBG(xe, args->pad) ||
+	    XE_IOCTL_DBG(xe, args->pad2) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->flags & ~VALID_FLAGS))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->op > MAX_OP))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, addr & 0x7))
+		return -EINVAL;
+
+	if (args->exec_queue_id) {
+		q = xe_exec_queue_lookup(xef, args->exec_queue_id);
+		if (XE_IOCTL_DBG(xe, !q))
+			return -ENOENT;
+	}
+
+	timeout = to_jiffies_timeout(xe, args);
+
+	start = ktime_get();
+
+	add_wait_queue(&xe->ufence_wq, &w_wait);
+	for (;;) {
+		err = do_compare(addr, args->value, args->mask, args->op);
+		if (err <= 0)
+			break;
+
+		if (signal_pending(current)) {
+			err = -ERESTARTSYS;
+			break;
+		}
+
+		if (q && q->ops->reset_status(q)) {
+			drm_info(&xe->drm, "exec queue reset detected\n");
+			err = -EIO;
+			break;
+		}
+
+		if (!timeout) {
+			err = -ETIME;
+			break;
+		}
+
+		timeout = wait_woken(&w_wait, TASK_INTERRUPTIBLE, timeout);
+	}
+	remove_wait_queue(&xe->ufence_wq, &w_wait);
+
+	if (!(args->flags & DRM_XE_UFENCE_WAIT_FLAG_ABSTIME)) {
+		args->timeout -= ktime_to_ns(ktime_sub(ktime_get(), start));
+		if (args->timeout < 0)
+			args->timeout = 0;
+	}
+
+	/* A zero (remaining) timeout yields -ETIME even if the final compare passed. */
+	if (!timeout && !(err < 0))
+		err = -ETIME;
+
+	if (q)
+		xe_exec_queue_put(q);
+
+	return err;
+}
diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.h b/drivers/gpu/drm/xe/xe_wait_user_fence.h
new file mode 100644
index 000000000000..0e268978f9e6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_WAIT_USER_FENCE_H_
+#define _XE_WAIT_USER_FENCE_H_
+
+struct drm_device;
+struct drm_file;
+
+int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c
new file mode 100644
index 000000000000..d3a99157e523
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wopcm.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_wopcm.h"
+
+#include "regs/xe_guc_regs.h"
+#include "xe_device.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_mmio.h"
+#include "xe_uc_fw.h"
+
+/**
+ * DOC: Write Once Protected Content Memory (WOPCM) Layout
+ *
+ * The layout of the WOPCM will be fixed after writing to GuC WOPCM size and
+ * offset registers whose values are calculated and determined by HuC/GuC
+ * firmware size and set of hardware requirements/restrictions as shown below:
+ *
+ * ::
+ *
+ *    +=========> +====================+ <== WOPCM Top
+ *    ^           |  HW contexts RSVD  |
+ *    |     +===> +====================+ <== GuC WOPCM Top
+ *    |     ^     |                    |
+ *    |     |     |                    |
+ *    |     |     |                    |
+ *    |    GuC    |                    |
+ *    |   WOPCM   |                    |
+ *    |    Size   +--------------------+
+ *  WOPCM   |     |    GuC FW RSVD     |
+ *    |     |     +--------------------+
+ *    |     |     |   GuC Stack RSVD   |
+ *    |     |     +------------------- +
+ *    |     v     |   GuC WOPCM RSVD   |
+ *    |     +===> +====================+ <== GuC WOPCM base
+ *    |           |     WOPCM RSVD     |
+ *    |           +------------------- + <== HuC Firmware Top
+ *    v           |      HuC FW        |
+ *    +=========> +====================+ <== WOPCM Base
+ *
+ * GuC accessible WOPCM starts at GuC WOPCM base and ends at GuC WOPCM top.
+ * The top part of the WOPCM is reserved for hardware contexts (e.g. RC6
+ * context).
+ */
+
+/* Default WOPCM size is 2MB from Gen11, 1MB on previous platforms */
+/* FIXME: Larger size required for 2 tile PVC, do a proper probe sooner or later */
+#define DGFX_WOPCM_SIZE			SZ_4M
+/* FIXME: Larger size required for MTL, do a proper probe sooner or later */
+#define MTL_WOPCM_SIZE			SZ_4M
+#define WOPCM_SIZE			SZ_2M
+
+#define MAX_WOPCM_SIZE			SZ_8M
+
+/* 16KB WOPCM (RSVD WOPCM) is reserved from HuC firmware top. */
+#define WOPCM_RESERVED_SIZE		SZ_16K
+
+/* 16KB reserved at the beginning of GuC WOPCM. */
+#define GUC_WOPCM_RESERVED		SZ_16K
+/* 8KB from GUC_WOPCM_RESERVED is reserved for GuC stack. */
+#define GUC_WOPCM_STACK_RESERVED	SZ_8K
+
+/* GuC WOPCM Offset value needs to be aligned to 16KB. */
+#define GUC_WOPCM_OFFSET_ALIGNMENT	(1UL << GUC_WOPCM_OFFSET_SHIFT)
+
+/* 36KB WOPCM reserved at the end of WOPCM */
+#define WOPCM_HW_CTX_RESERVED		(SZ_32K + SZ_4K)
+
+static inline struct xe_gt *wopcm_to_gt(struct xe_wopcm *wopcm)
+{
+	return container_of(wopcm, struct xe_gt, uc.wopcm);
+}
+
+static inline struct xe_device *wopcm_to_xe(struct xe_wopcm *wopcm)
+{
+	return gt_to_xe(wopcm_to_gt(wopcm));
+}
+
+static u32 context_reserved_size(void)
+{
+	return WOPCM_HW_CTX_RESERVED;
+}
+
+static bool __check_layout(struct xe_device *xe, u32 wopcm_size,
+			   u32 guc_wopcm_base, u32 guc_wopcm_size,
+			   u32 guc_fw_size, u32 huc_fw_size)
+{
+	const u32 ctx_rsvd = context_reserved_size();
+	u32 size;
+
+	size = wopcm_size - ctx_rsvd;
+	if (unlikely(guc_wopcm_base >= size ||
+		     guc_wopcm_size > size - guc_wopcm_base)) {
+		drm_err(&xe->drm,
+			"WOPCM: invalid GuC region layout: %uK + %uK > %uK\n",
+			guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K,
+			size / SZ_1K);
+		return false;
+	}
+
+	size = guc_fw_size + GUC_WOPCM_RESERVED + GUC_WOPCM_STACK_RESERVED;
+	if (unlikely(guc_wopcm_size < size)) {
+		drm_err(&xe->drm, "WOPCM: no space for %s: %uK < %uK\n",
+			xe_uc_fw_type_repr(XE_UC_FW_TYPE_GUC),
+			guc_wopcm_size / SZ_1K, size / SZ_1K);
+		return false;
+	}
+
+	size = huc_fw_size + WOPCM_RESERVED_SIZE;
+	if (unlikely(guc_wopcm_base < size)) {
+		drm_err(&xe->drm, "WOPCM: no space for %s: %uK < %uK\n",
+			xe_uc_fw_type_repr(XE_UC_FW_TYPE_HUC),
+			guc_wopcm_base / SZ_1K, size / SZ_1K);
+		return false;
+	}
+
+	return true;
+}
+
+static bool __wopcm_regs_locked(struct xe_gt *gt,
+				u32 *guc_wopcm_base, u32 *guc_wopcm_size)
+{
+	u32 reg_base = xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET);
+	u32 reg_size = xe_mmio_read32(gt, GUC_WOPCM_SIZE);
+
+	if (!(reg_size & GUC_WOPCM_SIZE_LOCKED) ||
+	    !(reg_base & GUC_WOPCM_OFFSET_VALID))
+		return false;
+
+	*guc_wopcm_base = reg_base & GUC_WOPCM_OFFSET_MASK;
+	*guc_wopcm_size = reg_size & GUC_WOPCM_SIZE_MASK;
+	return true;
+}
+
+static int __wopcm_init_regs(struct xe_device *xe, struct xe_gt *gt,
+			     struct xe_wopcm *wopcm)
+{
+	u32 base = wopcm->guc.base;
+	u32 size = wopcm->guc.size;
+	u32 huc_agent = xe_uc_fw_is_available(&gt->uc.huc.fw) ? HUC_LOADING_AGENT_GUC : 0;
+	u32 mask;
+	int err;
+
+	XE_WARN_ON(!(base & GUC_WOPCM_OFFSET_MASK));
+	XE_WARN_ON(base & ~GUC_WOPCM_OFFSET_MASK);
+	XE_WARN_ON(!(size & GUC_WOPCM_SIZE_MASK));
+	XE_WARN_ON(size & ~GUC_WOPCM_SIZE_MASK);
+
+	mask = GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED;
+	err = xe_mmio_write32_and_verify(gt, GUC_WOPCM_SIZE, size, mask,
+					 size | GUC_WOPCM_SIZE_LOCKED);
+	if (err)
+		goto err_out;
+
+	mask = GUC_WOPCM_OFFSET_MASK | GUC_WOPCM_OFFSET_VALID | huc_agent;
+	err = xe_mmio_write32_and_verify(gt, DMA_GUC_WOPCM_OFFSET,
+					 base | huc_agent, mask,
+					 base | huc_agent |
+					 GUC_WOPCM_OFFSET_VALID);
+	if (err)
+		goto err_out;
+
+	return 0;
+
+err_out:
+	drm_notice(&xe->drm, "Failed to init uC WOPCM registers!\n");
+	drm_notice(&xe->drm, "%s(%#x)=%#x\n", "DMA_GUC_WOPCM_OFFSET",
+		   DMA_GUC_WOPCM_OFFSET.addr,
+		   xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET));
+	drm_notice(&xe->drm, "%s(%#x)=%#x\n", "GUC_WOPCM_SIZE",
+		   GUC_WOPCM_SIZE.addr,
+		   xe_mmio_read32(gt, GUC_WOPCM_SIZE));
+
+	return err;
+}
+
+u32 xe_wopcm_size(struct xe_device *xe)
+{
+	if (IS_DGFX(xe))
+		return DGFX_WOPCM_SIZE;
+	return xe->info.platform == XE_METEORLAKE ? MTL_WOPCM_SIZE : WOPCM_SIZE;
+}
+
+/**
+ * xe_wopcm_init() - Initialize the WOPCM structure.
+ * @wopcm: pointer to xe_wopcm.
+ *
+ * Partition the WOPCM based on the GuC and HuC firmware sizes, giving the GuC
+ * all remaining usable space and enforcing the hardware restrictions on its
+ * offset and size. A failed layout check aborts before any register write.
+ * Return: 0 on success, -EINVAL if the GuC firmware size is zero, -E2BIG if
+ * the layout check fails, or the error from programming the WOPCM registers.
+ */
+int xe_wopcm_init(struct xe_wopcm *wopcm)
+{
+	struct xe_device *xe = wopcm_to_xe(wopcm);
+	struct xe_gt *gt = wopcm_to_gt(wopcm);
+	u32 guc_fw_size = xe_uc_fw_get_upload_size(&gt->uc.guc.fw);
+	u32 huc_fw_size = xe_uc_fw_get_upload_size(&gt->uc.huc.fw);
+	u32 ctx_rsvd = context_reserved_size();
+	u32 guc_wopcm_base;
+	u32 guc_wopcm_size;
+	bool locked;
+	int ret = 0;
+
+	if (!guc_fw_size)
+		return -EINVAL;
+
+	wopcm->size = xe_wopcm_size(xe);
+	drm_dbg(&xe->drm, "WOPCM: %uK\n", wopcm->size / SZ_1K);
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+	XE_WARN_ON(guc_fw_size >= wopcm->size);
+	XE_WARN_ON(huc_fw_size >= wopcm->size);
+	XE_WARN_ON(ctx_rsvd + WOPCM_RESERVED_SIZE >= wopcm->size);
+
+	locked = __wopcm_regs_locked(gt, &guc_wopcm_base, &guc_wopcm_size);
+	if (locked) {
+		drm_dbg(&xe->drm, "GuC WOPCM is already locked [%uK, %uK)\n",
+			guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K);
+		/*
+		 * When the GuC wopcm base and size are preprogrammed by
+		 * BIOS/IFWI, check against the max allowed wopcm size to
+		 * validate if the programmed values align to the wopcm layout.
+		 */
+		wopcm->size = MAX_WOPCM_SIZE;
+
+		goto check;
+	}
+
+	/*
+	 * Aligned value of guc_wopcm_base will determine available WOPCM space
+	 * for HuC firmware and mandatory reserved area.
+	 */
+	guc_wopcm_base = huc_fw_size + WOPCM_RESERVED_SIZE;
+	guc_wopcm_base = ALIGN(guc_wopcm_base, GUC_WOPCM_OFFSET_ALIGNMENT);
+
+	/*
+	 * Need to clamp guc_wopcm_base now to make sure the following math is
+	 * correct. Formal check of whole WOPCM layout will be done below.
+	 */
+	guc_wopcm_base = min(guc_wopcm_base, wopcm->size - ctx_rsvd);
+
+	/* The aligned remainder of usable WOPCM space can be assigned to GuC. */
+	guc_wopcm_size = wopcm->size - ctx_rsvd - guc_wopcm_base;
+	guc_wopcm_size &= GUC_WOPCM_SIZE_MASK;
+
+	drm_dbg(&xe->drm, "Calculated GuC WOPCM [%uK, %uK)\n",
+		guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K);
+
+check:
+	if (__check_layout(xe, wopcm->size, guc_wopcm_base, guc_wopcm_size,
+			   guc_fw_size, huc_fw_size)) {
+		wopcm->guc.base = guc_wopcm_base;
+		wopcm->guc.size = guc_wopcm_size;
+		XE_WARN_ON(!wopcm->guc.base);
+		XE_WARN_ON(!wopcm->guc.size);
+	} else {
+		drm_notice(&xe->drm, "Unsuccessful WOPCM partitioning\n");
+		return -E2BIG;
+	}
+
+	if (!locked)
+		ret = __wopcm_init_regs(xe, gt, wopcm);
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/xe/xe_wopcm.h b/drivers/gpu/drm/xe/xe_wopcm.h
new file mode 100644
index 000000000000..0197a282460b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wopcm.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_WOPCM_H_
+#define _XE_WOPCM_H_
+
+#include "xe_wopcm_types.h"
+
+struct xe_device;
+
+int xe_wopcm_init(struct xe_wopcm *wopcm);
+u32 xe_wopcm_size(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_wopcm_types.h b/drivers/gpu/drm/xe/xe_wopcm_types.h
new file mode 100644
index 000000000000..486d850c4084
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wopcm_types.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_WOPCM_TYPES_H_
+#define _XE_WOPCM_TYPES_H_
+
+#include <linux/types.h>
+
+/**
+ * struct xe_wopcm - Overall WOPCM info and WOPCM regions.
+ */
+struct xe_wopcm {
+	/** @size: Size of overall WOPCM */
+	u32 size;
+	/** @guc: GuC WOPCM Region info */
+	struct {
+		/** @guc.base: GuC WOPCM base which is offset from WOPCM base */
+		u32 base;
+		/** @guc.size: Size of the GuC WOPCM region */
+		u32 size;
+	} guc;
+};
+
+#endif