384 files changed, 16034 insertions, 9900 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index f42e8d467c12..b1bb10625cd9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -90,6 +90,7 @@
 #include "amdgpu_mes.h"
 #include "amdgpu_umc.h"
 #include "amdgpu_mmhub.h"
+#include "amdgpu_df.h"
 
 #define MAX_GPU_INSTANCE		16
 
@@ -664,29 +665,6 @@ struct amdgpu_mmio_remap {
 	resource_size_t bus_addr;
 };
 
-struct amdgpu_df_funcs {
-	void (*sw_init)(struct amdgpu_device *adev);
-	void (*sw_fini)(struct amdgpu_device *adev);
-	void (*enable_broadcast_mode)(struct amdgpu_device *adev,
-				      bool enable);
-	u32 (*get_fb_channel_number)(struct amdgpu_device *adev);
-	u32 (*get_hbm_channel_number)(struct amdgpu_device *adev);
-	void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
-						 bool enable);
-	void (*get_clockgating_state)(struct amdgpu_device *adev,
-				      u32 *flags);
-	void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
-					    bool enable);
-	int (*pmc_start)(struct amdgpu_device *adev, uint64_t config,
-					 int is_enable);
-	int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config,
-					 int is_disable);
-	void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config,
-					 uint64_t *count);
-	uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val);
-	void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val,
-			 uint32_t ficadl_val, uint32_t ficadh_val);
-};
 /* Define the HW IP blocks will be used in driver , add more if necessary */
 enum amd_hw_ip_block_type {
 	GC_HWIP = 1,
@@ -930,6 +908,9 @@ struct amdgpu_device {
 	bool                            enable_mes;
 	struct amdgpu_mes               mes;
 
+	/* df */
+	struct amdgpu_df                df;
+
 	struct amdgpu_ip_block          ip_blocks[AMDGPU_MAX_IP_NUM];
 	int				num_ip_blocks;
 	struct mutex	mn_lock;
@@ -943,8 +924,6 @@ struct amdgpu_device {
 	/* soc15 register offset based on ip, instance and  segment */
 	uint32_t 		*reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
 
-	const struct amdgpu_df_funcs	*df_funcs;
-
 	/* delayed work_func for deferring clockgating during resume */
 	struct delayed_work     delayed_init_work;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index d3da9dde4ee1..8609287620ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -613,15 +613,9 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 
-	if (is_support_sw_smu(adev))
-		smu_switch_power_profile(&adev->smu,
-					 PP_SMC_POWER_PROFILE_COMPUTE,
-					 !idle);
-	else if (adev->powerplay.pp_funcs &&
-		 adev->powerplay.pp_funcs->switch_power_profile)
-		amdgpu_dpm_switch_power_profile(adev,
-						PP_SMC_POWER_PROFILE_COMPUTE,
-						!idle);
+	amdgpu_dpm_switch_power_profile(adev,
+					PP_SMC_POWER_PROFILE_COMPUTE,
+					!idle);
 }
 
 bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
@@ -634,6 +628,38 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
 	return false;
 }
 
+int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
+{	/* Flush the GPU TLB entries of @vmid; which hubs are hit depends on the family. */
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	if (adev->family == AMDGPU_FAMILY_AI) {	/* AI family: flush every VM hub */
+		int i;
+
+		for (i = 0; i < adev->num_vmhubs; i++)
+			amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
+	} else {	/* all other families: flush AMDGPU_GFXHUB_0 only */
+		amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
+	}
+
+	return 0;	/* unconditionally reports success */
+}
+
+int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid)
+{	/* Flush the GPU TLB entries of @pasid; returns the gmc helper's result. */
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	uint32_t flush_type = 0;	/* default; overridden for Vega20 with XGMI below */
+	bool all_hub = false;	/* only set for the AI family below */
+
+	if (adev->gmc.xgmi.num_physical_nodes &&
+		adev->asic_type == CHIP_VEGA20)
+		flush_type = 2;	/* Vega20 with XGMI physical nodes uses flush type 2 */
+
+	if (adev->family == AMDGPU_FAMILY_AI)
+		all_hub = true;
+
+	return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
+}
+
 bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 069d5d230810..47b0f2957d1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -136,6 +136,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
 				uint32_t *ib_cmd, uint32_t ib_len);
 void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
 bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd);
+int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid);
+int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid);
 
 bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 3c119407dc34..4bcc175a149d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -71,32 +71,56 @@ static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
 				unsigned int engine_id,
 				unsigned int queue_id)
 {
-	uint32_t sdma_engine_reg_base[8] = {
-		SOC15_REG_OFFSET(SDMA0, 0,
-				 mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
-		SOC15_REG_OFFSET(SDMA1, 0,
-				 mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL,
-		SOC15_REG_OFFSET(SDMA2, 0,
-				 mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL,
-		SOC15_REG_OFFSET(SDMA3, 0,
-				 mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL,
-		SOC15_REG_OFFSET(SDMA4, 0,
-				 mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL,
-		SOC15_REG_OFFSET(SDMA5, 0,
-				 mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL,
-		SOC15_REG_OFFSET(SDMA6, 0,
-				 mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL,
-		SOC15_REG_OFFSET(SDMA7, 0,
-				 mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL
-	};
-
-	uint32_t retval = sdma_engine_reg_base[engine_id]
+	uint32_t sdma_engine_reg_base = 0;
+	uint32_t sdma_rlc_reg_offset;
+
+	switch (engine_id) {
+	default:
+		dev_warn(adev->dev,
+			 "Invalid sdma engine id (%d), using engine id 0\n",
+			 engine_id);
+		/* fall through */
+	case 0:
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+				mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
+		break;
+	case 1:
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
+				mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL;
+		break;
+	case 2:
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0,
+				mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL;
+		break;
+	case 3:
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0,
+				mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL;
+		break;
+	case 4:
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA4, 0,
+				mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL;
+		break;
+	case 5:
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA5, 0,
+				mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL;
+		break;
+	case 6:
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA6, 0,
+				mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL;
+		break;
+	case 7:
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA7, 0,
+				mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL;
+		break;
+	}
+
+	sdma_rlc_reg_offset = sdma_engine_reg_base
 		+ queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);
 
 	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
-			queue_id, retval);
+			queue_id, sdma_rlc_reg_offset);
 
-	return retval;
+	return sdma_rlc_reg_offset;
 }
 
 static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
@@ -281,6 +305,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
 	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
 	.init_interrupts = kgd_gfx_v9_init_interrupts,
 	.hqd_load = kgd_gfx_v9_hqd_load,
+	.hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
 	.hqd_sdma_load = kgd_hqd_sdma_load,
 	.hqd_dump = kgd_gfx_v9_hqd_dump,
 	.hqd_sdma_dump = kgd_hqd_sdma_dump,
@@ -296,7 +321,5 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
 			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
 	.get_tile_config = kgd_gfx_v9_get_tile_config,
 	.set_vm_context_page_table_base = kgd_set_vm_context_page_table_base,
-	.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
-	.invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid,
 	.get_hive_id = amdgpu_amdkfd_get_hive_id,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 61cd707158e4..a7b17c8deb00 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -107,13 +107,13 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
 	lock_srbm(kgd, mec, pipe, queue_id, 0);
 }
 
-static uint32_t get_queue_mask(struct amdgpu_device *adev,
+static uint64_t get_queue_mask(struct amdgpu_device *adev,
 			       uint32_t pipe_id, uint32_t queue_id)
 {
-	unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe +
-			    queue_id) & 31;
+	unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
+			queue_id;	/* flat queue index; no longer wrapped to 0..31 */
 
-	return ((uint32_t)1) << bit;
+	return 1ull << bit;	/* one-hot 64-bit mask; NOTE(review): assumes bit < 64 - confirm */
 }
 
 static void release_queue(struct kgd_dev *kgd)
@@ -268,21 +268,6 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 	pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
 	acquire_queue(kgd, pipe_id, queue_id);
 
-	/* HIQ is set during driver init period with vmid set to 0*/
-	if (m->cp_hqd_vmid == 0) {
-		uint32_t value, mec, pipe;
-
-		mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
-		pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
-
-		pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
-			mec, pipe, queue_id);
-		value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
-		value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
-			((mec << 5) | (pipe << 3) | queue_id | 0x80));
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
-	}
-
 	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
 	mqd_hqd = &m->cp_mqd_base_addr_lo;
 	hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
@@ -332,9 +317,10 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 		       lower_32_bits((uint64_t)wptr));
 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
 		       upper_32_bits((uint64_t)wptr));
-		pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__, get_queue_mask(adev, pipe_id, queue_id));
+		pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
+			 (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
-		       get_queue_mask(adev, pipe_id, queue_id));
+		       (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
 	}
 
 	/* Start the EOP fetcher */
@@ -350,6 +336,59 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 	return 0;
 }
 
+static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
+			    uint32_t pipe_id, uint32_t queue_id,
+			    uint32_t doorbell_off)
+{	/* Queue a MAP_QUEUES packet for the HIQ described by @mqd on the KIQ ring. */
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+	struct v10_compute_mqd *m;
+	uint32_t mec, pipe;
+	int r;	/* amdgpu_ring_alloc() result; also the return value */
+
+	m = get_mqd(mqd);
+
+	acquire_queue(kgd, pipe_id, queue_id);
+
+	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;	/* 1-based MEC index */
+	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);	/* pipe within that MEC */
+
+	pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+		 mec, pipe, queue_id);
+
+	spin_lock(&adev->gfx.kiq.ring_lock);
+	r = amdgpu_ring_alloc(kiq_ring, 7);	/* room for the 7 dwords written below */
+	if (r) {
+		pr_err("Failed to alloc KIQ (%d).\n", r);
+		goto out_unlock;	/* still drops the lock and releases the queue */
+	}
+
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+	amdgpu_ring_write(kiq_ring,
+			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+			  PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
+			  PACKET3_MAP_QUEUES_QUEUE(queue_id) |
+			  PACKET3_MAP_QUEUES_PIPE(pipe) |
+			  PACKET3_MAP_QUEUES_ME((mec - 1)) | /* ME field takes mec - 1 */
+			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /* queue_type: normal compute queue */
+			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+			  PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
+			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+	amdgpu_ring_write(kiq_ring,
+			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
+	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
+	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
+	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
+	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
+	amdgpu_ring_commit(kiq_ring);
+
+out_unlock:
+	spin_unlock(&adev->gfx.kiq.ring_lock);
+	release_queue(kgd);
+
+	return r;
+}
+
 static int kgd_hqd_dump(struct kgd_dev *kgd,
 			uint32_t pipe_id, uint32_t queue_id,
 			uint32_t (**dump)[2], uint32_t *n_regs)
@@ -686,71 +725,6 @@ static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
 	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
 }
 
-static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
-{
-	signed long r;
-	uint32_t seq;
-	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-
-	spin_lock(&adev->gfx.kiq.ring_lock);
-	amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
-	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
-	amdgpu_ring_write(ring,
-			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
-			PACKET3_INVALIDATE_TLBS_PASID(pasid));
-	amdgpu_fence_emit_polling(ring, &seq);
-	amdgpu_ring_commit(ring);
-	spin_unlock(&adev->gfx.kiq.ring_lock);
-
-	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
-	if (r < 1) {
-		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
-		return -ETIME;
-	}
-
-	return 0;
-}
-
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-	int vmid;
-	uint16_t queried_pasid;
-	bool ret;
-	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-
-	if (amdgpu_emu_mode == 0 && ring->sched.ready)
-		return invalidate_tlbs_with_kiq(adev, pasid);
-
-	for (vmid = 0; vmid < 16; vmid++) {
-		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
-			continue;
-
-		ret = get_atc_vmid_pasid_mapping_info(kgd, vmid,
-				&queried_pasid);
-		if (ret	&& queried_pasid == pasid) {
-			amdgpu_gmc_flush_gpu_tlb(adev, vmid,
-					AMDGPU_GFXHUB_0, 0);
-			break;
-		}
-	}
-
-	return 0;
-}
-
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
-	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
-		pr_err("non kfd vmid %d\n", vmid);
-		return 0;
-	}
-
-	amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
-	return 0;
-}
-
 static int kgd_address_watch_disable(struct kgd_dev *kgd)
 {
 	return 0;
@@ -817,6 +791,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
 	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
 	.init_interrupts = kgd_init_interrupts,
 	.hqd_load = kgd_hqd_load,
+	.hiq_mqd_load = kgd_hiq_mqd_load,
 	.hqd_sdma_load = kgd_hqd_sdma_load,
 	.hqd_dump = kgd_hqd_dump,
 	.hqd_sdma_dump = kgd_hqd_sdma_dump,
@@ -832,7 +807,5 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
 			get_atc_vmid_pasid_mapping_info,
 	.get_tile_config = amdgpu_amdkfd_get_tile_config,
 	.set_vm_context_page_table_base = set_vm_context_page_table_base,
-	.invalidate_tlbs = invalidate_tlbs,
-	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
 	.get_hive_id = amdgpu_amdkfd_get_hive_id,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 6e6f0a99ec06..8f052e98a3c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -696,45 +696,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
 		lower_32_bits(page_table_base));
 }
 
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-	int vmid;
-	unsigned int tmp;
-
-	if (adev->in_gpu_reset)
-		return -EIO;
-
-	for (vmid = 0; vmid < 16; vmid++) {
-		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
-			continue;
-
-		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
-		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
-			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
-			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
-			RREG32(mmVM_INVALIDATE_RESPONSE);
-			break;
-		}
-	}
-
-	return 0;
-}
-
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
-	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
-		pr_err("non kfd vmid\n");
-		return 0;
-	}
-
-	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
-	RREG32(mmVM_INVALIDATE_RESPONSE);
-	return 0;
-}
-
  /**
   * read_vmid_from_vmfault_reg - read vmid from register
   *
@@ -771,7 +732,5 @@ const struct kfd2kgd_calls gfx_v7_kfd2kgd = {
 	.set_scratch_backing_va = set_scratch_backing_va,
 	.get_tile_config = get_tile_config,
 	.set_vm_context_page_table_base = set_vm_context_page_table_base,
-	.invalidate_tlbs = invalidate_tlbs,
-	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
 	.read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index bfbddedb2380..19a10db93d68 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -657,45 +657,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
 			lower_32_bits(page_table_base));
 }
 
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-	int vmid;
-	unsigned int tmp;
-
-	if (adev->in_gpu_reset)
-		return -EIO;
-
-	for (vmid = 0; vmid < 16; vmid++) {
-		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
-			continue;
-
-		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
-		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
-			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
-			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
-			RREG32(mmVM_INVALIDATE_RESPONSE);
-			break;
-		}
-	}
-
-	return 0;
-}
-
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
-	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
-		pr_err("non kfd vmid %d\n", vmid);
-		return -EINVAL;
-	}
-
-	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
-	RREG32(mmVM_INVALIDATE_RESPONSE);
-	return 0;
-}
-
 const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
 	.program_sh_mem_settings = kgd_program_sh_mem_settings,
 	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
@@ -717,6 +678,4 @@ const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
 	.set_scratch_backing_va = set_scratch_backing_va,
 	.get_tile_config = get_tile_config,
 	.set_vm_context_page_table_base = set_vm_context_page_table_base,
-	.invalidate_tlbs = invalidate_tlbs,
-	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index e7861f0ef415..8562afe5b761 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -103,13 +103,13 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
 	lock_srbm(kgd, mec, pipe, queue_id, 0);
 }
 
-static uint32_t get_queue_mask(struct amdgpu_device *adev,
+static uint64_t get_queue_mask(struct amdgpu_device *adev,
 			       uint32_t pipe_id, uint32_t queue_id)
 {
-	unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe +
-			    queue_id) & 31;
+	unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
+			queue_id;	/* flat queue index; no longer wrapped to 0..31 */
 
-	return ((uint32_t)1) << bit;
+	return 1ull << bit;	/* one-hot 64-bit mask; NOTE(review): assumes bit < 64 - confirm */
 }
 
 static void release_queue(struct kgd_dev *kgd)
@@ -258,21 +258,6 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 
 	acquire_queue(kgd, pipe_id, queue_id);
 
-	/* HIQ is set during driver init period with vmid set to 0*/
-	if (m->cp_hqd_vmid == 0) {
-		uint32_t value, mec, pipe;
-
-		mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
-		pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
-
-		pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
-			mec, pipe, queue_id);
-		value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
-		value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
-			((mec << 5) | (pipe << 3) | queue_id | 0x80));
-		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
-	}
-
 	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
 	mqd_hqd = &m->cp_mqd_base_addr_lo;
 	hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
@@ -323,7 +308,7 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
 		       upper_32_bits((uintptr_t)wptr));
 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
-		       get_queue_mask(adev, pipe_id, queue_id));
+		       (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
 	}
 
 	/* Start the EOP fetcher */
@@ -339,6 +324,59 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 	return 0;
 }
 
+int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
+			    uint32_t pipe_id, uint32_t queue_id,
+			    uint32_t doorbell_off)
+{	/* Queue a MAP_QUEUES packet for the HIQ described by @mqd on the KIQ ring. */
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+	struct v9_mqd *m;
+	uint32_t mec, pipe;
+	int r;	/* amdgpu_ring_alloc() result; also the return value */
+
+	m = get_mqd(mqd);
+
+	acquire_queue(kgd, pipe_id, queue_id);
+
+	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;	/* 1-based MEC index */
+	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);	/* pipe within that MEC */
+
+	pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+		 mec, pipe, queue_id);
+
+	spin_lock(&adev->gfx.kiq.ring_lock);
+	r = amdgpu_ring_alloc(kiq_ring, 7);	/* room for the 7 dwords written below */
+	if (r) {
+		pr_err("Failed to alloc KIQ (%d).\n", r);
+		goto out_unlock;	/* still drops the lock and releases the queue */
+	}
+
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+	amdgpu_ring_write(kiq_ring,
+			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+			  PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
+			  PACKET3_MAP_QUEUES_QUEUE(queue_id) |
+			  PACKET3_MAP_QUEUES_PIPE(pipe) |
+			  PACKET3_MAP_QUEUES_ME((mec - 1)) | /* ME field takes mec - 1 */
+			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /* queue_type: normal compute queue */
+			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+			  PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
+			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+	amdgpu_ring_write(kiq_ring,
+			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
+	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
+	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
+	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
+	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
+	amdgpu_ring_commit(kiq_ring);
+
+out_unlock:
+	spin_unlock(&adev->gfx.kiq.ring_lock);
+	release_queue(kgd);
+
+	return r;
+}
+
 int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,
 			uint32_t pipe_id, uint32_t queue_id,
 			uint32_t (**dump)[2], uint32_t *n_regs)
@@ -617,100 +655,6 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
 	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
 }
 
-static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid,
-			uint32_t flush_type)
-{
-	signed long r;
-	uint32_t seq;
-	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-
-	spin_lock(&adev->gfx.kiq.ring_lock);
-	amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
-	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
-	amdgpu_ring_write(ring,
-			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
-			PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
-			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
-			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
-	amdgpu_fence_emit_polling(ring, &seq);
-	amdgpu_ring_commit(ring);
-	spin_unlock(&adev->gfx.kiq.ring_lock);
-
-	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
-	if (r < 1) {
-		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
-		return -ETIME;
-	}
-
-	return 0;
-}
-
-int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-	int vmid, i;
-	uint16_t queried_pasid;
-	bool ret;
-	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-	uint32_t flush_type = 0;
-
-	if (adev->in_gpu_reset)
-		return -EIO;
-	if (adev->gmc.xgmi.num_physical_nodes &&
-		adev->asic_type == CHIP_VEGA20)
-		flush_type = 2;
-
-	if (ring->sched.ready)
-		return invalidate_tlbs_with_kiq(adev, pasid, flush_type);
-
-	for (vmid = 0; vmid < 16; vmid++) {
-		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
-			continue;
-
-		ret = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(kgd, vmid,
-				&queried_pasid);
-		if (ret && queried_pasid == pasid) {
-			for (i = 0; i < adev->num_vmhubs; i++)
-				amdgpu_gmc_flush_gpu_tlb(adev, vmid,
-							i, flush_type);
-			break;
-		}
-	}
-
-	return 0;
-}
-
-int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-	int i;
-
-	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
-		pr_err("non kfd vmid %d\n", vmid);
-		return 0;
-	}
-
-	/* Use legacy mode tlb invalidation.
-	 *
-	 * Currently on Raven the code below is broken for anything but
-	 * legacy mode due to a MMHUB power gating problem. A workaround
-	 * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
-	 * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
-	 * bit.
-	 *
-	 * TODO 1: agree on the right set of invalidation registers for
-	 * KFD use. Use the last one for now. Invalidate both GC and
-	 * MMHUB.
-	 *
-	 * TODO 2: support range-based invalidation, requires kfg2kgd
-	 * interface change
-	 */
-	for (i = 0; i < adev->num_vmhubs; i++)
-		amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
-
-	return 0;
-}
-
 int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd)
 {
 	return 0;
@@ -778,6 +722,7 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
 	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
 	.init_interrupts = kgd_gfx_v9_init_interrupts,
 	.hqd_load = kgd_gfx_v9_hqd_load,
+	.hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
 	.hqd_sdma_load = kgd_hqd_sdma_load,
 	.hqd_dump = kgd_gfx_v9_hqd_dump,
 	.hqd_sdma_dump = kgd_hqd_sdma_dump,
@@ -793,7 +738,5 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
 			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
 	.get_tile_config = kgd_gfx_v9_get_tile_config,
 	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
-	.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
-	.invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid,
 	.get_hive_id = amdgpu_amdkfd_get_hive_id,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
index 02b1426d17d1..63d3e6683dfe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -33,6 +33,9 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 			uint32_t queue_id, uint32_t __user *wptr,
 			uint32_t wptr_shift, uint32_t wptr_mask,
 			struct mm_struct *mm);
+int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
+			    uint32_t pipe_id, uint32_t queue_id,
+			    uint32_t doorbell_off);
 int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,
 			uint32_t pipe_id, uint32_t queue_id,
 			uint32_t (**dump)[2], uint32_t *n_regs);
@@ -57,7 +60,5 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,
 
 bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
 					uint8_t vmid, uint16_t *p_pasid);
-int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
-int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
 int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd,
 		struct tile_config *config);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 5b330f69194b..a52a084158b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -909,6 +909,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 		if (parser->entity && parser->entity != entity)
 			return -EINVAL;
 
+		/* Return if there is no run queue associated with this entity,
+		 * possibly because its HW IP is disabled. */
+		if (entity->rq == NULL)
+			return -EINVAL;
+
 		parser->entity = entity;
 
 		ring = to_amdgpu_ring(entity->rq->sched);
@@ -1229,7 +1234,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 		goto error_abort;
 	}
 
-	job->owner = p->filp;
 	p->fence = dma_fence_get(&job->base.s_fence->finished);
 
 	amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 63343bb43049..f24ed9a1a3e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -26,6 +26,7 @@
 #include <linux/kthread.h>
 #include <linux/pci.h>
 #include <linux/uaccess.h>
+#include <linux/pm_runtime.h>
 
 #include <drm/drm_debugfs.h>
 
@@ -144,10 +145,20 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
 
 	*pos &= (1UL << 22) - 1;
 
+	/* get_sync holds a usage ref even on failure; drop it on error */
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0) {
+		pm_runtime_put_autosuspend(adev->ddev->dev);
+		return r;
+	}
+
 	if (use_bank) {
 		if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
-		    (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines))
+		    (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) {
+			pm_runtime_mark_last_busy(adev->ddev->dev);
+			pm_runtime_put_autosuspend(adev->ddev->dev);
 			return -EINVAL;
+		}
 		mutex_lock(&adev->grbm_idx_mutex);
 		amdgpu_gfx_select_se_sh(adev, se_bank,
 					sh_bank, instance_bank);
@@ -193,6 +201,9 @@ end:
 	if (pm_pg_lock)
 		mutex_unlock(&adev->pm.mutex);
 
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	return result;
 }
 
@@ -237,13 +248,23 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 
+	/* get_sync holds a usage ref even on failure; drop it on error */
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0) {
+		pm_runtime_put_autosuspend(adev->ddev->dev);
+		return r;
+	}
+
 	while (size) {
 		uint32_t value;
 
 		value = RREG32_PCIE(*pos >> 2);
 		r = put_user(value, (uint32_t *)buf);
-		if (r)
+		if (r) {
+			pm_runtime_mark_last_busy(adev->ddev->dev);
+			pm_runtime_put_autosuspend(adev->ddev->dev);
 			return r;
+		}
 
 		result += 4;
 		buf += 4;
@@ -251,6 +269,9 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
 		size -= 4;
 	}
 
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	return result;
 }
 
@@ -276,12 +297,22 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 
+	/* get_sync holds a usage ref even on failure; drop it on error */
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0) {
+		pm_runtime_put_autosuspend(adev->ddev->dev);
+		return r;
+	}
+
 	while (size) {
 		uint32_t value;
 
 		r = get_user(value, (uint32_t *)buf);
-		if (r)
+		if (r) {
+			pm_runtime_mark_last_busy(adev->ddev->dev);
+			pm_runtime_put_autosuspend(adev->ddev->dev);
 			return r;
+		}
 
 		WREG32_PCIE(*pos >> 2, value);
 
@@ -291,6 +319,9 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
 		size -= 4;
 	}
 
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	return result;
 }
 
@@ -316,13 +347,23 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0) {
+		/* get_sync bumps the usage count even when it fails */
+		pm_runtime_put_autosuspend(adev->ddev->dev);
+		return r;
+	}
+
 	while (size) {
 		uint32_t value;
 
 		value = RREG32_DIDT(*pos >> 2);
 		r = put_user(value, (uint32_t *)buf);
-		if (r)
+		if (r) {
+			pm_runtime_mark_last_busy(adev->ddev->dev);
+			pm_runtime_put_autosuspend(adev->ddev->dev);
 			return r;
+		}
 
 		result += 4;
 		buf += 4;
@@ -330,6 +368,9 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
 		size -= 4;
 	}
 
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	return result;
 }
 
@@ -355,12 +396,22 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0) {
+		/* get_sync bumps the usage count even when it fails */
+		pm_runtime_put_autosuspend(adev->ddev->dev);
+		return r;
+	}
+
 	while (size) {
 		uint32_t value;
 
 		r = get_user(value, (uint32_t *)buf);
-		if (r)
+		if (r) {
+			pm_runtime_mark_last_busy(adev->ddev->dev);
+			pm_runtime_put_autosuspend(adev->ddev->dev);
 			return r;
+		}
 
 		WREG32_DIDT(*pos >> 2, value);
 
@@ -370,6 +418,9 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
 		size -= 4;
 	}
 
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	return result;
 }
 
@@ -395,13 +446,23 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0) {
+		/* get_sync bumps the usage count even when it fails */
+		pm_runtime_put_autosuspend(adev->ddev->dev);
+		return r;
+	}
+
 	while (size) {
 		uint32_t value;
 
 		value = RREG32_SMC(*pos);
 		r = put_user(value, (uint32_t *)buf);
-		if (r)
+		if (r) {
+			pm_runtime_mark_last_busy(adev->ddev->dev);
+			pm_runtime_put_autosuspend(adev->ddev->dev);
 			return r;
+		}
 
 		result += 4;
 		buf += 4;
@@ -409,6 +467,9 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
 		size -= 4;
 	}
 
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	return result;
 }
 
@@ -434,12 +495,22 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0) {
+		/* get_sync bumps the usage count even when it fails */
+		pm_runtime_put_autosuspend(adev->ddev->dev);
+		return r;
+	}
+
 	while (size) {
 		uint32_t value;
 
 		r = get_user(value, (uint32_t *)buf);
-		if (r)
+		if (r) {
+			pm_runtime_mark_last_busy(adev->ddev->dev);
+			pm_runtime_put_autosuspend(adev->ddev->dev);
 			return r;
+		}
 
 		WREG32_SMC(*pos, value);
 
@@ -449,6 +517,9 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
 		size -= 4;
 	}
 
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	return result;
 }
 
@@ -572,7 +643,19 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
 	idx = *pos >> 2;
 
 	valuesize = sizeof(values);
+
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0) {
+		/* get_sync bumps the usage count even when it fails */
+		pm_runtime_put_autosuspend(adev->ddev->dev);
+		return r;
+	}
+
 	r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	if (r)
 		return r;
 
@@ -633,6 +713,13 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
 	wave = (*pos & GENMASK_ULL(36, 31)) >> 31;
 	simd = (*pos & GENMASK_ULL(44, 37)) >> 37;
 
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0) {
+		/* get_sync bumps the usage count even when it fails */
+		pm_runtime_put_autosuspend(adev->ddev->dev);
+		return r;
+	}
+
 	/* switch to the specific se/sh/cu */
 	mutex_lock(&adev->grbm_idx_mutex);
 	amdgpu_gfx_select_se_sh(adev, se, sh, cu);
@@ -644,6 +728,9 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
 	amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	if (!x)
 		return -EINVAL;
 
@@ -711,6 +798,17 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
 	if (!data)
 		return -ENOMEM;
 
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0) {
+		/*
+		 * get_sync bumps the usage count even when it fails.
+		 * NOTE(review): "data" checked above does not appear to be
+		 * released on this early return either - confirm and free it.
+		 */
+		pm_runtime_put_autosuspend(adev->ddev->dev);
+		return r;
+	}
+
 	/* switch to the specific se/sh/cu */
 	mutex_lock(&adev->grbm_idx_mutex);
 	amdgpu_gfx_select_se_sh(adev, se, sh, cu);
@@ -726,6 +817,9 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
 	amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	while (size) {
 		uint32_t value;
 
@@ -859,6 +953,13 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
 	struct amdgpu_device *adev = dev->dev_private;
 	int r = 0, i;
 
+	r = pm_runtime_get_sync(dev->dev);
+	if (r < 0) {
+		/* get_sync bumps the usage count even when it fails */
+		pm_runtime_put_autosuspend(dev->dev);
+		return r;
+	}
+
 	/* Avoid accidently unparking the sched thread during GPU reset */
 	mutex_lock(&adev->lock_reset);
 
@@ -889,6 +987,9 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
 
 	mutex_unlock(&adev->lock_reset);
 
+	pm_runtime_mark_last_busy(dev->dev);
+	pm_runtime_put_autosuspend(dev->dev);
+
 	return 0;
 }
 
@@ -907,8 +1008,20 @@ static int amdgpu_debugfs_evict_vram(struct seq_file *m, void *data)
 	struct drm_info_node *node = (struct drm_info_node *)m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct amdgpu_device *adev = dev->dev_private;
+	int r;
+
+	r = pm_runtime_get_sync(dev->dev);
+	if (r < 0) {
+		/* get_sync bumps the usage count even when it fails */
+		pm_runtime_put_autosuspend(dev->dev);
+		return r;
+	}
 
 	seq_printf(m, "(%d)\n", amdgpu_bo_evict_vram(adev));
+
+	pm_runtime_mark_last_busy(dev->dev);
+	pm_runtime_put_autosuspend(dev->dev);
+
 	return 0;
 }
 
@@ -917,8 +1027,20 @@ static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data)
 	struct drm_info_node *node = (struct drm_info_node *)m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct amdgpu_device *adev = dev->dev_private;
+	int r;
+
+	r = pm_runtime_get_sync(dev->dev);
+	if (r < 0) {
+		/* get_sync bumps the usage count even when it fails */
+		pm_runtime_put_autosuspend(dev->dev);
+		return r;
+	}
 
 	seq_printf(m, "(%d)\n", ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_TT));
+
+	pm_runtime_mark_last_busy(dev->dev);
+	pm_runtime_put_autosuspend(dev->dev);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 9b4c18b3546f..53d882000101 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2345,14 +2345,7 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
 		adev->ip_blocks[i].status.hw = false;
 		/* handle putting the SMC in the appropriate state */
 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
-			if (is_support_sw_smu(adev)) {
-				r = smu_set_mp1_state(&adev->smu, adev->mp1_state);
-			} else if (adev->powerplay.pp_funcs &&
-					   adev->powerplay.pp_funcs->set_mp1_state) {
-				r = adev->powerplay.pp_funcs->set_mp1_state(
-					adev->powerplay.pp_handle,
-					adev->mp1_state);
-			}
+			r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
 			if (r) {
 				DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
 					  adev->mp1_state, r);
@@ -2855,6 +2848,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	hash_init(adev->mn_hash);
 	mutex_init(&adev->lock_reset);
 	mutex_init(&adev->psp.mutex);
+	mutex_init(&adev->notifier_lock);
 
 	r = amdgpu_device_check_arguments(adev);
 	if (r)
@@ -3765,6 +3759,7 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
 		case CHIP_VEGA10:
 		case CHIP_VEGA12:
 		case CHIP_RAVEN:
+		case CHIP_ARCTURUS:
 			break;
 		default:
 			goto disabled;
@@ -4359,55 +4354,21 @@ int amdgpu_device_baco_enter(struct drm_device *dev)
 	if (ras && ras->supported)
 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
 
-	if (is_support_sw_smu(adev)) {
-		struct smu_context *smu = &adev->smu;
-		int ret;
-
-		ret = smu_baco_enter(smu);
-		if (ret)
-			return ret;
-	} else {
-		void *pp_handle = adev->powerplay.pp_handle;
-		const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
-
-		if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state)
-			return -ENOENT;
-
-		/* enter BACO state */
-		if (pp_funcs->set_asic_baco_state(pp_handle, 1))
-			return -EIO;
-	}
-
-	return 0;
+	return amdgpu_dpm_baco_enter(adev);
 }
 
 int amdgpu_device_baco_exit(struct drm_device *dev)
 {
 	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+	int ret = 0;
 
 	if (!amdgpu_device_supports_baco(adev->ddev))
 		return -ENOTSUPP;
 
-	if (is_support_sw_smu(adev)) {
-		struct smu_context *smu = &adev->smu;
-		int ret;
-
-		ret = smu_baco_exit(smu);
-		if (ret)
-			return ret;
-
-	} else {
-		void *pp_handle = adev->powerplay.pp_handle;
-		const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
-
-		if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state)
-			return -ENOENT;
-
-		/* exit BACO state */
-		if (pp_funcs->set_asic_baco_state(pp_handle, 0))
-			return -EIO;
-	}
+	ret = amdgpu_dpm_baco_exit(adev);
+	if (ret)
+		return ret;
 
 	if (ras && ras->supported)
 		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
new file mode 100644
index 000000000000..61a26c15c8dd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_DF_H__
+#define __AMDGPU_DF_H__
+
+struct amdgpu_df_hash_status {	/* three independent boolean flags */
+	bool hash_64k;	/* NOTE(review): presumably the 64K hash mode - confirm */
+	bool hash_2m;	/* NOTE(review): presumably the 2M hash mode - confirm */
+	bool hash_1g;	/* NOTE(review): presumably the 1G hash mode - confirm */
+};
+
+struct amdgpu_df_funcs {	/* hook table; adev is always the first argument */
+	void (*sw_init)(struct amdgpu_device *adev);
+	void (*sw_fini)(struct amdgpu_device *adev);
+	void (*enable_broadcast_mode)(struct amdgpu_device *adev,
+				      bool enable);
+	u32 (*get_fb_channel_number)(struct amdgpu_device *adev);
+	u32 (*get_hbm_channel_number)(struct amdgpu_device *adev);
+	void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
+						 bool enable);
+	void (*get_clockgating_state)(struct amdgpu_device *adev,
+				      u32 *flags);
+	void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
+					    bool enable);
+	int (*pmc_start)(struct amdgpu_device *adev, uint64_t config,
+					 int is_enable);
+	int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config,
+					 int is_disable);
+	void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config,
+					 uint64_t *count);
+	uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val);
+	void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val,
+			 uint32_t ficadl_val, uint32_t ficadh_val);
+};
+
+struct amdgpu_df {	/* embedded in struct amdgpu_device as member "df" */
+	struct amdgpu_df_hash_status	hash_status;	/* the hash_* flags */
+	const struct amdgpu_df_funcs	*funcs;	/* read-only hook table */
+};
+
+#endif /* __AMDGPU_DF_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 4e699071d144..6d520a3eec40 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -513,13 +513,23 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
 	 * will not allow USWC mappings.
 	 * Also, don't allow GTT domain if the BO doens't have USWC falg set.
 	 */
-	if (adev->asic_type >= CHIP_CARRIZO &&
-	    adev->asic_type < CHIP_RAVEN &&
-	    (adev->flags & AMD_IS_APU) &&
-	    (bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&
+	if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&
 	    amdgpu_bo_support_uswc(bo_flags) &&
-	    amdgpu_device_asic_has_dc_support(adev->asic_type))
-		domain |= AMDGPU_GEM_DOMAIN_GTT;
+	    amdgpu_device_asic_has_dc_support(adev->asic_type)) {
+		switch (adev->asic_type) {
+		case CHIP_CARRIZO:
+		case CHIP_STONEY:
+			domain |= AMDGPU_GEM_DOMAIN_GTT;
+			break;
+		case CHIP_RAVEN:
+			/* enable S/G on PCO and RV2 */
+			if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
+				domain |= AMDGPU_GEM_DOMAIN_GTT;
+			break;
+		default:
+			break;
+		}
+	}
 #endif
 
 	return domain;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
index cd76fbf4385d..a2e8c3dfb4f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
@@ -946,23 +946,54 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block
 	bool swsmu = is_support_sw_smu(adev);
 
 	switch (block_type) {
-	case AMD_IP_BLOCK_TYPE_GFX:
 	case AMD_IP_BLOCK_TYPE_UVD:
-	case AMD_IP_BLOCK_TYPE_VCN:
 	case AMD_IP_BLOCK_TYPE_VCE:
-	case AMD_IP_BLOCK_TYPE_SDMA:
 		if (swsmu) {
 			ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate);
-		} else {
-			if (adev->powerplay.pp_funcs &&
-			    adev->powerplay.pp_funcs->set_powergating_by_smu) {
-				mutex_lock(&adev->pm.mutex);
-				ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu(
-					(adev)->powerplay.pp_handle, block_type, gate));
-				mutex_unlock(&adev->pm.mutex);
-			}
+		} else if (adev->powerplay.pp_funcs &&
+			   adev->powerplay.pp_funcs->set_powergating_by_smu) {
+			/*
+			 * TODO: need a better lock mechanism
+			 *
+			 * Here adev->pm.mutex lock protection is enforced on
+			 * UVD and VCE cases only. Since for other cases, there
+			 * may be already lock protection in amdgpu_pm.c.
+			 * This is a quick fix for the deadlock issue below.
+			 *     INFO: task ocltst:2028 blocked for more than 120 seconds.
+			 *     Tainted: G           OE     5.0.0-37-generic #40~18.04.1-Ubuntu
+			 *     "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+			 *     ocltst          D    0  2028   2026 0x00000000
+			 *     Call Trace:
+			 *     __schedule+0x2c0/0x870
+			 *     schedule+0x2c/0x70
+			 *     schedule_preempt_disabled+0xe/0x10
+			 *     __mutex_lock.isra.9+0x26d/0x4e0
+			 *     __mutex_lock_slowpath+0x13/0x20
+			 *     ? __mutex_lock_slowpath+0x13/0x20
+			 *     mutex_lock+0x2f/0x40
+			 *     amdgpu_dpm_set_powergating_by_smu+0x64/0xe0 [amdgpu]
+			 *     gfx_v8_0_enable_gfx_static_mg_power_gating+0x3c/0x70 [amdgpu]
+			 *     gfx_v8_0_set_powergating_state+0x66/0x260 [amdgpu]
+			 *     amdgpu_device_ip_set_powergating_state+0x62/0xb0 [amdgpu]
+			 *     pp_dpm_force_performance_level+0xe7/0x100 [amdgpu]
+			 *     amdgpu_set_dpm_forced_performance_level+0x129/0x330 [amdgpu]
+			 */
+			mutex_lock(&adev->pm.mutex);
+			ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu(
+				(adev)->powerplay.pp_handle, block_type, gate));
+			mutex_unlock(&adev->pm.mutex);
 		}
 		break;
+	case AMD_IP_BLOCK_TYPE_GFX:
+	case AMD_IP_BLOCK_TYPE_VCN:
+	case AMD_IP_BLOCK_TYPE_SDMA:
+		if (swsmu)
+			ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate);
+		else if (adev->powerplay.pp_funcs &&
+			 adev->powerplay.pp_funcs->set_powergating_by_smu)
+			ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu(
+				(adev)->powerplay.pp_handle, block_type, gate));
+		break;
 	case AMD_IP_BLOCK_TYPE_JPEG:
 		if (swsmu)
 			ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate);
@@ -970,12 +1001,9 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block
 	case AMD_IP_BLOCK_TYPE_GMC:
 	case AMD_IP_BLOCK_TYPE_ACP:
 		if (adev->powerplay.pp_funcs &&
-		    adev->powerplay.pp_funcs->set_powergating_by_smu) {
-			mutex_lock(&adev->pm.mutex);
+		    adev->powerplay.pp_funcs->set_powergating_by_smu)
 			ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu(
 				(adev)->powerplay.pp_handle, block_type, gate));
-			mutex_unlock(&adev->pm.mutex);
-		}
 		break;
 	default:
 		break;
@@ -983,3 +1011,163 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block
 
 	return ret;
 }
+
+/* Enter BACO via the SW SMU when supported, else via the powerplay hook. */
+int amdgpu_dpm_baco_enter(struct amdgpu_device *adev)
+{
+	const struct amd_pm_funcs *funcs;
+
+	/* SW SMU path: hand the request straight to the SMU layer */
+	if (is_support_sw_smu(adev))
+		return smu_baco_enter(&adev->smu);
+
+	/* powerplay path: the hook table and the BACO hook are optional */
+	funcs = adev->powerplay.pp_funcs;
+	if (!funcs)
+		return -ENOENT;
+	if (!funcs->set_asic_baco_state)
+		return -ENOENT;
+
+	/* second argument 1 requests BACO entry */
+	return funcs->set_asic_baco_state(adev->powerplay.pp_handle, 1);
+}
+
+/* Leave BACO via the SW SMU when supported, else via the powerplay hook. */
+int amdgpu_dpm_baco_exit(struct amdgpu_device *adev)
+{
+	const struct amd_pm_funcs *funcs;
+
+	/* SW SMU path: hand the request straight to the SMU layer */
+	if (is_support_sw_smu(adev))
+		return smu_baco_exit(&adev->smu);
+
+	/* powerplay path: the hook table and the BACO hook are optional */
+	funcs = adev->powerplay.pp_funcs;
+	if (!funcs)
+		return -ENOENT;
+	if (!funcs->set_asic_baco_state)
+		return -ENOENT;
+
+	/* second argument 0 requests BACO exit */
+	return funcs->set_asic_baco_state(adev->powerplay.pp_handle, 0);
+}
+
+/* Forward an MP1 state request to the SW SMU or the powerplay hook. */
+int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev,
+			     enum pp_mp1_state mp1_state)
+{
+	const struct amd_pm_funcs *funcs;
+
+	if (is_support_sw_smu(adev))
+		return smu_set_mp1_state(&adev->smu, mp1_state);
+
+	/* no powerplay hook installed: nothing to do, report success */
+	funcs = adev->powerplay.pp_funcs;
+	if (!funcs || !funcs->set_mp1_state)
+		return 0;
+
+	return funcs->set_mp1_state(adev->powerplay.pp_handle, mp1_state);
+}
+
+/* Report whether BACO is available, asking the SW SMU or powerplay. */
+bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev)
+{
+	const struct amd_pm_funcs *funcs;
+	bool supported;
+
+	if (is_support_sw_smu(adev))
+		return smu_baco_is_support(&adev->smu);
+
+	/* without a capability hook the answer is "not supported" */
+	funcs = adev->powerplay.pp_funcs;
+	if (!funcs || !funcs->get_asic_baco_capability)
+		return false;
+	/* a failing capability query is also reported as "not supported" */
+	if (funcs->get_asic_baco_capability(adev->powerplay.pp_handle,
+					    &supported) != 0)
+		return false;
+	return supported;
+}
+
+/* Trigger a mode-2 reset through the SW SMU or the powerplay hook. */
+int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev)
+{
+	const struct amd_pm_funcs *funcs;
+
+	if (is_support_sw_smu(adev))
+		return smu_mode2_reset(&adev->smu);
+
+	/* powerplay without the reset hook cannot service the request */
+	funcs = adev->powerplay.pp_funcs;
+	if (!funcs || !funcs->asic_reset_mode_2)
+		return -ENOENT;
+
+	return funcs->asic_reset_mode_2(adev->powerplay.pp_handle);
+}
+
+/* Perform a BACO reset: enter BACO, then leave it again. */
+int amdgpu_dpm_baco_reset(struct amdgpu_device *adev)
+{
+	const struct amd_pm_funcs *funcs = adev->powerplay.pp_funcs;
+	void *handle = adev->powerplay.pp_handle;
+	int err;
+
+	dev_info(adev->dev, "GPU BACO reset\n");
+
+	if (is_support_sw_smu(adev)) {
+		/* SW SMU path: stop at the first step that fails */
+		err = smu_baco_enter(&adev->smu);
+		if (!err)
+			err = smu_baco_exit(&adev->smu);
+		return err;
+	}
+
+	/* powerplay path needs the BACO state hook */
+	if (!funcs)
+		return -ENOENT;
+	if (!funcs->set_asic_baco_state)
+		return -ENOENT;
+
+	/* state 1 enters BACO ... */
+	err = funcs->set_asic_baco_state(handle, 1);
+	if (err)
+		return err;
+
+	/* ... and state 0 leaves it again */
+	err = funcs->set_asic_baco_state(handle, 0);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+/* Forward a power-profile switch (@type, @en) to SW SMU or powerplay. */
+int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev,
+				    enum PP_SMC_POWER_PROFILE type,
+				    bool en)
+{
+	const struct amd_pm_funcs *funcs = adev->powerplay.pp_funcs;
+
+	if (is_support_sw_smu(adev))
+		return smu_switch_power_profile(&adev->smu, type, en);
+
+	/* a missing powerplay hook is not an error */
+	if (!funcs || !funcs->switch_power_profile)
+		return 0;
+	return funcs->switch_power_profile(adev->powerplay.pp_handle, type, en);
+}
+
+/* Forward an XGMI pstate request to the SW SMU or the powerplay hook. */
+int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev,
+			       uint32_t pstate)
+{
+	const struct amd_pm_funcs *funcs = adev->powerplay.pp_funcs;
+
+	/* note: gated on the XGMI-specific SW SMU check */
+	if (is_support_sw_smu_xgmi(adev))
+		return smu_set_xgmi_pstate(&adev->smu, pstate);
+
+	if (!funcs || !funcs->set_xgmi_pstate)
+		return 0;
+	return funcs->set_xgmi_pstate(adev->powerplay.pp_handle, pstate);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index 2cfb677272af..902ca6c00cca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -341,10 +341,6 @@ enum amdgpu_pcie_gen {
 		((adev)->powerplay.pp_funcs->reset_power_profile_state(\
 			(adev)->powerplay.pp_handle, request))
 
-#define amdgpu_dpm_switch_power_profile(adev, type, en) \
-		((adev)->powerplay.pp_funcs->switch_power_profile(\
-			(adev)->powerplay.pp_handle, type, en))
-
 #define amdgpu_dpm_set_clockgating_by_smu(adev, msg_id) \
 		((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\
 			(adev)->powerplay.pp_handle, msg_id))
@@ -517,4 +513,24 @@ extern int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low);
 
 extern int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low);
 
+int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev,
+			       uint32_t pstate);
+
+int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev,
+				    enum PP_SMC_POWER_PROFILE type,
+				    bool en);
+
+int amdgpu_dpm_baco_reset(struct amdgpu_device *adev);
+
+int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev);
+
+bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev);
+
+int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev,
+			     enum pp_mp1_state mp1_state);
+
+int amdgpu_dpm_baco_exit(struct amdgpu_device *adev);
+
+int amdgpu_dpm_baco_enter(struct amdgpu_device *adev);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index e9efee04ca23..3c01252b1e0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -741,10 +741,21 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data)
 	struct drm_info_node *node = (struct drm_info_node *) m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct amdgpu_device *adev = dev->dev_private;
+	int r;
+
+	r = pm_runtime_get_sync(dev->dev);
+	if (r < 0) {
+		/* drop the count get_sync took; report the resume failure */
+		pm_runtime_put_autosuspend(dev->dev);
+		return r;
+	}
 
 	seq_printf(m, "gpu recover\n");
 	amdgpu_device_gpu_recover(adev, NULL);
 
+	pm_runtime_mark_last_busy(dev->dev);
+	pm_runtime_put_autosuspend(dev->dev);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index db7b2b3f9966..b88b8b82bb64 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -543,12 +543,6 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
 	if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
 		return;
 
-	if (!is_support_sw_smu(adev) &&
-	    (!adev->powerplay.pp_funcs ||
-	     !adev->powerplay.pp_funcs->set_powergating_by_smu))
-		return;
-
-
 	mutex_lock(&adev->gfx.gfx_off_mutex);
 
 	if (!enable)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 8e88e0411662..af4bd279f42f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -76,11 +76,15 @@ struct kiq_pm4_funcs {
 					struct amdgpu_ring *ring,
 					u64 addr,
 					u64 seq);
+	void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring,
+				uint16_t pasid, uint32_t flush_type,
+				bool all_hub);
 	/* Packet sizes */
 	int set_resources_size;
 	int map_queues_size;
 	int unmap_queues_size;
 	int query_status_size;
+	int invalidate_tlbs_size;
 };
 
 struct amdgpu_kiq {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index c91dd602d5f1..86267baca07c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -60,6 +60,11 @@
  */
 #define AMDGPU_GMC_FAULT_TIMEOUT	5000ULL
 
+/*
+ * Default stolen memory size, 1024 * 768 * 4
+ */
+#define AMDGPU_STOLEN_BIST_TRAINING_DEFAULT_SIZE	0x300000ULL
+
 struct firmware;
 
 /*
@@ -92,6 +97,9 @@ struct amdgpu_gmc_funcs {
 	/* flush the vm tlb via mmio */
 	void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
 				uint32_t vmhub, uint32_t flush_type);
+	/* flush the vm tlb via pasid */
+	int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
+					uint32_t flush_type, bool all_hub);
 	/* flush the vm tlb via ring */
 	uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
 				       uint64_t pd_addr);
@@ -216,6 +224,9 @@ struct amdgpu_gmc {
 };
 
 #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
+#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \
+	((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
+	((adev), (pasid), (type), (allhub)))
 #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
 #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
 #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 73328d0c741d..d42be880a236 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -153,7 +153,6 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
 	if (r)
 		return r;
 
-	job->owner = owner;
 	*f = dma_fence_get(&job->base.s_fence->finished);
 	amdgpu_job_free_resources(job);
 	priority = job->base.s_priority;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index dc7ee9358dcd..3f7b8433d179 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -49,7 +49,6 @@ struct amdgpu_job {
 	uint32_t		preamble_status;
 	uint32_t                preemption_status;
 	uint32_t		num_ibs;
-	void			*owner;
 	bool                    vm_needs_flush;
 	uint64_t		vm_pd_addr;
 	unsigned		vmid;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 285d460624c8..b03b1eb7ba04 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -37,6 +37,7 @@
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
 #include <linux/nospec.h>
+#include <linux/pm_runtime.h>
 #include "hwmgr.h"
 #define WIDTH_4K 3840
 
@@ -158,10 +159,18 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
 	enum amd_pm_state_type pm;
+	int ret;
 
 	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
 		return 0;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0) {
+		/* get_sync bumps the usage count even when it fails */
+		pm_runtime_put_autosuspend(ddev->dev);
+		return ret;
+	}
+
 	if (is_support_sw_smu(adev)) {
 		if (adev->smu.ppt_funcs->get_current_power_state)
 			pm = smu_get_current_power_state(&adev->smu);
@@ -173,6 +179,9 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev,
 		pm = adev->pm.dpm.user_state;
 	}
 
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	return snprintf(buf, PAGE_SIZE, "%s\n",
 			(pm == POWER_STATE_TYPE_BATTERY) ? "battery" :
 			(pm == POWER_STATE_TYPE_BALANCED) ? "balanced" : "performance");
@@ -186,6 +195,7 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
 	enum amd_pm_state_type  state;
+	int ret;
 
 	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
 		return -EINVAL;
@@ -196,10 +206,15 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev,
 		state = POWER_STATE_TYPE_BALANCED;
 	else if (strncmp("performance", buf, strlen("performance")) == 0)
 		state = POWER_STATE_TYPE_PERFORMANCE;
-	else {
-		count = -EINVAL;
-		goto fail;
-	}
+	else
+		return -EINVAL;
+
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0) {
+		/* get_sync bumps the usage count even when it fails */
+		pm_runtime_put_autosuspend(ddev->dev);
+		return ret;
+	}
 
 	if (is_support_sw_smu(adev)) {
 		mutex_lock(&adev->pm.mutex);
@@ -212,12 +224,11 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev,
 		adev->pm.dpm.user_state = state;
 		mutex_unlock(&adev->pm.mutex);
 
-		/* Can't set dpm state when the card is off */
-		if (!(adev->flags & AMD_IS_PX) ||
-		    (ddev->switch_power_state == DRM_SWITCH_POWER_ON))
-			amdgpu_pm_compute_clocks(adev);
+		amdgpu_pm_compute_clocks(adev);
 	}
-fail:
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	return count;
 }
 
@@ -288,13 +299,17 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
 	enum amd_dpm_forced_level level = 0xff;
+	int ret;
 
 	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
 		return 0;
 
-	if ((adev->flags & AMD_IS_PX) &&
-	    (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return snprintf(buf, PAGE_SIZE, "off\n");
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0) {
+		/* get_sync bumps the usage count even when it fails */
+		pm_runtime_put_autosuspend(ddev->dev);
+		return ret;
+	}
 
 	if (is_support_sw_smu(adev))
 		level = smu_get_performance_level(&adev->smu);
@@ -303,6 +315,9 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev,
 	else
 		level = adev->pm.dpm.forced_level;
 
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	return snprintf(buf, PAGE_SIZE, "%s\n",
 			(level == AMD_DPM_FORCED_LEVEL_AUTO) ? "auto" :
 			(level == AMD_DPM_FORCED_LEVEL_LOW) ? "low" :
@@ -329,11 +344,6 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
 	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
 		return -EINVAL;
 
-	/* Can't force performance level when the card is off */
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return -EINVAL;
-
 	if (strncmp("low", buf, strlen("low")) == 0) {
 		level = AMD_DPM_FORCED_LEVEL_LOW;
 	} else if (strncmp("high", buf, strlen("high")) == 0) {
@@ -353,17 +363,26 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
 	} else if (strncmp("profile_peak", buf, strlen("profile_peak")) == 0) {
 		level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK;
 	}  else {
-		count = -EINVAL;
-		goto fail;
+		return -EINVAL;
 	}
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0) {
+		/* get_sync bumps the usage count even when it fails */
+		pm_runtime_put_autosuspend(ddev->dev);
+		return ret;
+	}
+
 	if (is_support_sw_smu(adev))
 		current_level = smu_get_performance_level(&adev->smu);
 	else if (adev->powerplay.pp_funcs->get_performance_level)
 		current_level = amdgpu_dpm_get_performance_level(adev);
 
-	if (current_level == level)
+	if (current_level == level) {
+		pm_runtime_mark_last_busy(ddev->dev);
+		pm_runtime_put_autosuspend(ddev->dev);
 		return count;
+	}
 
 	/* profile_exit setting is valid only when current mode is in profile mode */
 	if (!(current_level & (AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD |
@@ -372,29 +388,40 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
 	    AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)) &&
 	    (level == AMD_DPM_FORCED_LEVEL_PROFILE_EXIT)) {
 		pr_err("Currently not in any profile mode!\n");
+		pm_runtime_mark_last_busy(ddev->dev);
+		pm_runtime_put_autosuspend(ddev->dev);
 		return -EINVAL;
 	}
 
 	if (is_support_sw_smu(adev)) {
 		ret = smu_force_performance_level(&adev->smu, level);
-		if (ret)
-			count = -EINVAL;
+		if (ret) {
+			pm_runtime_mark_last_busy(ddev->dev);
+			pm_runtime_put_autosuspend(ddev->dev);
+			return -EINVAL;
+		}
 	} else if (adev->powerplay.pp_funcs->force_performance_level) {
 		mutex_lock(&adev->pm.mutex);
 		if (adev->pm.dpm.thermal_active) {
-			count = -EINVAL;
 			mutex_unlock(&adev->pm.mutex);
-			goto fail;
+			pm_runtime_mark_last_busy(ddev->dev);
+			pm_runtime_put_autosuspend(ddev->dev);
+			return -EINVAL;
 		}
 		ret = amdgpu_dpm_force_performance_level(adev, level);
-		if (ret)
-			count = -EINVAL;
-		else
+		if (ret) {
+			mutex_unlock(&adev->pm.mutex);
+			pm_runtime_mark_last_busy(ddev->dev);
+			pm_runtime_put_autosuspend(ddev->dev);
+			return -EINVAL;
+		} else {
 			adev->pm.dpm.forced_level = level;
+		}
 		mutex_unlock(&adev->pm.mutex);
 	}
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
 
-fail:
 	return count;
 }
 
@@ -407,6 +434,10 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
 	struct pp_states_info data;
 	int i, buf_len, ret;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev)) {
 		ret = smu_get_power_num_states(&adev->smu, &data);
 		if (ret)
@@ -414,6 +445,9 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
 	} else if (adev->powerplay.pp_funcs->get_pp_num_states)
 		amdgpu_dpm_get_pp_num_states(adev, &data);
 
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	buf_len = snprintf(buf, PAGE_SIZE, "states: %d\n", data.nums);
 	for (i = 0; i < data.nums; i++)
 		buf_len += snprintf(buf + buf_len, PAGE_SIZE, "%d %s\n", i,
@@ -439,6 +473,10 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
 	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
 		return 0;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev)) {
 		pm = smu_get_current_power_state(smu);
 		ret = smu_get_power_num_states(smu, &data);
@@ -450,6 +488,9 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
 		amdgpu_dpm_get_pp_num_states(adev, &data);
 	}
 
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	for (i = 0; i < data.nums; i++) {
 		if (pm == data.states[i])
 			break;
@@ -500,14 +541,18 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
 		struct pp_states_info data;
 
 		ret = kstrtoul(buf, 0, &idx);
-		if (ret || idx >= ARRAY_SIZE(data.states)) {
-			count = -EINVAL;
-			goto fail;
-		}
+		if (ret || idx >= ARRAY_SIZE(data.states))
+			return -EINVAL;
+
 		idx = array_index_nospec(idx, ARRAY_SIZE(data.states));
 
 		amdgpu_dpm_get_pp_num_states(adev, &data);
 		state = data.states[idx];
+
+		ret = pm_runtime_get_sync(ddev->dev);
+		if (ret < 0)
+			return ret;
+
 		/* only set user selected power states */
 		if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
 		    state != POWER_STATE_TYPE_DEFAULT) {
@@ -515,8 +560,10 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
 					AMD_PP_TASK_ENABLE_USER_STATE, &state);
 			adev->pp_force_state_enabled = true;
 		}
+		pm_runtime_mark_last_busy(ddev->dev);
+		pm_runtime_put_autosuspend(ddev->dev);
 	}
-fail:
+
 	return count;
 }
 
@@ -538,20 +585,32 @@ static ssize_t amdgpu_get_pp_table(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
 	char *table = NULL;
-	int size;
+	int size, ret;
 
 	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
 		return 0;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev)) {
 		size = smu_sys_get_pp_table(&adev->smu, (void **)&table);
+		pm_runtime_mark_last_busy(ddev->dev);
+		pm_runtime_put_autosuspend(ddev->dev);
 		if (size < 0)
 			return size;
-	}
-	else if (adev->powerplay.pp_funcs->get_pp_table)
+	} else if (adev->powerplay.pp_funcs->get_pp_table) {
 		size = amdgpu_dpm_get_pp_table(adev, &table);
-	else
+		pm_runtime_mark_last_busy(ddev->dev);
+		pm_runtime_put_autosuspend(ddev->dev);
+		if (size < 0)
+			return size;
+	} else {
+		pm_runtime_mark_last_busy(ddev->dev);
+		pm_runtime_put_autosuspend(ddev->dev);
 		return 0;
+	}
 
 	if (size >= PAGE_SIZE)
 		size = PAGE_SIZE - 1;
@@ -573,13 +632,23 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
 	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
 		return -EINVAL;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev)) {
 		ret = smu_sys_set_pp_table(&adev->smu, (void *)buf, count);
-		if (ret)
+		if (ret) {
+			pm_runtime_mark_last_busy(ddev->dev);
+			pm_runtime_put_autosuspend(ddev->dev);
 			return ret;
+		}
 	} else if (adev->powerplay.pp_funcs->set_pp_table)
 		amdgpu_dpm_set_pp_table(adev, buf, count);
 
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	return count;
 }
 
@@ -703,18 +772,28 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
 			tmp_str++;
 	}
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev)) {
 		ret = smu_od_edit_dpm_table(&adev->smu, type,
 					    parameter, parameter_size);
 
-		if (ret)
+		if (ret) {
+			pm_runtime_mark_last_busy(ddev->dev);
+			pm_runtime_put_autosuspend(ddev->dev);
 			return -EINVAL;
+		}
 	} else {
 		if (adev->powerplay.pp_funcs->odn_edit_dpm_table) {
 			ret = amdgpu_dpm_odn_edit_dpm_table(adev, type,
 						parameter, parameter_size);
-			if (ret)
+			if (ret) {
+				pm_runtime_mark_last_busy(ddev->dev);
+				pm_runtime_put_autosuspend(ddev->dev);
 				return -EINVAL;
+			}
 		}
 
 		if (type == PP_OD_COMMIT_DPM_TABLE) {
@@ -722,12 +801,18 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
 				amdgpu_dpm_dispatch_task(adev,
 						AMD_PP_TASK_READJUST_POWER_STATE,
 						NULL);
+				pm_runtime_mark_last_busy(ddev->dev);
+				pm_runtime_put_autosuspend(ddev->dev);
 				return count;
 			} else {
+				pm_runtime_mark_last_busy(ddev->dev);
+				pm_runtime_put_autosuspend(ddev->dev);
 				return -EINVAL;
 			}
 		}
 	}
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
 
 	return count;
 }
@@ -738,27 +823,33 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
 {
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
-	uint32_t size = 0;
+	ssize_t size;
+	int ret;
 
 	if (amdgpu_sriov_vf(adev))
 		return 0;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev)) {
 		size = smu_print_clk_levels(&adev->smu, SMU_OD_SCLK, buf);
 		size += smu_print_clk_levels(&adev->smu, SMU_OD_MCLK, buf+size);
 		size += smu_print_clk_levels(&adev->smu, SMU_OD_VDDC_CURVE, buf+size);
 		size += smu_print_clk_levels(&adev->smu, SMU_OD_RANGE, buf+size);
-		return size;
 	} else if (adev->powerplay.pp_funcs->print_clock_levels) {
 		size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf);
 		size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size);
 		size += amdgpu_dpm_print_clock_levels(adev, OD_VDDC_CURVE, buf+size);
 		size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, buf+size);
-		return size;
 	} else {
-		return snprintf(buf, PAGE_SIZE, "\n");
+		size = snprintf(buf, PAGE_SIZE, "\n");
 	}
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
 
+	return size;
 }
 
 /**
@@ -796,15 +887,27 @@ static ssize_t amdgpu_set_pp_feature_status(struct device *dev,
 
 	pr_debug("featuremask = 0x%llx\n", featuremask);
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev)) {
 		ret = smu_sys_set_pp_feature_mask(&adev->smu, featuremask);
-		if (ret)
+		if (ret) {
+			pm_runtime_mark_last_busy(ddev->dev);
+			pm_runtime_put_autosuspend(ddev->dev);
 			return -EINVAL;
+		}
 	} else if (adev->powerplay.pp_funcs->set_ppfeature_status) {
 		ret = amdgpu_dpm_set_ppfeature_status(adev, featuremask);
-		if (ret)
+		if (ret) {
+			pm_runtime_mark_last_busy(ddev->dev);
+			pm_runtime_put_autosuspend(ddev->dev);
 			return -EINVAL;
+		}
 	}
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
 
 	return count;
 }
@@ -815,16 +918,27 @@ static ssize_t amdgpu_get_pp_feature_status(struct device *dev,
 {
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
+	ssize_t size;
+	int ret;
 
 	if (amdgpu_sriov_vf(adev))
 		return 0;
 
-	if (is_support_sw_smu(adev)) {
-		return smu_sys_get_pp_feature_mask(&adev->smu, buf);
-	} else if (adev->powerplay.pp_funcs->get_ppfeature_status)
-		return amdgpu_dpm_get_ppfeature_status(adev, buf);
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
 
-	return snprintf(buf, PAGE_SIZE, "\n");
+	if (is_support_sw_smu(adev))
+		size = smu_sys_get_pp_feature_mask(&adev->smu, buf);
+	else if (adev->powerplay.pp_funcs->get_ppfeature_status)
+		size = amdgpu_dpm_get_ppfeature_status(adev, buf);
+	else
+		size = snprintf(buf, PAGE_SIZE, "\n");
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
+	return size;
 }
 
 /**
@@ -863,16 +977,27 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
 {
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
+	ssize_t size;
+	int ret;
 
 	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
 		return 0;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev))
-		return smu_print_clk_levels(&adev->smu, SMU_SCLK, buf);
+		size = smu_print_clk_levels(&adev->smu, SMU_SCLK, buf);
 	else if (adev->powerplay.pp_funcs->print_clock_levels)
-		return amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf);
+		size = amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf);
 	else
-		return snprintf(buf, PAGE_SIZE, "\n");
+		size = snprintf(buf, PAGE_SIZE, "\n");
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
+	return size;
 }
 
 /*
@@ -928,11 +1053,20 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
 	if (ret)
 		return ret;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev))
 		ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask, true);
 	else if (adev->powerplay.pp_funcs->force_clock_level)
 		ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
+	else
+		ret = 0; /* pm_runtime_get_sync() may have returned 1 */
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
 
 	if (ret)
 		return -EINVAL;
 
@@ -945,16 +1077,27 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
 {
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
+	ssize_t size;
+	int ret;
 
 	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
 		return 0;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev))
-		return smu_print_clk_levels(&adev->smu, SMU_MCLK, buf);
+		size = smu_print_clk_levels(&adev->smu, SMU_MCLK, buf);
 	else if (adev->powerplay.pp_funcs->print_clock_levels)
-		return amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf);
+		size = amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf);
 	else
-		return snprintf(buf, PAGE_SIZE, "\n");
+		size = snprintf(buf, PAGE_SIZE, "\n");
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
+	return size;
 }
 
 static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
@@ -964,8 +1107,8 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
 {
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
-	int ret;
 	uint32_t mask = 0;
+	int ret;
 
 	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
 			return -EINVAL;
@@ -974,11 +1117,20 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
 	if (ret)
 		return ret;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev))
 		ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask, true);
 	else if (adev->powerplay.pp_funcs->force_clock_level)
 		ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
+	else
+		ret = 0; /* pm_runtime_get_sync() may have returned 1 */
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
 
 	if (ret)
 		return -EINVAL;
 
@@ -991,16 +1141,27 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev,
 {
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
+	ssize_t size;
+	int ret;
 
 	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
 		return 0;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev))
-		return smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf);
+		size = smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf);
 	else if (adev->powerplay.pp_funcs->print_clock_levels)
-		return amdgpu_dpm_print_clock_levels(adev, PP_SOCCLK, buf);
+		size = amdgpu_dpm_print_clock_levels(adev, PP_SOCCLK, buf);
 	else
-		return snprintf(buf, PAGE_SIZE, "\n");
+		size = snprintf(buf, PAGE_SIZE, "\n");
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
+	return size;
 }
 
 static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
@@ -1020,10 +1181,19 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
 	if (ret)
 		return ret;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev))
 		ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask, true);
 	else if (adev->powerplay.pp_funcs->force_clock_level)
 		ret = amdgpu_dpm_force_clock_level(adev, PP_SOCCLK, mask);
+	else
+		ret = 0;
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
 
 	if (ret)
 		return -EINVAL;
@@ -1037,16 +1207,27 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev,
 {
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
+	ssize_t size;
+	int ret;
 
 	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
 		return 0;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev))
-		return smu_print_clk_levels(&adev->smu, SMU_FCLK, buf);
+		size = smu_print_clk_levels(&adev->smu, SMU_FCLK, buf);
 	else if (adev->powerplay.pp_funcs->print_clock_levels)
-		return amdgpu_dpm_print_clock_levels(adev, PP_FCLK, buf);
+		size = amdgpu_dpm_print_clock_levels(adev, PP_FCLK, buf);
 	else
-		return snprintf(buf, PAGE_SIZE, "\n");
+		size = snprintf(buf, PAGE_SIZE, "\n");
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
+	return size;
 }
 
 static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
@@ -1066,10 +1247,19 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
 	if (ret)
 		return ret;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev))
 		ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask, true);
 	else if (adev->powerplay.pp_funcs->force_clock_level)
 		ret = amdgpu_dpm_force_clock_level(adev, PP_FCLK, mask);
+	else
+		ret = 0;
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
 
 	if (ret)
 		return -EINVAL;
@@ -1083,16 +1273,27 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev,
 {
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
+	ssize_t size;
+	int ret;
 
 	if (amdgpu_sriov_vf(adev))
 		return 0;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev))
-		return smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf);
+		size = smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf);
 	else if (adev->powerplay.pp_funcs->print_clock_levels)
-		return amdgpu_dpm_print_clock_levels(adev, PP_DCEFCLK, buf);
+		size = amdgpu_dpm_print_clock_levels(adev, PP_DCEFCLK, buf);
 	else
-		return snprintf(buf, PAGE_SIZE, "\n");
+		size = snprintf(buf, PAGE_SIZE, "\n");
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
+	return size;
 }
 
 static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
@@ -1112,10 +1313,19 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
 	if (ret)
 		return ret;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev))
 		ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask, true);
 	else if (adev->powerplay.pp_funcs->force_clock_level)
 		ret = amdgpu_dpm_force_clock_level(adev, PP_DCEFCLK, mask);
+	else
+		ret = 0;
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
 
 	if (ret)
 		return -EINVAL;
@@ -1129,16 +1339,27 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
 {
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
+	ssize_t size;
+	int ret;
 
 	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
 		return 0;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev))
-		return smu_print_clk_levels(&adev->smu, SMU_PCIE, buf);
+		size = smu_print_clk_levels(&adev->smu, SMU_PCIE, buf);
 	else if (adev->powerplay.pp_funcs->print_clock_levels)
-		return amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf);
+		size = amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf);
 	else
-		return snprintf(buf, PAGE_SIZE, "\n");
+		size = snprintf(buf, PAGE_SIZE, "\n");
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
+	return size;
 }
 
 static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
@@ -1158,10 +1379,19 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
 	if (ret)
 		return ret;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev))
 		ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask, true);
 	else if (adev->powerplay.pp_funcs->force_clock_level)
 		ret = amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask);
+	else
+		ret = 0;
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
 
 	if (ret)
 		return -EINVAL;
@@ -1176,15 +1406,23 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
 	uint32_t value = 0;
+	int ret;
 
 	if (amdgpu_sriov_vf(adev))
 		return 0;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev))
 		value = smu_get_od_percentage(&(adev->smu), SMU_OD_SCLK);
 	else if (adev->powerplay.pp_funcs->get_sclk_od)
 		value = amdgpu_dpm_get_sclk_od(adev);
 
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	return snprintf(buf, PAGE_SIZE, "%d\n", value);
 }
 
@@ -1203,10 +1441,12 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
 
 	ret = kstrtol(buf, 0, &value);
 
-	if (ret) {
-		count = -EINVAL;
-		goto fail;
-	}
+	if (ret)
+		return -EINVAL;
+
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
 
 	if (is_support_sw_smu(adev)) {
 		value = smu_set_od_percentage(&(adev->smu), SMU_OD_SCLK, (uint32_t)value);
@@ -1222,7 +1462,9 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
 		}
 	}
 
-fail:
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	return count;
 }
 
@@ -1233,15 +1475,23 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
 	uint32_t value = 0;
+	int ret;
 
 	if (amdgpu_sriov_vf(adev))
 		return 0;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev))
 		value = smu_get_od_percentage(&(adev->smu), SMU_OD_MCLK);
 	else if (adev->powerplay.pp_funcs->get_mclk_od)
 		value = amdgpu_dpm_get_mclk_od(adev);
 
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	return snprintf(buf, PAGE_SIZE, "%d\n", value);
 }
 
@@ -1260,10 +1510,12 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
 
 	ret = kstrtol(buf, 0, &value);
 
-	if (ret) {
-		count = -EINVAL;
-		goto fail;
-	}
+	if (ret)
+		return -EINVAL;
+
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
 
 	if (is_support_sw_smu(adev)) {
 		value = smu_set_od_percentage(&(adev->smu), SMU_OD_MCLK, (uint32_t)value);
@@ -1279,7 +1531,9 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
 		}
 	}
 
-fail:
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	return count;
 }
 
@@ -1309,16 +1563,27 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
 {
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
+	ssize_t size;
+	int ret;
 
 	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
 		return 0;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev))
-		return smu_get_power_profile_mode(&adev->smu, buf);
+		size = smu_get_power_profile_mode(&adev->smu, buf);
 	else if (adev->powerplay.pp_funcs->get_power_profile_mode)
-		return amdgpu_dpm_get_power_profile_mode(adev, buf);
+		size = amdgpu_dpm_get_power_profile_mode(adev, buf);
+	else
+		size = snprintf(buf, PAGE_SIZE, "\n");
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
 
-	return snprintf(buf, PAGE_SIZE, "\n");
+	return size;
 }
 
 
@@ -1343,7 +1608,7 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
 	tmp[1] = '\0';
 	ret = kstrtol(tmp, 0, &profile_mode);
 	if (ret)
-		goto fail;
+		return -EINVAL;
 
 	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
 		return -EINVAL;
@@ -1358,23 +1623,32 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
 		while (tmp_str[0]) {
 			sub_str = strsep(&tmp_str, delimiter);
 			ret = kstrtol(sub_str, 0, &parameter[parameter_size]);
-			if (ret) {
-				count = -EINVAL;
-				goto fail;
-			}
+			if (ret)
+				return -EINVAL;
 			parameter_size++;
 			while (isspace(*tmp_str))
 				tmp_str++;
 		}
 	}
 	parameter[parameter_size] = profile_mode;
+
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev))
 		ret = smu_set_power_profile_mode(&adev->smu, parameter, parameter_size, true);
 	else if (adev->powerplay.pp_funcs->set_power_profile_mode)
 		ret = amdgpu_dpm_set_power_profile_mode(adev, parameter, parameter_size);
+	else
+		ret = 0; /* pm_runtime_get_sync() may have returned 1 */
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	if (!ret)
 		return count;
-fail:
+
 	return -EINVAL;
 }
 
@@ -1397,10 +1669,17 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev,
 	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
 		return 0;
 
+	r = pm_runtime_get_sync(ddev->dev);
+	if (r < 0)
+		return r;
+
 	/* read the IP busy sensor */
 	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD,
 				   (void *)&value, &size);
 
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	if (r)
 		return r;
 
@@ -1426,10 +1705,17 @@ static ssize_t amdgpu_get_memory_busy_percent(struct device *dev,
 	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
 		return 0;
 
+	r = pm_runtime_get_sync(ddev->dev);
+	if (r < 0)
+		return r;
+
 	/* read the IP busy sensor */
 	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD,
 				   (void *)&value, &size);
 
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	if (r)
 		return r;
 
@@ -1455,11 +1741,20 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
 	uint64_t count0, count1;
+	int ret;
 
 	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
 		return 0;
 
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	amdgpu_asic_get_pcie_usage(adev, &count0, &count1);
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
 	return snprintf(buf, PAGE_SIZE,	"%llu %llu %i\n",
 			count0, count1, pcie_get_mps(adev->pdev));
 }
@@ -1547,42 +1842,43 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
 				      char *buf)
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
-	struct drm_device *ddev = adev->ddev;
 	int channel = to_sensor_dev_attr(attr)->index;
 	int r, temp = 0, size = sizeof(temp);
 
-	/* Can't get temperature when the card is off */
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return -EINVAL;
-
 	if (channel >= PP_TEMP_MAX)
 		return -EINVAL;
 
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
+
 	switch (channel) {
 	case PP_TEMP_JUNCTION:
 		/* get current junction temperature */
 		r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP,
 					   (void *)&temp, &size);
-		if (r)
-			return r;
 		break;
 	case PP_TEMP_EDGE:
 		/* get current edge temperature */
 		r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP,
 					   (void *)&temp, &size);
-		if (r)
-			return r;
 		break;
 	case PP_TEMP_MEM:
 		/* get current memory temperature */
 		r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP,
 					   (void *)&temp, &size);
-		if (r)
-			return r;
+		break;
+	default:
+		r = -EINVAL;
 		break;
 	}
 
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
+	if (r)
+		return r;
+
 	return snprintf(buf, PAGE_SIZE, "%d\n", temp);
 }
 
@@ -1678,16 +1974,27 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
 	u32 pwm_mode = 0;
+	int ret;
+
+	ret = pm_runtime_get_sync(adev->ddev->dev);
+	if (ret < 0)
+		return ret;
 
 	if (is_support_sw_smu(adev)) {
 		pwm_mode = smu_get_fan_control_mode(&adev->smu);
 	} else {
-		if (!adev->powerplay.pp_funcs->get_fan_control_mode)
+		if (!adev->powerplay.pp_funcs->get_fan_control_mode) {
+			pm_runtime_mark_last_busy(adev->ddev->dev);
+			pm_runtime_put_autosuspend(adev->ddev->dev);
 			return -EINVAL;
+		}
 
 		pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
 	}
 
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	return sprintf(buf, "%i\n", pwm_mode);
 }
 
@@ -1697,27 +2004,32 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
 					    size_t count)
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
-	int err;
+	int err, ret;
 	int value;
 
-	/* Can't adjust fan when the card is off */
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return -EINVAL;
-
 	err = kstrtoint(buf, 10, &value);
 	if (err)
 		return err;
 
+	ret = pm_runtime_get_sync(adev->ddev->dev);
+	if (ret < 0)
+		return ret;
+
 	if (is_support_sw_smu(adev)) {
 		smu_set_fan_control_mode(&adev->smu, value);
 	} else {
-		if (!adev->powerplay.pp_funcs->set_fan_control_mode)
+		if (!adev->powerplay.pp_funcs->set_fan_control_mode) {
+			pm_runtime_mark_last_busy(adev->ddev->dev);
+			pm_runtime_put_autosuspend(adev->ddev->dev);
 			return -EINVAL;
+		}
 
 		amdgpu_dpm_set_fan_control_mode(adev, value);
 	}
 
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	return count;
 }
 
@@ -1744,34 +2056,43 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
 	u32 value;
 	u32 pwm_mode;
 
-	/* Can't adjust fan when the card is off */
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return -EINVAL;
+	err = pm_runtime_get_sync(adev->ddev->dev);
+	if (err < 0)
+		return err;
+
 	if (is_support_sw_smu(adev))
 		pwm_mode = smu_get_fan_control_mode(&adev->smu);
 	else
 		pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
+
 	if (pwm_mode != AMD_FAN_CTRL_MANUAL) {
 		pr_info("manual fan speed control should be enabled first\n");
+		pm_runtime_mark_last_busy(adev->ddev->dev);
+		pm_runtime_put_autosuspend(adev->ddev->dev);
 		return -EINVAL;
 	}
 
 	err = kstrtou32(buf, 10, &value);
-	if (err)
+	if (err) {
+		pm_runtime_mark_last_busy(adev->ddev->dev);
+		pm_runtime_put_autosuspend(adev->ddev->dev);
 		return err;
+	}
 
 	value = (value * 100) / 255;
 
-	if (is_support_sw_smu(adev)) {
+	if (is_support_sw_smu(adev))
 		err = smu_set_fan_speed_percent(&adev->smu, value);
-		if (err)
-			return err;
-	} else if (adev->powerplay.pp_funcs->set_fan_speed_percent) {
+	else if (adev->powerplay.pp_funcs->set_fan_speed_percent)
 		err = amdgpu_dpm_set_fan_speed_percent(adev, value);
-		if (err)
-			return err;
-	}
+	else
+		err = -EINVAL;
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
+	if (err)
+		return err;
 
 	return count;
 }
@@ -1784,20 +2105,22 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
 	int err;
 	u32 speed = 0;
 
-	/* Can't adjust fan when the card is off */
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return -EINVAL;
+	err = pm_runtime_get_sync(adev->ddev->dev);
+	if (err < 0)
+		return err;
 
-	if (is_support_sw_smu(adev)) {
+	if (is_support_sw_smu(adev))
 		err = smu_get_fan_speed_percent(&adev->smu, &speed);
-		if (err)
-			return err;
-	} else if (adev->powerplay.pp_funcs->get_fan_speed_percent) {
+	else if (adev->powerplay.pp_funcs->get_fan_speed_percent)
 		err = amdgpu_dpm_get_fan_speed_percent(adev, &speed);
-		if (err)
-			return err;
-	}
+	else
+		err = -EINVAL;
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
+	if (err)
+		return err;
 
 	speed = (speed * 255) / 100;
 
@@ -1812,20 +2135,22 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
 	int err;
 	u32 speed = 0;
 
-	/* Can't adjust fan when the card is off */
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return -EINVAL;
+	err = pm_runtime_get_sync(adev->ddev->dev);
+	if (err < 0)
+		return err;
 
-	if (is_support_sw_smu(adev)) {
+	if (is_support_sw_smu(adev))
 		err = smu_get_fan_speed_rpm(&adev->smu, &speed);
-		if (err)
-			return err;
-	} else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) {
+	else if (adev->powerplay.pp_funcs->get_fan_speed_rpm)
 		err = amdgpu_dpm_get_fan_speed_rpm(adev, &speed);
-		if (err)
-			return err;
-	}
+	else
+		err = -EINVAL;
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
+	if (err)
+		return err;
 
 	return sprintf(buf, "%i\n", speed);
 }
@@ -1839,8 +2164,16 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev,
 	u32 size = sizeof(min_rpm);
 	int r;
 
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
+
 	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM,
 				   (void *)&min_rpm, &size);
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	if (r)
 		return r;
 
@@ -1856,8 +2189,16 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev,
 	u32 size = sizeof(max_rpm);
 	int r;
 
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
+
 	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM,
 				   (void *)&max_rpm, &size);
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	if (r)
 		return r;
 
@@ -1872,20 +2213,22 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
 	int err;
 	u32 rpm = 0;
 
-	/* Can't adjust fan when the card is off */
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return -EINVAL;
+	err = pm_runtime_get_sync(adev->ddev->dev);
+	if (err < 0)
+		return err;
 
-	if (is_support_sw_smu(adev)) {
+	if (is_support_sw_smu(adev))
 		err = smu_get_fan_speed_rpm(&adev->smu, &rpm);
-		if (err)
-			return err;
-	} else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) {
+	else if (adev->powerplay.pp_funcs->get_fan_speed_rpm)
 		err = amdgpu_dpm_get_fan_speed_rpm(adev, &rpm);
-		if (err)
-			return err;
-	}
+	else
+		err = -EINVAL;
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
+	if (err)
+		return err;
 
 	return sprintf(buf, "%i\n", rpm);
 }
@@ -1899,32 +2242,40 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
 	u32 value;
 	u32 pwm_mode;
 
+	err = pm_runtime_get_sync(adev->ddev->dev);
+	if (err < 0)
+		return err;
+
 	if (is_support_sw_smu(adev))
 		pwm_mode = smu_get_fan_control_mode(&adev->smu);
 	else
 		pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
 
-	if (pwm_mode != AMD_FAN_CTRL_MANUAL)
+	if (pwm_mode != AMD_FAN_CTRL_MANUAL) {
+		pm_runtime_mark_last_busy(adev->ddev->dev);
+		pm_runtime_put_autosuspend(adev->ddev->dev);
 		return -ENODATA;
-
-	/* Can't adjust fan when the card is off */
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return -EINVAL;
+	}
 
 	err = kstrtou32(buf, 10, &value);
-	if (err)
+	if (err) {
+		pm_runtime_mark_last_busy(adev->ddev->dev);
+		pm_runtime_put_autosuspend(adev->ddev->dev);
 		return err;
+	}
 
-	if (is_support_sw_smu(adev)) {
+	if (is_support_sw_smu(adev))
 		err = smu_set_fan_speed_rpm(&adev->smu, value);
-		if (err)
-			return err;
-	} else if (adev->powerplay.pp_funcs->set_fan_speed_rpm) {
+	else if (adev->powerplay.pp_funcs->set_fan_speed_rpm)
 		err = amdgpu_dpm_set_fan_speed_rpm(adev, value);
-		if (err)
-			return err;
-	}
+	else
+		err = -EINVAL;
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
+	if (err)
+		return err;
 
 	return count;
 }
@@ -1935,15 +2286,27 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev,
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
 	u32 pwm_mode = 0;
+	int ret;
+
+	ret = pm_runtime_get_sync(adev->ddev->dev);
+	if (ret < 0)
+		return ret;
 
 	if (is_support_sw_smu(adev)) {
 		pwm_mode = smu_get_fan_control_mode(&adev->smu);
 	} else {
-		if (!adev->powerplay.pp_funcs->get_fan_control_mode)
+		if (!adev->powerplay.pp_funcs->get_fan_control_mode) {
+			pm_runtime_mark_last_busy(adev->ddev->dev);
+			pm_runtime_put_autosuspend(adev->ddev->dev);
 			return -EINVAL;
+		}
 
 		pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
 	}
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	return sprintf(buf, "%i\n", pwm_mode == AMD_FAN_CTRL_AUTO ? 0 : 1);
 }
 
@@ -1957,12 +2320,6 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
 	int value;
 	u32 pwm_mode;
 
-	/* Can't adjust fan when the card is off */
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return -EINVAL;
-
-
 	err = kstrtoint(buf, 10, &value);
 	if (err)
 		return err;
@@ -1974,14 +2331,24 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
 	else
 		return -EINVAL;
 
+	err = pm_runtime_get_sync(adev->ddev->dev);
+	if (err < 0)
+		return err;
+
 	if (is_support_sw_smu(adev)) {
 		smu_set_fan_control_mode(&adev->smu, pwm_mode);
 	} else {
-		if (!adev->powerplay.pp_funcs->set_fan_control_mode)
+		if (!adev->powerplay.pp_funcs->set_fan_control_mode) {
+			pm_runtime_mark_last_busy(adev->ddev->dev);
+			pm_runtime_put_autosuspend(adev->ddev->dev);
 			return -EINVAL;
+		}
 		amdgpu_dpm_set_fan_control_mode(adev, pwm_mode);
 	}
 
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	return count;
 }
 
@@ -1990,18 +2357,20 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev,
 					char *buf)
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
-	struct drm_device *ddev = adev->ddev;
 	u32 vddgfx;
 	int r, size = sizeof(vddgfx);
 
-	/* Can't get voltage when the card is off */
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return -EINVAL;
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
 
 	/* get the voltage */
 	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX,
 				   (void *)&vddgfx, &size);
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	if (r)
 		return r;
 
@@ -2020,7 +2389,6 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,
 				       char *buf)
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
-	struct drm_device *ddev = adev->ddev;
 	u32 vddnb;
 	int r, size = sizeof(vddnb);
 
@@ -2028,14 +2396,17 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,
 	if  (!(adev->flags & AMD_IS_APU))
 		return -EINVAL;
 
-	/* Can't get voltage when the card is off */
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return -EINVAL;
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
 
 	/* get the voltage */
 	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB,
 				   (void *)&vddnb, &size);
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	if (r)
 		return r;
 
@@ -2054,19 +2425,21 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
 					   char *buf)
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
-	struct drm_device *ddev = adev->ddev;
 	u32 query = 0;
 	int r, size = sizeof(u32);
 	unsigned uw;
 
-	/* Can't get power when the card is off */
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return -EINVAL;
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
 
 	/* get the voltage */
 	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER,
 				   (void *)&query, &size);
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	if (r)
 		return r;
 
@@ -2089,16 +2462,27 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
 	uint32_t limit = 0;
+	ssize_t size;
+	int r;
+
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
 
 	if (is_support_sw_smu(adev)) {
 		smu_get_power_limit(&adev->smu, &limit, true, true);
-		return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
+		size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
 	} else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) {
 		adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, true);
-		return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
+		size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
 	} else {
-		return snprintf(buf, PAGE_SIZE, "\n");
+		size = snprintf(buf, PAGE_SIZE, "\n");
 	}
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
+	return size;
 }
 
 static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
@@ -2107,16 +2491,27 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
 	uint32_t limit = 0;
+	ssize_t size;
+	int r;
+
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
 
 	if (is_support_sw_smu(adev)) {
 		smu_get_power_limit(&adev->smu, &limit, false,  true);
-		return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
+		size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
 	} else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) {
 		adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, false);
-		return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
+		size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
 	} else {
-		return snprintf(buf, PAGE_SIZE, "\n");
+		size = snprintf(buf, PAGE_SIZE, "\n");
 	}
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
+	return size;
 }
 
 
@@ -2138,13 +2533,20 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
 
 	value = value / 1000000; /* convert to Watt */
 
-	if (is_support_sw_smu(adev)) {
+
+	err = pm_runtime_get_sync(adev->ddev->dev);
+	if (err < 0)
+		return err;
+
+	if (is_support_sw_smu(adev))
 		err = smu_set_power_limit(&adev->smu, value);
-	} else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) {
+	else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit)
 		err = adev->powerplay.pp_funcs->set_power_limit(adev->powerplay.pp_handle, value);
-	} else {
+	else
 		err = -EINVAL;
-	}
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
 
 	if (err)
 		return err;
@@ -2157,18 +2559,20 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev,
 				      char *buf)
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
-	struct drm_device *ddev = adev->ddev;
 	uint32_t sclk;
 	int r, size = sizeof(sclk);
 
-	/* Can't get voltage when the card is off */
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return -EINVAL;
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
 
 	/* get the sclk */
 	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK,
 				   (void *)&sclk, &size);
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	if (r)
 		return r;
 
@@ -2187,18 +2591,20 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev,
 				      char *buf)
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
-	struct drm_device *ddev = adev->ddev;
 	uint32_t mclk;
 	int r, size = sizeof(mclk);
 
-	/* Can't get voltage when the card is off */
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return -EINVAL;
+	r = pm_runtime_get_sync(adev->ddev->dev);
+	if (r < 0)
+		return r;
 
 	/* get the sclk */
 	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK,
 				   (void *)&mclk, &size);
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
 	if (r)
 		return r;
 
@@ -3220,8 +3626,12 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
 	struct drm_info_node *node = (struct drm_info_node *) m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct amdgpu_device *adev = dev->dev_private;
-	struct drm_device *ddev = adev->ddev;
 	u32 flags = 0;
+	int r;
+
+	r = pm_runtime_get_sync(dev->dev);
+	if (r < 0)
+		return r;
 
 	amdgpu_device_ip_get_clockgating_state(adev, &flags);
 	seq_printf(m, "Clock Gating Flags Mask: 0x%x\n", flags);
@@ -3230,23 +3640,28 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
 
 	if (!adev->pm.dpm_enabled) {
 		seq_printf(m, "dpm not enabled\n");
+		pm_runtime_mark_last_busy(dev->dev);
+		pm_runtime_put_autosuspend(dev->dev);
 		return 0;
 	}
-	if  ((adev->flags & AMD_IS_PX) &&
-	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) {
-		seq_printf(m, "PX asic powered off\n");
-	} else if (!is_support_sw_smu(adev) && adev->powerplay.pp_funcs->debugfs_print_current_performance_level) {
+
+	if (!is_support_sw_smu(adev) &&
+	    adev->powerplay.pp_funcs->debugfs_print_current_performance_level) {
 		mutex_lock(&adev->pm.mutex);
 		if (adev->powerplay.pp_funcs->debugfs_print_current_performance_level)
 			adev->powerplay.pp_funcs->debugfs_print_current_performance_level(adev, m);
 		else
 			seq_printf(m, "Debugfs support not implemented for this asic\n");
 		mutex_unlock(&adev->pm.mutex);
+		r = 0;
 	} else {
-		return amdgpu_debugfs_pm_info_pp(m, adev);
+		r = amdgpu_debugfs_pm_info_pp(m, adev);
 	}
 
-	return 0;
+	pm_runtime_mark_last_busy(dev->dev);
+	pm_runtime_put_autosuspend(dev->dev);
+
+	return r;
 }
 
 static const struct drm_info_list amdgpu_pm_info_list[] = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
index cf21ad0cad9a..07914e34bc25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
@@ -74,9 +74,9 @@ static void amdgpu_perf_start(struct perf_event *event, int flags)
 	switch (pe->pmu_perf_type) {
 	case PERF_TYPE_AMDGPU_DF:
 		if (!(flags & PERF_EF_RELOAD))
-			pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 1);
+			pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 1);
 
-		pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 0);
+		pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 0);
 		break;
 	default:
 		break;
@@ -101,7 +101,7 @@ static void amdgpu_perf_read(struct perf_event *event)
 
 		switch (pe->pmu_perf_type) {
 		case PERF_TYPE_AMDGPU_DF:
-			pe->adev->df_funcs->pmc_get_count(pe->adev, hwc->conf,
+			pe->adev->df.funcs->pmc_get_count(pe->adev, hwc->conf,
 							  &count);
 			break;
 		default:
@@ -126,7 +126,7 @@ static void amdgpu_perf_stop(struct perf_event *event, int flags)
 
 	switch (pe->pmu_perf_type) {
 	case PERF_TYPE_AMDGPU_DF:
-		pe->adev->df_funcs->pmc_stop(pe->adev, hwc->conf, 0);
+		pe->adev->df.funcs->pmc_stop(pe->adev, hwc->conf, 0);
 		break;
 	default:
 		break;
@@ -156,7 +156,7 @@ static int amdgpu_perf_add(struct perf_event *event, int flags)
 
 	switch (pe->pmu_perf_type) {
 	case PERF_TYPE_AMDGPU_DF:
-		retval = pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 1);
+		retval = pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 1);
 		break;
 	default:
 		return 0;
@@ -184,7 +184,7 @@ static void amdgpu_perf_del(struct perf_event *event, int flags)
 
 	switch (pe->pmu_perf_type) {
 	case PERF_TYPE_AMDGPU_DF:
-		pe->adev->df_funcs->pmc_stop(pe->adev, hwc->conf, 1);
+		pe->adev->df.funcs->pmc_stop(pe->adev, hwc->conf, 1);
 		break;
 	default:
 		break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 281d89640344..3a1570dafe34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -529,6 +529,11 @@ static int psp_xgmi_unload(struct psp_context *psp)
 {
 	int ret;
 	struct psp_gfx_cmd_resp *cmd;
+	struct amdgpu_device *adev = psp->adev;
+
+	/* XGMI TA unload is currently not supported on Arcturus */
+	if (adev->asic_type == CHIP_ARCTURUS)
+		return 0;
 
 	/*
 	 * TODO: bypass the unloading in sriov for now
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 96fc538ec824..766be7f18282 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -686,6 +686,7 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,
 {
 	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
 	struct ras_err_data err_data = {0, 0, 0, NULL};
+	int i;
 
 	if (!obj)
 		return -EINVAL;
@@ -700,6 +701,13 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,
 		if (adev->umc.funcs->query_ras_error_address)
 			adev->umc.funcs->query_ras_error_address(adev, &err_data);
 		break;
+	case AMDGPU_RAS_BLOCK__SDMA:
+		if (adev->sdma.funcs->query_ras_error_count) {
+			for (i = 0; i < adev->sdma.num_instances; i++)
+				adev->sdma.funcs->query_ras_error_count(adev, i,
+									&err_data);
+		}
+		break;
 	case AMDGPU_RAS_BLOCK__GFX:
 		if (adev->gfx.funcs->query_ras_error_count)
 			adev->gfx.funcs->query_ras_error_count(adev, &err_data);
@@ -1345,7 +1353,8 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
 	struct amdgpu_ras *ras =
 		container_of(work, struct amdgpu_ras, recovery_work);
 
-	amdgpu_device_gpu_recover(ras->adev, 0);
+	if (amdgpu_device_should_recover_gpu(ras->adev))
+		amdgpu_device_gpu_recover(ras->adev, 0);
 	atomic_set(&ras->in_recovery, 0);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 346dcb1f7146..485335267d78 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -50,6 +50,14 @@ struct amdgpu_sdma_instance {
 	bool			burst_nop;
 };
 
+struct amdgpu_sdma_ras_funcs {
+	int (*ras_late_init)(struct amdgpu_device *adev,
+			void *ras_ih_info);
+	void (*ras_fini)(struct amdgpu_device *adev);
+	int (*query_ras_error_count)(struct amdgpu_device *adev,
+			uint32_t instance, void *ras_error_status);
+};
+
 struct amdgpu_sdma {
 	struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
 	struct drm_gpu_scheduler    *sdma_sched[AMDGPU_MAX_SDMA_INSTANCES];
@@ -61,6 +69,7 @@ struct amdgpu_sdma {
 	uint32_t                    srbm_soft_reset;
 	bool			has_page_queue;
 	struct ras_common_if	*ras_if;
+	const struct amdgpu_sdma_ras_funcs	*funcs;
 };
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 3114d8a47e88..dee446278417 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -41,6 +41,7 @@
 #include <linux/swap.h>
 #include <linux/swiotlb.h>
 #include <linux/dma-buf.h>
+#include <linux/sizes.h>
 
 #include <drm/ttm/ttm_bo_api.h>
 #include <drm/ttm/ttm_bo_driver.h>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index ed106d99d078..f96464e2c157 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -75,6 +75,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
 		break;
 	case CHIP_ARCTURUS:
 		fw_name = FIRMWARE_ARCTURUS;
+		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
+		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
+			adev->vcn.indirect_sram = true;
 		break;
 	case CHIP_RENOIR:
 		fw_name = FIRMWARE_RENOIR;
@@ -165,15 +168,15 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
 			dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
 			return r;
 		}
-	}
 
-	if (adev->vcn.indirect_sram) {
-		r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
-			    AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.dpg_sram_bo,
-			    &adev->vcn.dpg_sram_gpu_addr, &adev->vcn.dpg_sram_cpu_addr);
-		if (r) {
-			dev_err(adev->dev, "(%d) failed to allocate DPG bo\n", r);
-			return r;
+		if (adev->vcn.indirect_sram) {
+			r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
+					AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].dpg_sram_bo,
+					&adev->vcn.inst[i].dpg_sram_gpu_addr, &adev->vcn.inst[i].dpg_sram_cpu_addr);
+			if (r) {
+				dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r);
+				return r;
+			}
 		}
 	}
 
@@ -186,15 +189,14 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
 
 	cancel_delayed_work_sync(&adev->vcn.idle_work);
 
-	if (adev->vcn.indirect_sram) {
-		amdgpu_bo_free_kernel(&adev->vcn.dpg_sram_bo,
-				      &adev->vcn.dpg_sram_gpu_addr,
-				      (void **)&adev->vcn.dpg_sram_cpu_addr);
-	}
-
 	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
 		if (adev->vcn.harvest_config & (1 << j))
 			continue;
+		if (adev->vcn.indirect_sram) {
+			amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo,
+						  &adev->vcn.inst[j].dpg_sram_gpu_addr,
+						  (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr);
+		}
 		kvfree(adev->vcn.inst[j].saved_bo);
 
 		amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo,
@@ -298,7 +300,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
 			else
 				new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
 
-			adev->vcn.pause_dpg_mode(adev, &new_state);
+			adev->vcn.pause_dpg_mode(adev, j, &new_state);
 		}
 
 		fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec);
@@ -341,7 +343,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
 		if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
 			new_state.fw_based = VCN_DPG_STATE__PAUSE;
 
-		adev->vcn.pause_dpg_mode(adev, &new_state);
+		adev->vcn.pause_dpg_mode(adev, ring->me, &new_state);
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index e6dee8224d33..c4984c5fb2db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -57,6 +57,11 @@
 #define VCN_VID_IP_ADDRESS_2_0		0x0
 #define VCN_AON_IP_ADDRESS_2_0		0x30000
 
+#define mmUVD_RBC_XX_IB_REG_CHECK 					0x026b
+#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 				1
+#define mmUVD_REG_XX_MASK 						0x026c
+#define mmUVD_REG_XX_MASK_BASE_IDX 					1
+
 /* 1 second timeout */
 #define VCN_IDLE_TIMEOUT	msecs_to_jiffies(1000)
 
@@ -104,27 +109,27 @@
 		internal_reg_offset >>= 2;							\
 	})
 
-#define RREG32_SOC15_DPG_MODE_2_0(offset, mask_en) 						\
-	({ 											\
-		WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_CTL, 					\
-			(0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | 				\
-			mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | 				\
-			offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); 			\
-		RREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_DATA); 					\
+#define RREG32_SOC15_DPG_MODE_2_0(inst_idx, offset, mask_en) 					\
+	({											\
+		WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, 					\
+			(0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT |				\
+			mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT |				\
+			offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT));			\
+		RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA);				\
+	})
 
-#define WREG32_SOC15_DPG_MODE_2_0(offset, value, mask_en, indirect)				\
-	do { 											\
-		if (!indirect) { 								\
-			WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_DATA, value); 			\
-			WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_CTL, 				\
-				(0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | 			\
-				 mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | 			\
-				 offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); 		\
-		} else { 									\
-			*adev->vcn.dpg_sram_curr_addr++ = offset; 				\
-			*adev->vcn.dpg_sram_curr_addr++ = value; 				\
-		} 										\
+#define WREG32_SOC15_DPG_MODE_2_0(inst_idx, offset, value, mask_en, indirect)			\
+	do {											\
+		if (!indirect) {								\
+			WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA, value);			\
+			WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, 				\
+				(0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT |			\
+				 mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT |			\
+				 offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT));		\
+		} else {									\
+			*adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = offset;		\
+			*adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = value;			\
+		}										\
 	} while (0)
 
 enum engine_status_constants {
@@ -173,6 +178,10 @@ struct amdgpu_vcn_inst {
 	struct amdgpu_ring	ring_enc[AMDGPU_VCN_MAX_ENC_RINGS];
 	struct amdgpu_irq_src	irq;
 	struct amdgpu_vcn_reg	external;
+	struct amdgpu_bo	*dpg_sram_bo;
+	void			*dpg_sram_cpu_addr;
+	uint64_t		dpg_sram_gpu_addr;
+	uint32_t		*dpg_sram_curr_addr;
 };
 
 struct amdgpu_vcn {
@@ -184,10 +193,6 @@ struct amdgpu_vcn {
 	struct dpg_pause_state pause_state;
 
 	bool			indirect_sram;
-	struct amdgpu_bo	*dpg_sram_bo;
-	void			*dpg_sram_cpu_addr;
-	uint64_t		dpg_sram_gpu_addr;
-	uint32_t		*dpg_sram_curr_addr;
 
 	uint8_t	num_vcn_inst;
 	struct amdgpu_vcn_inst	 inst[AMDGPU_MAX_VCN_INSTANCES];
@@ -199,7 +204,7 @@ struct amdgpu_vcn {
 
 	unsigned	harvest_config;
 	int (*pause_dpg_mode)(struct amdgpu_device *adev,
-		struct dpg_pause_state *new_state);
+		int inst_idx, struct dpg_pause_state *new_state);
 };
 
 int amdgpu_vcn_sw_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 4dc75eda1d91..d16231d6a790 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -83,6 +83,32 @@ struct amdgpu_prt_cb {
 };
 
 /**
+ * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
+ * happens while holding this lock anywhere to prevent deadlocks when
+ * an MMU notifier runs in reclaim-FS context.
+ */
+static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
+{
+	mutex_lock(&vm->eviction_lock);
+	vm->saved_flags = memalloc_nofs_save();
+}
+
+static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
+{
+	if (mutex_trylock(&vm->eviction_lock)) {
+		vm->saved_flags = memalloc_nofs_save();
+		return 1;
+	}
+	return 0;
+}
+
+static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
+{
+	memalloc_nofs_restore(vm->saved_flags);
+	mutex_unlock(&vm->eviction_lock);
+}
+
+/**
  * amdgpu_vm_level_shift - return the addr shift for each level
  *
  * @adev: amdgpu_device pointer
@@ -678,9 +704,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		}
 	}
 
-	mutex_lock(&vm->eviction_lock);
+	amdgpu_vm_eviction_lock(vm);
 	vm->evicting = false;
-	mutex_unlock(&vm->eviction_lock);
+	amdgpu_vm_eviction_unlock(vm);
 
 	return 0;
 }
@@ -1559,7 +1585,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (!(flags & AMDGPU_PTE_VALID))
 		owner = AMDGPU_FENCE_OWNER_KFD;
 
-	mutex_lock(&vm->eviction_lock);
+	amdgpu_vm_eviction_lock(vm);
 	if (vm->evicting) {
 		r = -EBUSY;
 		goto error_unlock;
@@ -1576,7 +1602,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	r = vm->update_funcs->commit(&params, fence);
 
 error_unlock:
-	mutex_unlock(&vm->eviction_lock);
+	amdgpu_vm_eviction_unlock(vm);
 	return r;
 }
 
@@ -2533,18 +2559,18 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
 		return false;
 
 	/* Try to block ongoing updates */
-	if (!mutex_trylock(&bo_base->vm->eviction_lock))
+	if (!amdgpu_vm_eviction_trylock(bo_base->vm))
 		return false;
 
 	/* Don't evict VM page tables while they are updated */
 	if (!dma_fence_is_signaled(bo_base->vm->last_direct) ||
 	    !dma_fence_is_signaled(bo_base->vm->last_delayed)) {
-		mutex_unlock(&bo_base->vm->eviction_lock);
+		amdgpu_vm_eviction_unlock(bo_base->vm);
 		return false;
 	}
 
 	bo_base->vm->evicting = true;
-	mutex_unlock(&bo_base->vm->eviction_lock);
+	amdgpu_vm_eviction_unlock(bo_base->vm);
 	return true;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index fade4f45320c..b4640ab38c95 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -30,6 +30,7 @@
 #include <drm/gpu_scheduler.h>
 #include <drm/drm_file.h>
 #include <drm/ttm/ttm_bo_driver.h>
+#include <linux/sched/mm.h>
 
 #include "amdgpu_sync.h"
 #include "amdgpu_ring.h"
@@ -239,9 +240,12 @@ struct amdgpu_vm {
 	/* tree of virtual addresses mapped */
 	struct rb_root_cached	va;
 
-	/* Lock to prevent eviction while we are updating page tables */
+	/* Lock to prevent eviction while we are updating page tables;
+	 * use amdgpu_vm_eviction_lock/trylock/unlock(vm)
+	 */
 	struct mutex		eviction_lock;
 	bool			evicting;
+	unsigned int		saved_flags;
 
 	/* BOs who needs a validation */
 	struct list_head	evicted;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 5cf920d9358b..a97af422575a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -146,16 +146,16 @@ static ssize_t amdgpu_xgmi_show_error(struct device *dev,
 	ficaa_pie_ctl_in = AMDGPU_XGMI_SET_FICAA(0x200);
 	ficaa_pie_status_in = AMDGPU_XGMI_SET_FICAA(0x208);
 
-	fica_out = adev->df_funcs->get_fica(adev, ficaa_pie_ctl_in);
+	fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_ctl_in);
 	if (fica_out != 0x1f)
 		pr_err("xGMI error counters not enabled!\n");
 
-	fica_out = adev->df_funcs->get_fica(adev, ficaa_pie_status_in);
+	fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_status_in);
 
 	if ((fica_out & 0xffff) == 2)
 		error_count = ((fica_out >> 62) & 0x1) + (fica_out >> 63);
 
-	adev->df_funcs->set_fica(adev, ficaa_pie_status_in, 0, 0);
+	adev->df.funcs->set_fica(adev, ficaa_pie_status_in, 0, 0);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n", error_count);
 }
@@ -291,13 +291,7 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
 
 	dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate);
 
-	if (is_support_sw_smu_xgmi(adev))
-		ret = smu_set_xgmi_pstate(&adev->smu, pstate);
-	else if (adev->powerplay.pp_funcs &&
-		 adev->powerplay.pp_funcs->set_xgmi_pstate)
-		ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle,
-								pstate);
-
+	ret = amdgpu_dpm_set_xgmi_pstate(adev, pstate);
 	if (ret) {
 		dev_err(adev->dev,
 			"XGMI: Set pstate failure on device %llx, hive %llx, ret %d",
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index e9822ea8bb19..006f21ef7ddf 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1312,19 +1312,13 @@ static int cik_asic_pci_config_reset(struct amdgpu_device *adev)
 
 static bool cik_asic_supports_baco(struct amdgpu_device *adev)
 {
-	bool baco_support;
-
 	switch (adev->asic_type) {
 	case CHIP_BONAIRE:
 	case CHIP_HAWAII:
-		smu7_asic_get_baco_capability(adev, &baco_support);
-		break;
+		return amdgpu_dpm_is_baco_supported(adev);
 	default:
-		baco_support = false;
-		break;
+		return false;
 	}
-
-	return baco_support;
 }
 
 static enum amd_reset_method
@@ -1366,7 +1360,7 @@ static int cik_asic_reset(struct amdgpu_device *adev)
 	if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
 		if (!adev->in_suspend)
 			amdgpu_inc_vram_lost(adev);
-		r = smu7_asic_baco_reset(adev);
+		r = amdgpu_dpm_baco_reset(adev);
 	} else {
 		r = cik_asic_pci_config_reset(adev);
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.h b/drivers/gpu/drm/amd/amdgpu/cik.h
index 9870bf27870e..f91ab4c246b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.h
+++ b/drivers/gpu/drm/amd/amdgpu/cik.h
@@ -31,7 +31,5 @@ void cik_srbm_select(struct amdgpu_device *adev,
 int cik_set_ip_blocks(struct amdgpu_device *adev);
 
 void legacy_doorbell_index_init(struct amdgpu_device *adev);
-int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap);
-int smu7_asic_baco_reset(struct amdgpu_device *adev);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
index d6221298b477..d6aca1c08068 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
@@ -31,6 +31,9 @@ static u32 df_v1_7_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2};
 
 static void df_v1_7_sw_init(struct amdgpu_device *adev)
 {
+	adev->df.hash_status.hash_64k = false;
+	adev->df.hash_status.hash_2m = false;
+	adev->df.hash_status.hash_1g = false;
 }
 
 static void df_v1_7_sw_fini(struct amdgpu_device *adev)
@@ -66,7 +69,7 @@ static u32 df_v1_7_get_hbm_channel_number(struct amdgpu_device *adev)
 {
 	int fb_channel_number;
 
-	fb_channel_number = adev->df_funcs->get_fb_channel_number(adev);
+	fb_channel_number = adev->df.funcs->get_fb_channel_number(adev);
 
 	return df_v1_7_channel_number[fb_channel_number];
 }
@@ -77,7 +80,7 @@ static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
 	u32 tmp;
 
 	/* Put DF on broadcast mode */
-	adev->df_funcs->enable_broadcast_mode(adev, true);
+	adev->df.funcs->enable_broadcast_mode(adev, true);
 
 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) {
 		tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
@@ -92,7 +95,7 @@ static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
 	}
 
 	/* Exit boradcast mode */
-	adev->df_funcs->enable_broadcast_mode(adev, false);
+	adev->df.funcs->enable_broadcast_mode(adev, false);
 }
 
 static void df_v1_7_get_clockgating_state(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
index 2f884d941e8d..f51326598a8c 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -262,6 +262,32 @@ static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev,
 /* device attr for available perfmon counters */
 static DEVICE_ATTR(df_cntr_avail, S_IRUGO, df_v3_6_get_df_cntr_avail, NULL);
 
+/* Record the DF hash enables; they can only be set on Arcturus. */
+static void df_v3_6_query_hashes(struct amdgpu_device *adev)
+{
+	u32 global_ctrl;
+
+	adev->df.hash_status.hash_64k = false;
+	adev->df.hash_status.hash_2m = false;
+	adev->df.hash_status.hash_1g = false;
+
+	/* 0xe is the channel-number encoding for hash-enabled on Arcturus */
+	if (adev->asic_type != CHIP_ARCTURUS ||
+	    adev->df.funcs->get_fb_channel_number(adev) != 0xe)
+		return;
+
+	global_ctrl = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DfGlobalCtrl);
+	adev->df.hash_status.hash_64k = REG_GET_FIELD(global_ctrl,
+					DF_CS_UMC_AON0_DfGlobalCtrl,
+					GlbHashIntlvCtl64K);
+	adev->df.hash_status.hash_2m = REG_GET_FIELD(global_ctrl,
+					DF_CS_UMC_AON0_DfGlobalCtrl,
+					GlbHashIntlvCtl2M);
+	adev->df.hash_status.hash_1g = REG_GET_FIELD(global_ctrl,
+					DF_CS_UMC_AON0_DfGlobalCtrl,
+					GlbHashIntlvCtl1G);
+}
+
 /* init perfmons */
 static void df_v3_6_sw_init(struct amdgpu_device *adev)
 {
@@ -273,6 +299,8 @@ static void df_v3_6_sw_init(struct amdgpu_device *adev)
 
 	for (i = 0; i < AMDGPU_MAX_DF_PERFMONS; i++)
 		adev->df_perfmon_config_assign_mask[i] = 0;
+
+	df_v3_6_query_hashes(adev);
 }
 
 static void df_v3_6_sw_fini(struct amdgpu_device *adev)
@@ -311,7 +339,7 @@ static u32 df_v3_6_get_hbm_channel_number(struct amdgpu_device *adev)
 {
 	int fb_channel_number;
 
-	fb_channel_number = adev->df_funcs->get_fb_channel_number(adev);
+	fb_channel_number = adev->df.funcs->get_fb_channel_number(adev);
 	if (fb_channel_number >= ARRAY_SIZE(df_v3_6_channel_number))
 		fb_channel_number = 0;
 
@@ -325,7 +353,7 @@ static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev,
 
 	if (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG) {
 		/* Put DF on broadcast mode */
-		adev->df_funcs->enable_broadcast_mode(adev, true);
+		adev->df.funcs->enable_broadcast_mode(adev, true);
 
 		if (enable) {
 			tmp = RREG32_SOC15(DF, 0,
@@ -344,7 +372,7 @@ static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev,
 		}
 
 		/* Exit broadcast mode */
-		adev->df_funcs->enable_broadcast_mode(adev, false);
+		adev->df.funcs->enable_broadcast_mode(adev, false);
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 6bc3b937fba2..874f641de281 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -40,6 +40,7 @@
 #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
 
 #include "soc15.h"
+#include "soc15d.h"
 #include "soc15_common.h"
 #include "clearstate_gfx10.h"
 #include "v10_structs.h"
@@ -120,7 +121,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1[] =
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL, 0x001f0000, 0x00070104),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070104),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000100, 0x00000130),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
@@ -168,7 +169,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_1[] =
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL, 0x001f0000, 0x00070105),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070105),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
@@ -345,15 +346,29 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
 }
 
+static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+				uint16_t pasid, uint32_t flush_type,
+				bool all_hub)
+{
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+	amdgpu_ring_write(kiq_ring,
+			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
 static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
 	.kiq_set_resources = gfx10_kiq_set_resources,
 	.kiq_map_queues = gfx10_kiq_map_queues,
 	.kiq_unmap_queues = gfx10_kiq_unmap_queues,
 	.kiq_query_status = gfx10_kiq_query_status,
+	.kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
 	.set_resources_size = 8,
 	.map_queues_size = 7,
 	.unmap_queues_size = 6,
 	.query_status_size = 7,
+	.invalidate_tlbs_size = 12,
 };
 
 static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
@@ -807,10 +822,11 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
 		info->fw = adev->gfx.rlc_fw;
-		header = (const struct common_firmware_header *)info->fw->data;
-		adev->firmware.fw_size +=
-			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
+		if (info->fw) {
+			header = (const struct common_firmware_header *)info->fw->data;
+			adev->firmware.fw_size +=
+				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+		}
 		if (adev->gfx.rlc.is_rlc_v2_1 &&
 		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
 		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
@@ -3321,8 +3337,11 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
 	mqd->cp_hqd_ib_control = tmp;
 
-	/* activate the queue */
-	mqd->cp_hqd_active = 1;
+	/* The map_queues packet does not need the queue to be active,
+	 * so only the KIQ needs to set this field.
+	 */
+	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+		mqd->cp_hqd_active = 1;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index cfc1403fc855..46f0533ba43f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4558,8 +4558,11 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
 	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
 
-	/* activate the queue */
-	mqd->cp_hqd_active = 1;
+	/* The map_queues packet does not need the queue to be active,
+	 * so only the KIQ needs to set this field.
+	 */
+	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+		mqd->cp_hqd_active = 1;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index a5492e375f29..46ab46757b25 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -48,15 +48,6 @@
 
 #include "amdgpu_ras.h"
 
-#include "sdma0/sdma0_4_2_offset.h"
-#include "sdma1/sdma1_4_2_offset.h"
-#include "sdma2/sdma2_4_2_2_offset.h"
-#include "sdma3/sdma3_4_2_2_offset.h"
-#include "sdma4/sdma4_4_2_2_offset.h"
-#include "sdma5/sdma5_4_2_2_offset.h"
-#include "sdma6/sdma6_4_2_2_offset.h"
-#include "sdma7/sdma7_4_2_2_offset.h"
-
 #define GFX9_NUM_GFX_RINGS     1
 #define GFX9_MEC_HPD_SIZE 4096
 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
@@ -748,6 +739,134 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
 				     void *inject_if);
 
+static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
+				uint64_t queue_mask)
+{
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+	amdgpu_ring_write(kiq_ring,
+		PACKET3_SET_RESOURCES_VMID_MASK(0) |
+		/* vmid_mask:0 queue_type:0 (KIQ) */
+		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
+	amdgpu_ring_write(kiq_ring,
+			lower_32_bits(queue_mask));	/* queue mask lo */
+	amdgpu_ring_write(kiq_ring,
+			upper_32_bits(queue_mask));	/* queue mask hi */
+	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
+	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
+	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
+	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
+}
+
+static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
+				 struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = kiq_ring->adev;
+	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+	/* engine select: 4 for gfx rings, 0 for every other ring type */
+	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, num_Q: 1 */
+			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
+			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
+			 /*queue_type: normal compute queue */
+			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
+			 /* alloc format: all_on_one_pipe */
+			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
+			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
+			 /* num_queues: must be 1 */
+			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
+	amdgpu_ring_write(kiq_ring,
+			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
+	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+}
+
+static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
+				   struct amdgpu_ring *ring,
+				   enum amdgpu_unmap_queues_action action,
+				   u64 gpu_addr, u64 seq)
+{
+	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, num_Q: 1 */
+			  PACKET3_UNMAP_QUEUES_ACTION(action) |
+			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
+			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
+			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
+	amdgpu_ring_write(kiq_ring,
+			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+
+	if (action == PREEMPT_QUEUES_NO_UNMAP) {
+		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
+		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
+		amdgpu_ring_write(kiq_ring, seq);
+	} else {
+		amdgpu_ring_write(kiq_ring, 0);
+		amdgpu_ring_write(kiq_ring, 0);
+		amdgpu_ring_write(kiq_ring, 0);
+	}
+}
+
+static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
+				   struct amdgpu_ring *ring,
+				   u64 addr,
+				   u64 seq)
+{
+	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
+	amdgpu_ring_write(kiq_ring,
+			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
+			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
+			  PACKET3_QUERY_STATUS_COMMAND(2));
+	/* doorbell offset of the queried ring plus its engine select */
+	amdgpu_ring_write(kiq_ring,
+			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
+			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
+	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
+	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
+	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
+	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
+}
+
+static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+				uint16_t pasid, uint32_t flush_type,
+				bool all_hub)
+{
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+	amdgpu_ring_write(kiq_ring,
+			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
+static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
+	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
+	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
+	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
+	.kiq_query_status = gfx_v9_0_kiq_query_status,
+	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
+	.set_resources_size = 8,
+	.map_queues_size = 7,
+	.unmap_queues_size = 6,
+	.query_status_size = 7,
+	.invalidate_tlbs_size = 12,
+};
+
+static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
+{
+	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
+}
+
 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
 {
 	switch (adev->asic_type) {
@@ -3115,74 +3234,6 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
 }
 
-static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
-{
-	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
-	uint64_t queue_mask = 0;
-	int r, i;
-
-	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
-		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
-			continue;
-
-		/* This situation may be hit in the future if a new HW
-		 * generation exposes more than 64 queues. If so, the
-		 * definition of queue_mask needs updating */
-		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
-			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
-			break;
-		}
-
-		queue_mask |= (1ull << i);
-	}
-
-	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
-	if (r) {
-		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
-		return r;
-	}
-
-	/* set resources */
-	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
-	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
-			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
-	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
-	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
-	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
-	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
-	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
-	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
-		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
-		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-
-		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
-		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
-		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
-				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
-				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
-				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
-				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
-				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
-				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
-				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
-				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
-				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
-		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
-		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
-		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
-		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
-		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
-	}
-
-	r = amdgpu_ring_test_helper(kiq_ring);
-	if (r)
-		DRM_ERROR("KCQ enable failed\n");
-
-	return r;
-}
-
 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -3319,8 +3370,11 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
 	mqd->cp_hqd_ib_control = tmp;
 
-	/* activate the queue */
-	mqd->cp_hqd_active = 1;
+	/* The map_queues packet does not need the queue to be active,
+	 * so only the KIQ needs to set this field.
+	 */
+	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+		mqd->cp_hqd_active = 1;
 
 	return 0;
 }
@@ -3589,7 +3643,7 @@ static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
 			goto done;
 	}
 
-	r = gfx_v9_0_kiq_kcq_enable(adev);
+	r = amdgpu_gfx_enable_kcq(adev);
 done:
 	return r;
 }
@@ -3646,6 +3700,23 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
 	return 0;
 }
 
+static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
+{
+	u32 tmp;
+
+	if (adev->asic_type != CHIP_ARCTURUS)
+		return;
+
+	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
+	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
+				adev->df.hash_status.hash_64k);
+	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
+				adev->df.hash_status.hash_2m);
+	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
+				adev->df.hash_status.hash_1g);
+	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
+}
+
 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
 {
 	if (adev->asic_type != CHIP_ARCTURUS)
@@ -3663,6 +3734,8 @@ static int gfx_v9_0_hw_init(void *handle)
 
 	gfx_v9_0_constants_init(adev);
 
+	gfx_v9_0_init_tcp_config(adev);
+
 	r = adev->gfx.rlc.funcs->resume(adev);
 	if (r)
 		return r;
@@ -3674,36 +3747,6 @@ static int gfx_v9_0_hw_init(void *handle)
 	return r;
 }
 
-static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
-{
-	int r, i;
-	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
-
-	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
-	if (r)
-		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
-
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
-
-		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
-		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
-						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
-						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
-						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
-						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
-		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
-		amdgpu_ring_write(kiq_ring, 0);
-		amdgpu_ring_write(kiq_ring, 0);
-		amdgpu_ring_write(kiq_ring, 0);
-	}
-	r = amdgpu_ring_test_helper(kiq_ring);
-	if (r)
-		DRM_ERROR("KCQ disable failed\n");
-
-	return r;
-}
-
 static int gfx_v9_0_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -3715,7 +3758,7 @@ static int gfx_v9_0_hw_fini(void *handle)
 	/* DF freeze and kcq disable will fail */
 	if (!amdgpu_ras_intr_triggered())
 		/* disable KCQ to avoid CPC touch memory not valid anymore */
-		gfx_v9_0_kcq_disable(adev);
+		amdgpu_gfx_disable_kcq(adev);
 
 	if (amdgpu_sriov_vf(adev)) {
 		gfx_v9_0_cp_gfx_enable(adev, false);
@@ -4021,14 +4064,6 @@ static const struct soc15_reg_entry sec_ded_counter_registers[] = {
    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
    { SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1},
-   { SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), 0, 1, 1},
-   { SOC15_REG_ENTRY(SDMA1, 0, mmSDMA1_EDC_COUNTER), 0, 1, 1},
-   { SOC15_REG_ENTRY(SDMA2, 0, mmSDMA2_EDC_COUNTER), 0, 1, 1},
-   { SOC15_REG_ENTRY(SDMA3, 0, mmSDMA3_EDC_COUNTER), 0, 1, 1},
-   { SOC15_REG_ENTRY(SDMA4, 0, mmSDMA4_EDC_COUNTER), 0, 1, 1},
-   { SOC15_REG_ENTRY(SDMA5, 0, mmSDMA5_EDC_COUNTER), 0, 1, 1},
-   { SOC15_REG_ENTRY(SDMA6, 0, mmSDMA6_EDC_COUNTER), 0, 1, 1},
-   { SOC15_REG_ENTRY(SDMA7, 0, mmSDMA7_EDC_COUNTER), 0, 1, 1},
 };
 
 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
@@ -4092,7 +4127,6 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
 						adev->gfx.config.max_sh_per_se;
 	int sgpr_work_group_size = 5;
 	int gpr_reg_size = compute_dim_x / 16 + 6;
-	int sec_ded_counter_reg_size = adev->sdma.num_instances + 34;
 
 	/* only support when RAS is enabled */
 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
@@ -4232,7 +4266,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
 
 	/* read back registers to clear the counters */
 	mutex_lock(&adev->grbm_idx_mutex);
-	for (i = 0; i < sec_ded_counter_reg_size; i++) {
+	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
 		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
 			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
 				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
@@ -4259,6 +4293,7 @@ static int gfx_v9_0_early_init(void *handle)
 	else
 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
 	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+	gfx_v9_0_set_kiq_pm4_funcs(adev);
 	gfx_v9_0_set_ring_funcs(adev);
 	gfx_v9_0_set_irq_funcs(adev);
 	gfx_v9_0_set_gds_init(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index da9765ff45d6..bbede09983e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -30,6 +30,8 @@
 #include "hdp/hdp_5_0_0_sh_mask.h"
 #include "gc/gc_10_1_0_sh_mask.h"
 #include "mmhub/mmhub_2_0_0_sh_mask.h"
+#include "athub/athub_2_0_0_sh_mask.h"
+#include "athub/athub_2_0_0_offset.h"
 #include "dcn/dcn_2_0_0_offset.h"
 #include "dcn/dcn_2_0_0_sh_mask.h"
 #include "oss/osssys_5_0_0_offset.h"
@@ -37,6 +39,7 @@
 #include "navi10_enum.h"
 
 #include "soc15.h"
+#include "soc15d.h"
 #include "soc15_common.h"
 
 #include "nbio_v2_3.h"
@@ -234,6 +237,19 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
 		(!amdgpu_sriov_vf(adev)));
 }
 
+/* Fetch the PASID mapped to @vmid; returns true when the mapping is valid. */
+static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
+					struct amdgpu_device *adev,
+					uint8_t vmid, uint16_t *p_pasid)
+{
+	uint32_t base = SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING);
+	uint32_t mapping = RREG32(base + vmid);
+
+	*p_pasid = mapping & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+	return (mapping & ATC_VMID0_PASID_MAPPING__VALID_MASK) != 0;
+}
+
 /*
  * GART
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -380,6 +396,63 @@ error_alloc:
 	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
 }
 
+/**
+ * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flushed
+ * @flush_type: the flush type
+ * @all_hub: flush all hubs rather than only AMDGPU_GFXHUB_0
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					uint16_t pasid, uint32_t flush_type,
+					bool all_hub)
+{
+	int vmid, i;
+	signed long r;
+	uint32_t seq;
+	uint16_t queried_pasid;
+	bool ret;
+	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+
+	if (amdgpu_emu_mode == 0 && ring->sched.ready) {
+		spin_lock(&adev->gfx.kiq.ring_lock);
+		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size);
+		kiq->pmf->kiq_invalidate_tlbs(ring,
+					pasid, flush_type, all_hub);
+		amdgpu_fence_emit_polling(ring, &seq);
+		amdgpu_ring_commit(ring);
+		spin_unlock(&adev->gfx.kiq.ring_lock);
+		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+		if (r < 1) {
+			DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+			return -ETIME;
+		}
+
+		return 0;
+	}
+
+	for (vmid = 1; vmid < 16; vmid++) {
+		ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+				&queried_pasid);
+		if (ret && queried_pasid == pasid) {
+			if (all_hub) {
+				for (i = 0; i < adev->num_vmhubs; i++)
+					gmc_v10_0_flush_gpu_tlb(adev, vmid,
+							i, flush_type);
+			} else {
+				gmc_v10_0_flush_gpu_tlb(adev, vmid,
+						AMDGPU_GFXHUB_0, flush_type);
+			}
+			break;
+		}
+	}
+
+	return 0;
+}
+
 static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 					     unsigned vmid, uint64_t pd_addr)
 {
@@ -531,6 +604,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
 
 static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
 	.flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
+	.flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
 	.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
 	.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
 	.map_mtype = gmc_v10_0_map_mtype,
@@ -566,6 +640,13 @@ static int gmc_v10_0_late_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int r;
 
+	/*
+	 * Can't free the stolen VGA memory when it might be used for memory
+	 * training again.
+	 */
+	if (!adev->fw_vram_usage.mem_train_support)
+		amdgpu_bo_late_init(adev);
+
 	r = amdgpu_gmc_allocate_vm_inv_eng(adev);
 	if (r)
 		return r;
@@ -720,6 +801,10 @@ static int gmc_v10_0_sw_init(void *handle)
 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC,
 			      VMC_1_0__SRCID__VM_FAULT,
 			      &adev->gmc.vm_fault);
+
+	if (r)
+		return r;
+
 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2,
 			      UTCL2_1_0__SRCID__FAULT,
 			      &adev->gmc.vm_fault);
@@ -732,15 +817,6 @@ static int gmc_v10_0_sw_init(void *handle)
 	 */
 	adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
 
-	/*
-	 * Reserve 8M stolen memory for navi10 like vega10
-	 * TODO: will check if it's really needed on asic.
-	 */
-	if (amdgpu_emu_mode == 1)
-		adev->gmc.stolen_size = 0;
-	else
-		adev->gmc.stolen_size = 9 * 1024 *1024;
-
 	r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
 	if (r) {
 		printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
@@ -753,6 +829,19 @@ static int gmc_v10_0_sw_init(void *handle)
 
 	adev->gmc.stolen_size = gmc_v10_0_get_vbios_fb_size(adev);
 
+	/*
+	 * In a dual-GPU scenario, stolen_size is set to zero on the
+	 * secondary GPU, since no pre-OS console is using that memory.
+	 * The bottom region of VRAM is then allocated as GTT, but a small
+	 * part of it is overwritten by the UMC firmware during GDDR6 BIST
+	 * training, which causes page faults.
+	 * Forcing stolen_size to at least 3MB keeps the bottom region of
+	 * VRAM allocated as stolen memory, which avoids the GTT
+	 * corruption.
+	 */
+	adev->gmc.stolen_size = max(adev->gmc.stolen_size,
+				    AMDGPU_STOLEN_BIST_TRAINING_DEFAULT_SIZE);
+
 	/* Memory manager */
 	r = amdgpu_bo_init(adev);
 	if (r)
@@ -792,6 +881,13 @@ static void gmc_v10_0_gart_fini(struct amdgpu_device *adev)
 static int gmc_v10_0_sw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	void *stolen_vga_buf;
+
+	/*
+	 * Free the stolen memory if it wasn't already freed in late_init
+	 * because of memory training.
+	 */
+	amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf);
 
 	amdgpu_vm_manager_fini(adev);
 	gmc_v10_0_gart_fini(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index f08e5330642d..19d5b133e1d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -418,6 +418,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
 	return 0;
 }
 
+/**
+ * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flushed
+ * @flush_type: not used by this implementation
+ * @all_hub: not used by this implementation
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					uint16_t pasid, uint32_t flush_type,
+					bool all_hub)
+{
+	unsigned int mapping;
+	int vmid;
+
+	if (adev->in_gpu_reset)
+		return -EIO;
+
+	for (vmid = 1; vmid < 16; vmid++) {
+		mapping = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+		if (!(mapping & ATC_VMID0_PASID_MAPPING__VALID_MASK) ||
+		    (mapping & ATC_VMID0_PASID_MAPPING__PASID_MASK) != pasid)
+			continue;
+		WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+		RREG32(mmVM_INVALIDATE_RESPONSE);
+		break;
+	}
+
+	return 0;
+}
+
 /*
  * GART
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -1333,6 +1365,7 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = {
 
 static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
 	.flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb,
+	.flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid,
 	.emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb,
 	.emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping,
 	.set_prt = gmc_v7_0_set_prt,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 6d96d40fbcb8..27d83204fa2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
 	return 0;
 }
 
+/**
+ * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flushed
+ * @flush_type: not used by this implementation
+ * @all_hub: not used by this implementation
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					uint16_t pasid, uint32_t flush_type,
+					bool all_hub)
+{
+	unsigned int mapping;
+	int vmid;
+
+	if (adev->in_gpu_reset)
+		return -EIO;
+
+	for (vmid = 1; vmid < 16; vmid++) {
+		mapping = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+		if (!(mapping & ATC_VMID0_PASID_MAPPING__VALID_MASK) ||
+		    (mapping & ATC_VMID0_PASID_MAPPING__PASID_MASK) != pasid)
+			continue;
+
+		WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+		RREG32(mmVM_INVALIDATE_RESPONSE);
+		break;
+	}
+
+	return 0;
+}
+
 /*
  * GART
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -1700,6 +1733,7 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
 
 static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
 	.flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb,
+	.flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid,
 	.emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb,
 	.emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping,
 	.set_prt = gmc_v8_0_set_prt,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 26194ac9af98..40a496804356 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -38,10 +38,12 @@
 #include "dce/dce_12_0_sh_mask.h"
 #include "vega10_enum.h"
 #include "mmhub/mmhub_1_0_offset.h"
+#include "athub/athub_1_0_sh_mask.h"
 #include "athub/athub_1_0_offset.h"
 #include "oss/osssys_4_0_offset.h"
 
 #include "soc15.h"
+#include "soc15d.h"
 #include "soc15_common.h"
 #include "umc/umc_6_0_sh_mask.h"
 
@@ -441,6 +443,18 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
 		   adev->pdev->device == 0x15d8)));
 }
 
+/* Fetch the PASID mapped to @vmid; returns true when the mapping is valid. */
+static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
+					uint8_t vmid, uint16_t *p_pasid)
+{
+	uint32_t base = SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING);
+	uint32_t mapping = RREG32(base + vmid);
+
+	*p_pasid = mapping & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+	return (mapping & ATC_VMID0_PASID_MAPPING__VALID_MASK) != 0;
+}
+
 /*
  * GART
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -539,6 +553,67 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
 }
 
+/**
+ * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flushed
+ * @flush_type: the flush type
+ * @all_hub: flush all hubs rather than only AMDGPU_GFXHUB_0
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					uint16_t pasid, uint32_t flush_type,
+					bool all_hub)
+{
+	int vmid, i;
+	signed long r;
+	uint32_t seq;
+	uint16_t queried_pasid;
+	bool ret;
+	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+
+	if (adev->in_gpu_reset)
+		return -EIO;
+
+	if (ring->sched.ready) {
+		spin_lock(&adev->gfx.kiq.ring_lock);
+		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size);
+		kiq->pmf->kiq_invalidate_tlbs(ring,
+					pasid, flush_type, all_hub);
+		amdgpu_fence_emit_polling(ring, &seq);
+		amdgpu_ring_commit(ring);
+		spin_unlock(&adev->gfx.kiq.ring_lock);
+		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+		if (r < 1) {
+			DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+			return -ETIME;
+		}
+
+		return 0;
+	}
+
+	/* KIQ not ready: flush the VMID currently mapped to this pasid */
+	for (vmid = 1; vmid < 16; vmid++) {
+		ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+				&queried_pasid);
+		if (ret && queried_pasid == pasid) {
+			if (all_hub) {
+				for (i = 0; i < adev->num_vmhubs; i++)
+					gmc_v9_0_flush_gpu_tlb(adev, vmid,
+							i, flush_type);
+			} else {
+				gmc_v9_0_flush_gpu_tlb(adev, vmid,
+						AMDGPU_GFXHUB_0, flush_type);
+			}
+			break;
+		}
+	}
+
+	return 0;
+}
+
 static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 					    unsigned vmid, uint64_t pd_addr)
 {
@@ -700,6 +775,7 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
 
 static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
 	.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
+	.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
 	.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
 	.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
 	.map_mtype = gmc_v9_0_map_mtype,
@@ -817,8 +893,8 @@ static int gmc_v9_0_late_init(void *handle)
 			r = amdgpu_atomfirmware_mem_ecc_supported(adev);
 			if (!r) {
 				DRM_INFO("ECC is not present.\n");
-				if (adev->df_funcs->enable_ecc_force_par_wr_rmw)
-					adev->df_funcs->enable_ecc_force_par_wr_rmw(adev, false);
+				if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
+					adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
 			} else {
 				DRM_INFO("ECC is active.\n");
 			}
@@ -1023,7 +1099,7 @@ static int gmc_v9_0_sw_init(void *handle)
 		else
 			chansize = 128;
 
-		numchan = adev->df_funcs->get_hbm_channel_number(adev);
+		numchan = adev->df.funcs->get_hbm_channel_number(adev);
 		adev->gmc.vram_width = numchan * chansize;
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index b0229543e887..2e0f8933410e 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -478,7 +478,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
 		amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
-		    is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev))
+		    !amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
 		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
@@ -489,7 +489,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block);
 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
-		    is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev))
+		    !amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
@@ -502,7 +502,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
 		amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
-		    is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev))
+		    !amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
 		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
@@ -513,7 +513,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block);
 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
-		    is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev))
+		    !amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
@@ -726,6 +726,12 @@ static int nv_common_early_init(void *handle)
 			AMD_PG_SUPPORT_VCN_DPG |
 			AMD_PG_SUPPORT_JPEG |
 			AMD_PG_SUPPORT_ATHUB;
+		/* guest vm gets 0xffffffff when reading RCC_DEV0_EPF0_STRAP0,
+		 * as a consequence, the rev_id and external_rev_id are wrong.
+		 * workaround it by hardcoding rev_id to 0 (default value).
+		 */
+		if (amdgpu_sriov_vf(adev))
+			adev->rev_id = 0;
 		adev->external_rev_id = adev->rev_id + 0xa;
 		break;
 	default:
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index a57f3d737677..685dd9754c67 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -43,10 +43,13 @@ MODULE_FIRMWARE("amdgpu/vega20_asd.bin");
 MODULE_FIRMWARE("amdgpu/vega20_ta.bin");
 MODULE_FIRMWARE("amdgpu/navi10_sos.bin");
 MODULE_FIRMWARE("amdgpu/navi10_asd.bin");
+MODULE_FIRMWARE("amdgpu/navi10_ta.bin");
 MODULE_FIRMWARE("amdgpu/navi14_sos.bin");
 MODULE_FIRMWARE("amdgpu/navi14_asd.bin");
+MODULE_FIRMWARE("amdgpu/navi14_ta.bin");
 MODULE_FIRMWARE("amdgpu/navi12_sos.bin");
 MODULE_FIRMWARE("amdgpu/navi12_asd.bin");
+MODULE_FIRMWARE("amdgpu/navi12_ta.bin");
 MODULE_FIRMWARE("amdgpu/arcturus_sos.bin");
 MODULE_FIRMWARE("amdgpu/arcturus_asd.bin");
 MODULE_FIRMWARE("amdgpu/arcturus_ta.bin");
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index ce0753a9d241..27c7001be1ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -82,6 +82,7 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev);
 static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev);
 static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev);
 static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev);
+static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev);
 
 static const struct soc15_reg_golden golden_settings_sdma_4[] = {
 	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
@@ -254,7 +255,106 @@ static const struct soc15_reg_golden golden_settings_sdma_4_3[] = {
 	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
 	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
 	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
-	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000)
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x03fbe1fe)
+};
+
+static const struct soc15_ras_field_entry sdma_v4_0_ras_fields[] = { /* SED fields of SDMA0_EDC_COUNTER */
+	{ "SDMA_UCODE_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UCODE_BUF_SED),
+	0, 0, /* NOTE(review): presumably the DED mask/shift, unused here - confirm */
+	},
+	{ "SDMA_RB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_RB_CMD_BUF_SED),
+	0, 0,
+	},
+	{ "SDMA_IB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_IB_CMD_BUF_SED),
+	0, 0,
+	},
+	{ "SDMA_UTCL1_RD_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UTCL1_RD_FIFO_SED),
+	0, 0,
+	},
+	{ "SDMA_UTCL1_RDBST_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UTCL1_RDBST_FIFO_SED),
+	0, 0,
+	},
+	{ "SDMA_DATA_LUT_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_DATA_LUT_FIFO_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF0_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF0_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF1_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF1_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF2_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF2_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF3_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF3_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF4_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF4_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF5_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF5_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF6_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF6_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF7_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF7_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF8_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF8_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF9_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF9_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF10_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF10_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF11_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF11_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF12_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF12_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF13_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF13_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF14_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF14_SED),
+	0, 0,
+	},
+	{ "SDMA_MBANK_DATA_BUF15_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF15_SED),
+	0, 0,
+	},
+	{ "SDMA_SPLIT_DAT_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_SPLIT_DAT_BUF_SED),
+	0, 0,
+	},
+	{ "SDMA_MC_WR_ADDR_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MC_WR_ADDR_FIFO_SED),
+	0, 0,
+	},
 };
 
 static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev,
@@ -1686,6 +1786,7 @@ static int sdma_v4_0_early_init(void *handle)
 	sdma_v4_0_set_buffer_funcs(adev);
 	sdma_v4_0_set_vm_pte_funcs(adev);
 	sdma_v4_0_set_irq_funcs(adev);
+	sdma_v4_0_set_ras_funcs(adev);
 
 	return 0;
 }
@@ -1700,8 +1801,18 @@ static int sdma_v4_0_late_init(void *handle)
 	struct ras_ih_if ih_info = {
 		.cb = sdma_v4_0_process_ras_data_cb,
 	};
+	int i;
 
-	return amdgpu_sdma_ras_late_init(adev, &ih_info);
+	/* read back edc counter registers to clear the counters */
+	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
+		for (i = 0; i < adev->sdma.num_instances; i++)
+			RREG32_SDMA(i, mmSDMA0_EDC_COUNTER);
+	}
+
+	if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init)
+		return adev->sdma.funcs->ras_late_init(adev, &ih_info);
+	else
+		return 0;
 }
 
 static int sdma_v4_0_sw_init(void *handle)
@@ -1773,7 +1884,8 @@ static int sdma_v4_0_sw_fini(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int i;
 
-	amdgpu_sdma_ras_fini(adev);
+	if (adev->sdma.funcs && adev->sdma.funcs->ras_fini)
+		adev->sdma.funcs->ras_fini(adev);
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
@@ -2414,6 +2526,70 @@ static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev)
 	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
 }
 
+/* Add up every SED field set in an SDMA EDC counter @value into @sec_count. */
+static void sdma_v4_0_get_ras_error_count(uint32_t value,
+					uint32_t instance,
+					uint32_t *sec_count)
+{
+	uint32_t i;
+	uint32_t sec_cnt;
+
+	/* double bit (multiple bit) error detection is not supported */
+	for (i = 0; i < ARRAY_SIZE(sdma_v4_0_ras_fields); i++) {
+		/* the SDMA_EDC_COUNTER register in each sdma instance
+		 * shares the same sed shift_mask
+		 */
+		sec_cnt = (value & sdma_v4_0_ras_fields[i].sec_count_mask) >>
+			sdma_v4_0_ras_fields[i].sec_count_shift;
+		if (sec_cnt) {
+			DRM_INFO("Detected %s in SDMA%u, SED %u\n",
+				sdma_v4_0_ras_fields[i].name,
+				instance, sec_cnt);
+			*sec_count += sec_cnt;
+		}
+	}
+}
+
+/* Read one SDMA instance's EDC counter and add its SED total to ce_count. */
+static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev,
+			uint32_t instance, void *ras_error_status)
+{
+	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+	uint32_t sec_count = 0;
+	uint32_t reg_value = 0;
+
+	reg_value = RREG32_SDMA(instance, mmSDMA0_EDC_COUNTER);
+	/* double bit error is not supported */
+	if (reg_value)
+		sdma_v4_0_get_ras_error_count(reg_value, instance, &sec_count);
+	/* err_data->ce_count should be initialized to 0
+	 * before calling into this function */
+	err_data->ce_count += sec_count;
+	/* double bit error is not supported
+	 * set ue count to 0 */
+	err_data->ue_count = 0;
+
+	return 0;
+}
+
+static const struct amdgpu_sdma_ras_funcs sdma_v4_0_ras_funcs = {
+	.ras_late_init = amdgpu_sdma_ras_late_init, /* called from late_init */
+	.ras_fini = amdgpu_sdma_ras_fini, /* called from sw_fini */
+	.query_ras_error_count = sdma_v4_0_query_ras_error_count,
+};
+
+/*
+ * Install the SDMA RAS callback table on the ASICs that use it; on every
+ * other ASIC adev->sdma.funcs is left untouched.
+ */
+static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev)
+{
+	/* only these two ASIC types get the v4.0 RAS callbacks */
+	if (adev->asic_type == CHIP_VEGA20 ||
+	    adev->asic_type == CHIP_ARCTURUS)
+		adev->sdma.funcs = &sdma_v4_0_ras_funcs;
+}
+
 const struct amdgpu_ip_block_version sdma_v4_0_ip_block = {
 	.type = AMD_IP_BLOCK_TYPE_SDMA,
 	.major = 4,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 714cf4dfd0a7..317803f6a561 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -479,62 +479,18 @@ static int soc15_asic_mode1_reset(struct amdgpu_device *adev)
 	return ret;
 }
 
-static int soc15_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap)
-{
-	if (is_support_sw_smu(adev)) {
-		struct smu_context *smu = &adev->smu;
-
-		*cap = smu_baco_is_support(smu);
-		return 0;
-	} else {
-		void *pp_handle = adev->powerplay.pp_handle;
-		const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
-
-		if (!pp_funcs || !pp_funcs->get_asic_baco_capability) {
-			*cap = false;
-			return -ENOENT;
-		}
-
-		return pp_funcs->get_asic_baco_capability(pp_handle, cap);
-	}
-}
-
 static int soc15_asic_baco_reset(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+	int ret = 0;
 
 	/* avoid NBIF got stuck when do RAS recovery in BACO reset */
 	if (ras && ras->supported)
 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
 
-	dev_info(adev->dev, "GPU BACO reset\n");
-
-	if (is_support_sw_smu(adev)) {
-		struct smu_context *smu = &adev->smu;
-		int ret;
-
-		ret = smu_baco_enter(smu);
-		if (ret)
-			return ret;
-
-		ret = smu_baco_exit(smu);
-		if (ret)
-			return ret;
-	} else {
-		void *pp_handle = adev->powerplay.pp_handle;
-		const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
-
-		if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state)
-			return -ENOENT;
-
-		/* enter BACO state */
-		if (pp_funcs->set_asic_baco_state(pp_handle, 1))
-			return -EIO;
-
-		/* exit BACO state */
-		if (pp_funcs->set_asic_baco_state(pp_handle, 0))
-			return -EIO;
-	}
+	ret = amdgpu_dpm_baco_reset(adev);
+	if (ret)
+		return ret;
 
 	/* re-enable doorbell interrupt after BACO exit */
 	if (ras && ras->supported)
@@ -543,17 +499,6 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev)
 	return 0;
 }
 
-static int soc15_mode2_reset(struct amdgpu_device *adev)
-{
-	if (is_support_sw_smu(adev))
-		return smu_mode2_reset(&adev->smu);
-	if (!adev->powerplay.pp_funcs ||
-	    !adev->powerplay.pp_funcs->asic_reset_mode_2)
-		return -ENOENT;
-
-	return adev->powerplay.pp_funcs->asic_reset_mode_2(adev->powerplay.pp_handle);
-}
-
 static enum amd_reset_method
 soc15_asic_reset_method(struct amdgpu_device *adev)
 {
@@ -567,11 +512,11 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
 	case CHIP_ARCTURUS:
-		soc15_asic_get_baco_capability(adev, &baco_reset);
+		baco_reset = amdgpu_dpm_is_baco_supported(adev);
 		break;
 	case CHIP_VEGA20:
 		if (adev->psp.sos_fw_version >= 0x80067)
-			soc15_asic_get_baco_capability(adev, &baco_reset);
+			baco_reset = amdgpu_dpm_is_baco_supported(adev);
 
 		/*
 		 * 1. PMFW version > 0x284300: all cases use baco
@@ -598,7 +543,7 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
 				amdgpu_inc_vram_lost(adev);
 			return soc15_asic_baco_reset(adev);
 		case AMD_RESET_METHOD_MODE2:
-			return soc15_mode2_reset(adev);
+			return amdgpu_dpm_mode2_reset(adev);
 		default:
 			if (!adev->in_suspend)
 				amdgpu_inc_vram_lost(adev);
@@ -608,25 +553,18 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
 
 static bool soc15_supports_baco(struct amdgpu_device *adev)
 {
-	bool baco_support;
-
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
 	case CHIP_ARCTURUS:
-		soc15_asic_get_baco_capability(adev, &baco_support);
-		break;
+		return amdgpu_dpm_is_baco_supported(adev);
 	case CHIP_VEGA20:
 		if (adev->psp.sos_fw_version >= 0x80067)
-			soc15_asic_get_baco_capability(adev, &baco_support);
-		else
-			baco_support = false;
-		break;
+			return amdgpu_dpm_is_baco_supported(adev);
+		return false;
 	default:
 		return false;
 	}
-
-	return baco_support;
 }
 
 /*static int soc15_set_uvd_clock(struct amdgpu_device *adev, u32 clock,
@@ -739,9 +677,9 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
 	}
 
 	if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS)
-		adev->df_funcs = &df_v3_6_funcs;
+		adev->df.funcs = &df_v3_6_funcs;
 	else
-		adev->df_funcs = &df_v1_7_funcs;
+		adev->df.funcs = &df_v1_7_funcs;
 
 	adev->rev_id = soc15_get_rev_id(adev);
 	adev->nbio.funcs->detect_hw_virt(adev);
@@ -834,8 +772,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
 			if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
 				amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block);
 		} else {
-			if (unlikely(adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT))
-				amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block);
+			amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block);
 		}
 		if (!amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &jpeg_v2_5_ip_block);
@@ -846,8 +783,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
 		if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
 			amdgpu_device_ip_block_add(adev, &psp_v12_0_ip_block);
-		if (is_support_sw_smu(adev))
-			amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
 		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
@@ -1311,7 +1247,7 @@ static int soc15_common_sw_init(void *handle)
 	if (amdgpu_sriov_vf(adev))
 		xgpu_ai_mailbox_add_irq_id(adev);
 
-	adev->df_funcs->sw_init(adev);
+	adev->df.funcs->sw_init(adev);
 
 	return 0;
 }
@@ -1321,7 +1257,7 @@ static int soc15_common_sw_fini(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	amdgpu_nbio_ras_fini(adev);
-	adev->df_funcs->sw_fini(adev);
+	adev->df.funcs->sw_fini(adev);
 	return 0;
 }
 
@@ -1542,7 +1478,7 @@ static int soc15_common_set_clockgating_state(void *handle,
 				state == AMD_CG_STATE_GATE ? true : false);
 		soc15_update_rom_medium_grain_clock_gating(adev,
 				state == AMD_CG_STATE_GATE ? true : false);
-		adev->df_funcs->update_medium_grain_clock_gating(adev,
+		adev->df.funcs->update_medium_grain_clock_gating(adev,
 				state == AMD_CG_STATE_GATE ? true : false);
 		break;
 	case CHIP_RAVEN:
@@ -1600,7 +1536,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags)
 	if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK))
 		*flags |= AMD_CG_SUPPORT_ROM_MGCG;
 
-	adev->df_funcs->get_clockgating_state(adev, flags);
+	adev->df.funcs->get_clockgating_state(adev, flags);
 }
 
 static int soc15_common_set_powergating_state(void *handle,
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
index 11e924dd88ff..793bf70e64b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
@@ -30,8 +30,6 @@
 #include "umc/umc_6_1_1_sh_mask.h"
 #include "umc/umc_6_1_2_offset.h"
 
-#define smnMCA_UMC0_MCUMC_ADDRT0	0x50f10
-
 #define UMC_6_INST_DIST			0x40000
 
 /*
@@ -56,12 +54,30 @@ const uint32_t
 		{9, 25, 0, 16},		{15, 31, 6, 22}
 };
 
+static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev)
+{
+	WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
+			RSMU_UMC_INDEX_MODE_EN, 1); /* disable variant writes 0 */
+}
+
 static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev)
 {
 	WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
 			RSMU_UMC_INDEX_MODE_EN, 0);
 }
 
+/* Read back RSMU_UMC_INDEX_MODE_EN from the RSMU UMC index register. */
+static uint32_t umc_v6_1_get_umc_index_mode_state(struct amdgpu_device *adev)
+{
+	uint32_t reg_val = RREG32_SOC15(RSMU, 0,
+			mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
+	uint32_t mode_en;
+
+	mode_en = REG_GET_FIELD(reg_val, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
+			RSMU_UMC_INDEX_MODE_EN);
+	return mode_en;
+}
+
 static inline uint32_t get_umc_6_reg_offset(struct amdgpu_device *adev,
 					    uint32_t umc_inst,
 					    uint32_t ch_inst)
@@ -165,6 +181,11 @@ static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev,
 	uint32_t ch_inst         = 0;
 	uint32_t umc_reg_offset  = 0;
 
+	uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);
+
+	if (rsmu_umc_index_state)
+		umc_v6_1_disable_umc_index_mode(adev);
+
 	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
 		umc_reg_offset = get_umc_6_reg_offset(adev,
 						      umc_inst,
@@ -177,6 +198,9 @@ static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev,
 							  umc_reg_offset,
 							  &(err_data->ue_count));
 	}
+
+	if (rsmu_umc_index_state)
+		umc_v6_1_enable_umc_index_mode(adev);
 }
 
 static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
@@ -186,7 +210,7 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
 					 uint32_t umc_inst)
 {
 	uint32_t lsb, mc_umc_status_addr;
-	uint64_t mc_umc_status, err_addr, retired_page;
+	uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
 	struct eeprom_table_record *err_rec;
 	uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
 
@@ -194,10 +218,14 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
 		/* UMC 6_1_2 registers */
 		mc_umc_status_addr =
 			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
+		mc_umc_addrt0 =
+			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0_ARCT);
 	} else {
 		/* UMC 6_1_1 registers */
 		mc_umc_status_addr =
 			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+		mc_umc_addrt0 =
+			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0);
 	}
 
 	/* skip error address process if -ENOMEM */
@@ -214,8 +242,8 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
 	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
 	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
 	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
-		err_addr = RREG64_PCIE(smnMCA_UMC0_MCUMC_ADDRT0 + umc_reg_offset * 4);
 
+		err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
 		/* the lowest lsb bits should be ignored */
 		lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB);
 		err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
@@ -255,6 +283,11 @@ static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev,
 	uint32_t ch_inst         = 0;
 	uint32_t umc_reg_offset  = 0;
 
+	uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);
+
+	if (rsmu_umc_index_state)
+		umc_v6_1_disable_umc_index_mode(adev);
+
 	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
 		umc_reg_offset = get_umc_6_reg_offset(adev,
 						      umc_inst,
@@ -267,6 +300,8 @@ static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev,
 					     umc_inst);
 	}
 
+	if (rsmu_umc_index_state)
+		umc_v6_1_enable_umc_index_mode(adev);
 }
 
 static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev,
@@ -313,7 +348,10 @@ static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev)
 	uint32_t ch_inst         = 0;
 	uint32_t umc_reg_offset  = 0;
 
-	umc_v6_1_disable_umc_index_mode(adev);
+	uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);
+
+	if (rsmu_umc_index_state)
+		umc_v6_1_disable_umc_index_mode(adev);
 
 	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
 		umc_reg_offset = get_umc_6_reg_offset(adev,
@@ -322,6 +360,9 @@ static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev)
 
 		umc_v6_1_err_cnt_init_per_channel(adev, umc_reg_offset);
 	}
+
+	if (rsmu_umc_index_state)
+		umc_v6_1_enable_umc_index_mode(adev);
 }
 
 const struct amdgpu_umc_funcs umc_v6_1_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 3b025a3f8c7d..e654938f6cca 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -39,10 +39,10 @@
 #include "ivsrcid/vcn/irqsrcs_vcn_1_0.h"
 #include "jpeg_v1_0.h"
 
-#define mmUVD_RBC_XX_IB_REG_CHECK				0x05ab
-#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX	1
-#define mmUVD_REG_XX_MASK							0x05ac
-#define mmUVD_REG_XX_MASK_BASE_IDX				1
+#define mmUVD_RBC_XX_IB_REG_CHECK_1_0		0x05ab
+#define mmUVD_RBC_XX_IB_REG_CHECK_1_0_BASE_IDX	1
+#define mmUVD_REG_XX_MASK_1_0			0x05ac
+#define mmUVD_REG_XX_MASK_1_0_BASE_IDX		1
 
 static int vcn_v1_0_stop(struct amdgpu_device *adev);
 static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
@@ -50,7 +50,7 @@ static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev);
 static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
 static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state);
 static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
-				struct dpg_pause_state *new_state);
+				int inst_idx, struct dpg_pause_state *new_state);
 
 static void vcn_v1_0_idle_work_handler(struct work_struct *work);
 
@@ -835,9 +835,9 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
 
 	vcn_v1_0_mc_resume_spg_mode(adev);
 
-	WREG32_SOC15(UVD, 0, mmUVD_REG_XX_MASK, 0x10);
-	WREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK,
-		RREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK) | 0x3);
+	WREG32_SOC15(UVD, 0, mmUVD_REG_XX_MASK_1_0, 0x10);
+	WREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK_1_0,
+		RREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK_1_0) | 0x3);
 
 	/* enable VCPU clock */
 	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, UVD_VCPU_CNTL__CLK_EN_MASK);
@@ -1199,7 +1199,7 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev)
 }
 
 static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
-				struct dpg_pause_state *new_state)
+				int inst_idx, struct dpg_pause_state *new_state)
 {
 	int ret_code;
 	uint32_t reg_data = 0;
@@ -1786,7 +1786,7 @@ static void vcn_v1_0_idle_work_handler(struct work_struct *work)
 		else
 			new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
 
-		adev->vcn.pause_dpg_mode(adev, &new_state);
+		adev->vcn.pause_dpg_mode(adev, 0, &new_state);
 	}
 
 	fences += amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec);
@@ -1840,7 +1840,7 @@ void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring)
 		else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
 			new_state.jpeg = VCN_DPG_STATE__PAUSE;
 
-		adev->vcn.pause_dpg_mode(adev, &new_state);
+		adev->vcn.pause_dpg_mode(adev, 0, &new_state);
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index d76ece38c97b..f4db8af6536b 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -47,18 +47,13 @@
 #define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET		0x5a7
 #define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET			0x1e2
 
-#define mmUVD_RBC_XX_IB_REG_CHECK 					0x026b
-#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 				1
-#define mmUVD_REG_XX_MASK 						0x026c
-#define mmUVD_REG_XX_MASK_BASE_IDX 					1
-
 static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev);
 static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev);
 static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev);
 static int vcn_v2_0_set_powergating_state(void *handle,
 				enum amd_powergating_state state);
 static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
-				struct dpg_pause_state *new_state);
+				int inst_idx, struct dpg_pause_state *new_state);
 
 /**
  * vcn_v2_0_early_init - set function pointers
@@ -356,88 +351,88 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec
 	/* cache window 0: fw */
 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 		if (!indirect) {
-			WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+			WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 				UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
 				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo), 0, indirect);
-			WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+			WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 				UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
 				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi), 0, indirect);
-			WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+			WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 				UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
 		} else {
-			WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+			WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 				UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
-			WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+			WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 				UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
-			WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+			WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 				UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
 		}
 		offset = 0;
 	} else {
-		WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+		WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 			UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
 			lower_32_bits(adev->vcn.inst->gpu_addr), 0, indirect);
-		WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+		WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 			UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
 			upper_32_bits(adev->vcn.inst->gpu_addr), 0, indirect);
 		offset = size;
-		WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+		WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 			UVD, 0, mmUVD_VCPU_CACHE_OFFSET0),
 			AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
 	}
 
 	if (!indirect)
-		WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+		WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 			UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
 	else
-		WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+		WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 			UVD, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
 
 	/* cache window 1: stack */
 	if (!indirect) {
-		WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+		WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 			UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
 			lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect);
-		WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+		WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 			UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
 			upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect);
-		WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+		WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 			UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
 	} else {
-		WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+		WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 			UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
-		WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+		WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 			UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
-		WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+		WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 			UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
 	}
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
 
 	/* cache window 2: context */
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
 		lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
 		upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
 
 	/* non-cache window */
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect);
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect);
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect);
 
 	/* VCN global tiling registers */
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
 }
 
@@ -583,19 +578,19 @@ static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
 		 UVD_CGC_CTRL__WCB_MODE_MASK |
 		 UVD_CGC_CTRL__VCPU_MODE_MASK |
 		 UVD_CGC_CTRL__SCPU_MODE_MASK);
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);
 
 	/* turn off clock gating */
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect);
 
 	/* turn on SUVD clock gating */
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
 
 	/* turn on sw mode in UVD_SUVD_CGC_CTRL */
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
 }
 
@@ -759,7 +754,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
 	WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, tmp);
 
 	if (indirect)
-		adev->vcn.dpg_sram_curr_addr = (uint32_t*)adev->vcn.dpg_sram_cpu_addr;
+		adev->vcn.inst->dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst->dpg_sram_cpu_addr;
 
 	/* enable clock gating */
 	vcn_v2_0_clock_gating_dpg_mode(adev, 0, indirect);
@@ -768,11 +763,11 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
 	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
 	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
 	tmp |= UVD_VCPU_CNTL__MIF_WR_LOW_THRESHOLD_BP_MASK;
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);
 
 	/* disable master interupt */
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_MASTINT_EN), 0, 0, indirect);
 
 	/* setup mmUVD_LMI_CTRL */
@@ -784,28 +779,28 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
 		UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
 		(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
 		0x00100000L);
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_LMI_CTRL), tmp, 0, indirect);
 
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_MPC_CNTL),
 		0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
 
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_MPC_SET_MUXA0),
 		((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
 		 (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
 		 (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
 		 (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
 
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_MPC_SET_MUXB0),
 		((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
 		 (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
 		 (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
 		 (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
 
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_MPC_SET_MUX),
 		((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
 		 (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
@@ -813,29 +808,29 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
 
 	vcn_v2_0_mc_resume_dpg_mode(adev, indirect);
 
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);
 
 	/* release VCPU reset to boot */
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_SOFT_RESET), 0, 0, indirect);
 
 	/* enable LMI MC and UMC channels */
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_LMI_CTRL2),
 		0x1F << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT, 0, indirect);
 
 	/* enable master interrupt */
-	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+	WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
 		UVD, 0, mmUVD_MASTINT_EN),
 		UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
 
 	if (indirect)
-		psp_update_vcn_sram(adev, 0, adev->vcn.dpg_sram_gpu_addr,
-				    (uint32_t)((uintptr_t)adev->vcn.dpg_sram_curr_addr -
-					       (uintptr_t)adev->vcn.dpg_sram_cpu_addr));
+		psp_update_vcn_sram(adev, 0, adev->vcn.inst->dpg_sram_gpu_addr,
+				    (uint32_t)((uintptr_t)adev->vcn.inst->dpg_sram_curr_addr -
+					       (uintptr_t)adev->vcn.inst->dpg_sram_cpu_addr));
 
 	/* force RBC into idle state */
 	rb_bufsz = order_base_2(ring->ring_size);
@@ -1135,7 +1130,7 @@ power_off:
 }
 
 static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
-				struct dpg_pause_state *new_state)
+				int inst_idx, struct dpg_pause_state *new_state)
 {
 	struct amdgpu_ring *ring;
 	uint32_t reg_data = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index 4ea8e20ed15d..c8b63d57a541 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -55,6 +55,8 @@ static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev);
 static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev);
 static int vcn_v2_5_set_powergating_state(void *handle,
 				enum amd_powergating_state state);
+static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
+				int inst_idx, struct dpg_pause_state *new_state);
 static int vcn_v2_5_sriov_start(struct amdgpu_device *adev);
 
 static int amdgpu_ih_clientid_vcns[] = {
@@ -212,6 +214,9 @@ static int vcn_v2_5_sw_init(void *handle)
 			return r;
 	}
 
+	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+		adev->vcn.pause_dpg_mode = vcn_v2_5_pause_dpg_mode;
+
 	return 0;
 }
 
@@ -286,7 +291,8 @@ static int vcn_v2_5_hw_init(void *handle)
 
 done:
 	if (!r)
-		DRM_INFO("VCN decode and encode initialized successfully.\n");
+		DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
+			(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
 
 	return r;
 }
@@ -309,7 +315,9 @@ static int vcn_v2_5_hw_fini(void *handle)
 			continue;
 		ring = &adev->vcn.inst[i].ring_dec;
 
-		if (RREG32_SOC15(VCN, i, mmUVD_STATUS))
+		if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
+		    (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
+		     RREG32_SOC15(VCN, i, mmUVD_STATUS)))
 			vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
 
 		ring->sched.ready = false;
@@ -384,9 +392,9 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev)
 		/* cache window 0: fw */
 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 			WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
-				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo));
+				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo));
 			WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
-				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi));
+				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi));
 			WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0);
 			offset = 0;
 		} else {
@@ -418,6 +426,99 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev)
 	}
 }
 
+static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+{
+	uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+	uint32_t offset;
+
+	/* cache window 0: fw */
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+		if (!indirect) {
+			WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+				UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect);
+			WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+				UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect);
+			WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+				UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+		} else {
+			WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+				UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
+			WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+				UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
+			WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+				UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+		}
+		offset = 0;
+	} else {
+		WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+			UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+		WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+			UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+		offset = size;
+		WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+			UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET0),
+			AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
+	}
+
+	if (!indirect)
+		WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+			UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
+	else
+		WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+			UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
+
+	/* cache window 1: stack */
+	if (!indirect) {
+		WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+			UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+		WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+			UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+		WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+			UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+	} else {
+		WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+			UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
+		WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+			UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
+		WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+			UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+	}
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
+
+	/* cache window 2: context */
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+		lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+		upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
+
+	/* non-cache window */
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect);
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect);
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect);
+
+	/* VCN global tiling registers */
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+}
+
 /**
  * vcn_v2_5_disable_clock_gating - disable VCN clock gating
  *
@@ -536,6 +637,54 @@ static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev)
 	}
 }
 
+static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev,
+		uint8_t sram_sel, int inst_idx, uint8_t indirect)
+{
+	uint32_t reg_data = 0;
+
+	/* enable sw clock gating control */
+	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+		reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+	else
+		reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+	reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+	reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+	reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
+		 UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
+		 UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
+		 UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
+		 UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
+		 UVD_CGC_CTRL__SYS_MODE_MASK |
+		 UVD_CGC_CTRL__UDEC_MODE_MASK |
+		 UVD_CGC_CTRL__MPEG2_MODE_MASK |
+		 UVD_CGC_CTRL__REGS_MODE_MASK |
+		 UVD_CGC_CTRL__RBC_MODE_MASK |
+		 UVD_CGC_CTRL__LMI_MC_MODE_MASK |
+		 UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
+		 UVD_CGC_CTRL__IDCT_MODE_MASK |
+		 UVD_CGC_CTRL__MPRD_MODE_MASK |
+		 UVD_CGC_CTRL__MPC_MODE_MASK |
+		 UVD_CGC_CTRL__LBSI_MODE_MASK |
+		 UVD_CGC_CTRL__LRBBM_MODE_MASK |
+		 UVD_CGC_CTRL__WCB_MODE_MASK |
+		 UVD_CGC_CTRL__VCPU_MODE_MASK |
+		 UVD_CGC_CTRL__MMSCH_MODE_MASK);
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);
+
+	/* turn off clock gating */
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_CGC_GATE), 0, sram_sel, indirect);
+
+	/* turn on SUVD clock gating */
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
+
+	/* turn on sw mode in UVD_SUVD_CGC_CTRL */
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
+}
+
 /**
  * vcn_v2_5_enable_clock_gating - enable VCN clock gating
  *
@@ -598,6 +747,138 @@ static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev)
 	}
 }
 
+static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+{
+	struct amdgpu_ring *ring;
+	uint32_t rb_bufsz, tmp;
+
+	/* disable register anti-hang mechanism */
+	WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), 1,
+		~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+	/* enable dynamic power gating mode */
+	tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_POWER_STATUS);
+	tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
+	tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
+	WREG32_SOC15(UVD, inst_idx, mmUVD_POWER_STATUS, tmp);
+
+	if (indirect)
+		adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
+
+	/* enable clock gating */
+	vcn_v2_5_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
+
+	/* enable VCPU clock */
+	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+	tmp |= UVD_VCPU_CNTL__BLK_RST_MASK;
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect);
+
+	/* disable master interrupt */
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_MASTINT_EN), 0, 0, indirect);
+
+	/* setup mmUVD_LMI_CTRL */
+	tmp = (0x8 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+		UVD_LMI_CTRL__REQ_MODE_MASK |
+		UVD_LMI_CTRL__CRC_RESET_MASK |
+		UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+		UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+		UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+		(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+		0x00100000L);
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_LMI_CTRL), tmp, 0, indirect);
+
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_MPC_CNTL),
+		0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
+
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_MPC_SET_MUXA0),
+		((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+		 (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+		 (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+		 (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
+
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_MPC_SET_MUXB0),
+		((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+		 (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+		 (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+		 (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
+
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_MPC_SET_MUX),
+		((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+		 (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+		 (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
+
+	vcn_v2_5_mc_resume_dpg_mode(adev, inst_idx, indirect);
+
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);
+
+	/* enable LMI MC and UMC channels */
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_LMI_CTRL2), 0, 0, indirect);
+
+	/* unblock VCPU register access */
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_RB_ARB_CTRL), 0, 0, indirect);
+
+	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect);
+
+	/* enable master interrupt */
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, inst_idx, mmUVD_MASTINT_EN),
+		UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
+
+	if (indirect)
+		psp_update_vcn_sram(adev, inst_idx, adev->vcn.inst[inst_idx].dpg_sram_gpu_addr,
+				    (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr -
+					       (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr));
+
+	ring = &adev->vcn.inst[inst_idx].ring_dec;
+	/* force RBC into idle state */
+	rb_bufsz = order_base_2(ring->ring_size);
+	tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+	WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_CNTL, tmp);
+
+	/* set the write pointer delay */
+	WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0);
+
+	/* set the wb address */
+	WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR_ADDR,
+		(upper_32_bits(ring->gpu_addr) >> 2));
+
+	/* program the RB_BASE for ring buffer */
+	WREG32_SOC15(UVD, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+		lower_32_bits(ring->gpu_addr));
+	WREG32_SOC15(UVD, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+		upper_32_bits(ring->gpu_addr));
+
+	/* Initialize the ring buffer's read and write pointers */
+	WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR, 0);
+
+	WREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2, 0);
+
+	ring->wptr = RREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR);
+	WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR,
+		lower_32_bits(ring->wptr));
+
+	return 0;
+}
+
 static int vcn_v2_5_start(struct amdgpu_device *adev)
 {
 	struct amdgpu_ring *ring;
@@ -610,6 +891,9 @@ static int vcn_v2_5_start(struct amdgpu_device *adev)
 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
 		if (adev->vcn.harvest_config & (1 << i))
 			continue;
+		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+			return vcn_v2_5_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
+
 		/* disable register anti-hang mechanism */
 		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), 0,
 			~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
@@ -973,6 +1257,35 @@ static int vcn_v2_5_sriov_start(struct amdgpu_device *adev)
 	return vcn_v2_5_mmsch_start(adev, &adev->virt.mm_table);
 }
 
+static int vcn_v2_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+{
+	int ret_code = 0;
+	uint32_t tmp;
+
+	/* Wait for power status to be 1 */
+	SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 1,
+		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+	/* wait for read ptr to be equal to write ptr */
+	tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR);
+	SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
+
+	tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR2);
+	SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code);
+
+	tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
+	SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
+
+	SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 1,
+		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+	/* disable dynamic power gating mode */
+	WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), 0,
+			~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+	return 0;
+}
+
 static int vcn_v2_5_stop(struct amdgpu_device *adev)
 {
 	uint32_t tmp;
@@ -981,6 +1294,12 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev)
 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
 		if (adev->vcn.harvest_config & (1 << i))
 			continue;
+
+		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+			r = vcn_v2_5_stop_dpg_mode(adev, i);
+			goto power_off;
+		}
+
 		/* wait for vcn idle */
 		SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r);
 		if (r)
@@ -1030,12 +1349,74 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev)
 			~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
 	}
 
+power_off:
 	if (adev->pm.dpm_enabled)
 		amdgpu_dpm_enable_uvd(adev, false);
 
 	return 0;
 }
 
+static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
+				int inst_idx, struct dpg_pause_state *new_state)
+{
+	struct amdgpu_ring *ring;
+	uint32_t reg_data = 0;
+	int ret_code;
+
+	/* act only on a state change; NOTE(review): state is not per-instance */
+	if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
+		DRM_DEBUG("dpg pause state changed %d -> %d\n",
+			adev->vcn.pause_state.fw_based, new_state->fw_based);
+		reg_data = RREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE) &
+			(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+		if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
+			ret_code = 0;
+			SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 0x1,
+				UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+			if (!ret_code) {
+				/* request DPG pause */
+				reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+				WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data);
+
+				/* wait for the pause ACK bit */
+				SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_DPG_PAUSE,
+					   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
+					   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
+
+				/* reprogram both encode rings from driver ring state */
+				ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO, lower_32_bits(ring->gpu_addr));
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4);
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+
+				ring = &adev->vcn.inst[inst_idx].ring_enc[1];
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO2, lower_32_bits(ring->gpu_addr));
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4);
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR,
+					   RREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2) & 0x7FFFFFFF);
+
+				SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS,
+					   0x0, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+			}
+		} else {
+			/* unpause dpg, no need to wait */
+			reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+			WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data);
+		}
+		adev->vcn.pause_state.fw_based = new_state->fw_based;
+	}
+
+	return 0;
+}
+
 /**
  * vcn_v2_5_dec_ring_get_rptr - get read pointer
  *
@@ -1078,6 +1459,10 @@ static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 
+	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+		WREG32_SOC15(UVD, ring->me, mmUVD_SCRATCH2,
+			lower_32_bits(ring->wptr) | 0x80000000);
+
 	if (ring->use_doorbell) {
 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index e4f4201b3c34..78b35901643b 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -689,40 +689,6 @@ static int vi_gpu_pci_config_reset(struct amdgpu_device *adev)
 	return -EINVAL;
 }
 
-int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap)
-{
-	void *pp_handle = adev->powerplay.pp_handle;
-	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
-
-	if (!pp_funcs || !pp_funcs->get_asic_baco_capability) {
-		*cap = false;
-		return -ENOENT;
-	}
-
-	return pp_funcs->get_asic_baco_capability(pp_handle, cap);
-}
-
-int smu7_asic_baco_reset(struct amdgpu_device *adev)
-{
-	void *pp_handle = adev->powerplay.pp_handle;
-	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
-
-	if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state)
-		return -ENOENT;
-
-	/* enter BACO state */
-	if (pp_funcs->set_asic_baco_state(pp_handle, 1))
-		return -EIO;
-
-	/* exit BACO state */
-	if (pp_funcs->set_asic_baco_state(pp_handle, 0))
-		return -EIO;
-
-	dev_info(adev->dev, "GPU BACO reset\n");
-
-	return 0;
-}
-
 /**
  * vi_asic_pci_config_reset - soft reset GPU
  *
@@ -747,8 +713,6 @@ static int vi_asic_pci_config_reset(struct amdgpu_device *adev)
 
 static bool vi_asic_supports_baco(struct amdgpu_device *adev)
 {
-	bool baco_support;
-
 	switch (adev->asic_type) {
 	case CHIP_FIJI:
 	case CHIP_TONGA:
@@ -756,14 +720,10 @@ static bool vi_asic_supports_baco(struct amdgpu_device *adev)
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
 	case CHIP_TOPAZ:
-		smu7_asic_get_baco_capability(adev, &baco_support);
-		break;
+		return amdgpu_dpm_is_baco_supported(adev);
 	default:
-		baco_support = false;
-		break;
+		return false;
 	}
-
-	return baco_support;
 }
 
 static enum amd_reset_method
@@ -778,7 +738,7 @@ vi_asic_reset_method(struct amdgpu_device *adev)
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
 	case CHIP_TOPAZ:
-		smu7_asic_get_baco_capability(adev, &baco_reset);
+		baco_reset = amdgpu_dpm_is_baco_supported(adev);
 		break;
 	default:
 		baco_reset = false;
@@ -807,7 +767,7 @@ static int vi_asic_reset(struct amdgpu_device *adev)
 	if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
 		if (!adev->in_suspend)
 			amdgpu_inc_vram_lost(adev);
-		r = smu7_asic_baco_reset(adev);
+		r = amdgpu_dpm_baco_reset(adev);
 	} else {
 		r = vi_asic_pci_config_reset(adev);
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.h b/drivers/gpu/drm/amd/amdgpu/vi.h
index 40d4174913a4..defb4aaf929a 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.h
+++ b/drivers/gpu/drm/amd/amdgpu/vi.h
@@ -31,7 +31,5 @@ void vi_srbm_select(struct amdgpu_device *adev,
 int vi_set_ip_blocks(struct amdgpu_device *adev);
 
 void legacy_doorbell_index_init(struct amdgpu_device *adev);
-int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap);
-int smu7_asic_baco_reset(struct amdgpu_device *adev);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index d7eb6ac37f62..2870553a2ce0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -934,6 +934,7 @@ static void uninitialize(struct device_queue_manager *dqm)
 
 static int start_nocpsch(struct device_queue_manager *dqm)
 {
+	pr_info("SW scheduler is used");
 	init_interrupts(dqm);
 	
 	if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index 7832ec6e480b..d1d68a51bfb8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -153,6 +153,14 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,
 	return r;
 }
 
+static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
+			    uint32_t pipe_id, uint32_t queue_id,
+			    struct queue_properties *p, struct mm_struct *mms)
+{
+	return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id,
+					      queue_id, p->doorbell_off);
+}
+
 static void update_mqd(struct mqd_manager *mm, void *mqd,
 		      struct queue_properties *q)
 {
@@ -409,7 +417,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
 		mqd->allocate_mqd = allocate_hiq_mqd;
 		mqd->init_mqd = init_mqd_hiq;
 		mqd->free_mqd = free_mqd_hiq_sdma;
-		mqd->load_mqd = load_mqd;
+		mqd->load_mqd = hiq_load_mqd_kiq;
 		mqd->update_mqd = update_mqd;
 		mqd->destroy_mqd = destroy_mqd;
 		mqd->is_occupied = is_occupied;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index aa9010995eaf..436b7f518979 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -191,6 +191,14 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,
 					  wptr_shift, 0, mms);
 }
 
+static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
+			    uint32_t pipe_id, uint32_t queue_id,
+			    struct queue_properties *p, struct mm_struct *mms)
+{
+	return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id,
+					      queue_id, p->doorbell_off);
+}
+
 static void update_mqd(struct mqd_manager *mm, void *mqd,
 		      struct queue_properties *q)
 {
@@ -449,7 +457,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
 		mqd->allocate_mqd = allocate_hiq_mqd;
 		mqd->init_mqd = init_mqd_hiq;
 		mqd->free_mqd = free_mqd_hiq_sdma;
-		mqd->load_mqd = load_mqd;
+		mqd->load_mqd = hiq_load_mqd_kiq;
 		mqd->update_mqd = update_mqd;
 		mqd->destroy_mqd = destroy_mqd;
 		mqd->is_occupied = is_occupied;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 536a153ac9a4..25b90f70aecd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -32,6 +32,7 @@
 #include <linux/mman.h>
 #include <linux/file.h>
 #include "amdgpu_amdkfd.h"
+#include "amdgpu.h"
 
 struct mm_struct;
 
@@ -1152,16 +1153,17 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
 void kfd_flush_tlb(struct kfd_process_device *pdd)
 {
 	struct kfd_dev *dev = pdd->dev;
-	const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
 
 	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
 		/* Nothing to flush until a VMID is assigned, which
 		 * only happens when the first queue is created.
 		 */
 		if (pdd->qpd.vmid)
-			f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
+			amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
+							pdd->qpd.vmid);
 	} else {
-		f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
+		amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
+						pdd->process->pasid);
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 76673c7234ed..9402374d2466 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -906,13 +906,16 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 
 	init_data.dce_environment = DCE_ENV_PRODUCTION_DRV;
 
-	/*
-	 * TODO debug why this doesn't work on Raven
-	 */
-	if (adev->flags & AMD_IS_APU &&
-	    adev->asic_type >= CHIP_CARRIZO &&
-	    adev->asic_type < CHIP_RAVEN)
+	switch (adev->asic_type) {
+	case CHIP_CARRIZO:
+	case CHIP_STONEY:
+	case CHIP_RAVEN:
+	case CHIP_RENOIR:
 		init_data.flags.gpu_vm_support = true;
+		break;
+	default:
+		break;
+	}
 
 	if (amdgpu_dc_feature_mask & DC_FBC_MASK)
 		init_data.flags.fbc_support = true;
@@ -8390,17 +8393,37 @@ static bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream)
 bool amdgpu_dm_psr_enable(struct dc_stream_state *stream)
 {
 	struct dc_link *link = stream->link;
-	struct dc_static_screen_events triggers = {0};
+	unsigned int vsync_rate_hz = 0;
+	struct dc_static_screen_params params = {0};
+	/* Number of consecutive static frames to wait before generating the
+	 * interrupt to enter PSR. Fail safe of 2 frames is used when the
+	 * vsync rate cannot be derived from the stream timing.
+	 */
+	unsigned int num_frames_static = 2;
 
 	DRM_DEBUG_DRIVER("Enabling psr...\n");
 
-	triggers.cursor_update = true;
-	triggers.overlay_update = true;
-	triggers.surface_update = true;
+	vsync_rate_hz = div64_u64(div64_u64((
+			stream->timing.pix_clk_100hz * 100),
+			stream->timing.v_total),
+			stream->timing.h_total);
+
+	/* Round up
+	 * Calculate number of frames such that at least 30 ms of time has
+	 * passed. The frame time in microseconds (1000000 / vsync_rate_hz)
+	 * is only computed once the rate is known to be non-zero.
+	 */
+	if (vsync_rate_hz != 0)
+		num_frames_static = (30000 / (1000000 / vsync_rate_hz)) + 1;
+
+	params.triggers.cursor_update = true;
+	params.triggers.overlay_update = true;
+	params.triggers.surface_update = true;
+	params.num_frames = num_frames_static;
 
-	dc_stream_set_static_screen_events(link->ctx->dc,
+	dc_stream_set_static_screen_params(link->ctx->dc,
 					   &stream, 1,
-					   &triggers);
+					   &params);
 
 	return dc_link_set_psr_allow_active(link, true, false);
 }
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 52fb207393ef..96b391e4b3e7 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -534,7 +534,7 @@ static int kbps_to_peak_pbn(int kbps)
 
 	peak_kbps *= 1006;
 	peak_kbps = div_u64(peak_kbps, 1000);
-	return (int) DIV_ROUND_UP(peak_kbps * 64, (54 * 8 * 1000));
+	return (int) DIV64_U64_ROUND_UP(peak_kbps * 64, (54 * 8 * 1000));
 }
 
 static void set_dsc_configs_from_fairness_vars(struct dsc_mst_fairness_params *params,
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
index e6c22345f0ea..a27d84ca15a5 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
@@ -705,8 +705,8 @@ static void hack_bounding_box(struct dcn_bw_internal_vars *v,
 
 unsigned int get_highest_allowed_voltage_level(uint32_t hw_internal_rev)
 {
-	/* for dali, the highest voltage level we want is 0 */
-	if (ASICREV_IS_DALI(hw_internal_rev))
+	/* for dali & pollock, the highest voltage level we want is 0 */
+	if (ASICREV_IS_POLLOCK(hw_internal_rev) || ASICREV_IS_DALI(hw_internal_rev))
 		return 0;
 
 	/* we are ok with all levels */
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
index 6d60ef822619..a78e5c74c79c 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
@@ -134,13 +134,13 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p
 
 #if defined(CONFIG_DRM_AMD_DC_DCN)
 	case FAMILY_RV:
-		if (ASICREV_IS_DALI(asic_id.hw_internal_rev)) {
+		if (ASICREV_IS_DALI(asic_id.hw_internal_rev) ||
+				ASICREV_IS_POLLOCK(asic_id.hw_internal_rev)) {
 			/* TEMP: this check has to come before ASICREV_IS_RENOIR */
-			/* which also incorrectly returns true for Dali */
+			/* which also incorrectly returns true for Dali/Pollock*/
 			rv2_clk_mgr_construct(ctx, clk_mgr, pp_smu);
 			break;
 		}
-
 		if (ASICREV_IS_RENOIR(asic_id.hw_internal_rev)) {
 			rn_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
 			break;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
index ffed7207c099..7ae4c06232dd 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
@@ -59,14 +59,16 @@ int rn_get_active_display_cnt_wa(
 		struct dc_state *context)
 {
 	int i, display_count;
-	bool hdmi_present = false;
+	bool tmds_present = false;
 
 	display_count = 0;
 	for (i = 0; i < context->stream_count; i++) {
 		const struct dc_stream_state *stream = context->streams[i];
 
-		if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
-			hdmi_present = true;
+		if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A ||
+				stream->signal == SIGNAL_TYPE_DVI_SINGLE_LINK ||
+				stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK)
+			tmds_present = true;
 	}
 
 	for (i = 0; i < dc->link_count; i++) {
@@ -85,7 +87,7 @@ int rn_get_active_display_cnt_wa(
 	}
 
 	/* WA for hang on HDMI after display off back back on*/
-	if (display_count == 0 && hdmi_present)
+	if (display_count == 0 && tmds_present)
 		display_count = 1;
 
 	return display_count;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 3d89904003f0..6c797fac189d 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -287,7 +287,6 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc,
 		struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
 
 		if (pipe->stream == stream && pipe->stream_res.tg) {
-			pipe->stream->adjust = *adjust;
 			dc->hwss.set_drr(&pipe,
 					1,
 					adjust->v_total_min,
@@ -511,10 +510,10 @@ bool dc_stream_program_csc_matrix(struct dc *dc, struct dc_stream_state *stream)
 	return ret;
 }
 
-void dc_stream_set_static_screen_events(struct dc *dc,
+void dc_stream_set_static_screen_params(struct dc *dc,
 		struct dc_stream_state **streams,
 		int num_streams,
-		const struct dc_static_screen_events *events)
+		const struct dc_static_screen_params *params)
 {
 	int i = 0;
 	int j = 0;
@@ -533,7 +532,7 @@ void dc_stream_set_static_screen_events(struct dc *dc,
 		}
 	}
 
-	dc->hwss.set_static_screen_control(pipes_affected, num_pipes_affected, events);
+	dc->hwss.set_static_screen_control(pipes_affected, num_pipes_affected, params);
 }
 
 static void dc_destruct(struct dc *dc)
@@ -1319,6 +1318,12 @@ bool dc_commit_state(struct dc *dc, struct dc_state *context)
 	return (result == DC_OK);
 }
 
+bool dc_is_hw_initialized(struct dc *dc)
+{
+	struct dc_bios *dcb = dc->ctx->dc_bios;
+	return dcb->funcs->is_accelerated_mode(dcb);
+}
+
 bool dc_post_update_surfaces_to_stream(struct dc *dc)
 {
 	int i;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index cef8c1ba9797..260c0b62d37d 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -45,6 +45,7 @@
 #include "dpcd_defs.h"
 #include "dmcu.h"
 #include "hw/clk_mgr.h"
+#include "../dce/dmub_psr.h"
 
 #define DC_LOGGER_INIT(logger)
 
@@ -817,8 +818,8 @@ static bool dc_link_detect_helper(struct dc_link *link,
 		}
 
 		case SIGNAL_TYPE_EDP: {
-			read_current_link_settings_on_detect(link);
 			detect_edp_sink_caps(link);
+			read_current_link_settings_on_detect(link);
 			sink_caps.transaction_type = DDC_TRANSACTION_TYPE_I2C_OVER_AUX;
 			sink_caps.signal = SIGNAL_TYPE_EDP;
 			break;
@@ -2404,10 +2405,11 @@ bool dc_link_set_psr_allow_active(struct dc_link *link, bool allow_active, bool
 {
 	struct dc  *dc = link->ctx->dc;
 	struct dmcu *dmcu = dc->res_pool->dmcu;
+	struct dmub_psr *psr = dc->res_pool->psr;
 
-
-
-	if ((dmcu != NULL && dmcu->funcs->is_dmcu_initialized(dmcu)) && link->psr_feature_enabled)
+	if ((psr != NULL) && link->psr_feature_enabled)
+		psr->funcs->set_psr_enable(psr, allow_active);
+	else if ((dmcu != NULL && dmcu->funcs->is_dmcu_initialized(dmcu)) && link->psr_feature_enabled)
 		dmcu->funcs->set_psr_enable(dmcu, allow_active, wait);
 
 	link->psr_allow_active = allow_active;
@@ -2419,8 +2421,11 @@ bool dc_link_get_psr_state(const struct dc_link *link, uint32_t *psr_state)
 {
 	struct dc  *dc = link->ctx->dc;
 	struct dmcu *dmcu = dc->res_pool->dmcu;
+	struct dmub_psr *psr = dc->res_pool->psr;
 
-	if (dmcu != NULL && link->psr_feature_enabled)
+	if (psr != NULL && link->psr_feature_enabled)
+		psr->funcs->get_psr_state(psr_state);
+	else if (dmcu != NULL && link->psr_feature_enabled)
 		dmcu->funcs->get_psr_state(dmcu, psr_state);
 
 	return true;
@@ -2467,6 +2472,7 @@ bool dc_link_setup_psr(struct dc_link *link,
 {
 	struct dc *dc;
 	struct dmcu *dmcu;
+	struct dmub_psr *psr;
 	int i;
 	/* updateSinkPsrDpcdConfig*/
 	union dpcd_psr_configuration psr_configuration;
@@ -2478,8 +2484,9 @@ bool dc_link_setup_psr(struct dc_link *link,
 
 	dc = link->ctx->dc;
 	dmcu = dc->res_pool->dmcu;
+	psr = dc->res_pool->psr;
 
-	if (!dmcu)
+	if (!dmcu && !psr)
 		return false;
 
 
@@ -2535,7 +2542,7 @@ bool dc_link_setup_psr(struct dc_link *link,
 		transmitter_to_phy_id(link->link_enc->transmitter);
 
 	psr_context->crtcTimingVerticalTotal = stream->timing.v_total;
-	psr_context->vsyncRateHz = div64_u64(div64_u64((stream->
+	psr_context->vsync_rate_hz = div64_u64(div64_u64((stream->
 					timing.pix_clk_100hz * 100),
 					stream->timing.v_total),
 					stream->timing.h_total);
@@ -2588,7 +2595,10 @@ bool dc_link_setup_psr(struct dc_link *link,
 	 */
 	psr_context->frame_delay = 0;
 
-	link->psr_feature_enabled = dmcu->funcs->setup_psr(dmcu, link, psr_context);
+	if (psr)
+		link->psr_feature_enabled = psr->funcs->setup_psr(psr, link, psr_context);
+	else
+		link->psr_feature_enabled = dmcu->funcs->setup_psr(dmcu, link, psr_context);
 
 	/* psr_enabled == 0 indicates setup_psr did not succeed, but this
 	 * should not happen since firmware should be running at this point
@@ -2863,6 +2873,39 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx)
 
 	return DC_OK;
 }
+
+enum dc_status dc_link_reallocate_mst_payload(struct dc_link *link)
+{
+	int i;
+	struct pipe_ctx *pipe_ctx;
+
+	// Pass 1: deallocate the payload of each pipe on this link that is MST and not dpms_off
+	for (i = 0; i < MAX_PIPES; i++) {
+		pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i];
+		if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link == link &&
+				pipe_ctx->stream->dpms_off == false &&
+				pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
+			deallocate_mst_payload(pipe_ctx);
+		}
+	}
+
+	for (i = 0; i < MAX_PIPES; i++) {
+		pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i];
+		if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link == link &&
+				pipe_ctx->stream->dpms_off == false &&
+				pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
+			/* enable/disable PHY will clear connection between BE and FE
+			 * need to restore it.
+			 */
+			link->link_enc->funcs->connect_dig_be_to_fe(link->link_enc,
+									pipe_ctx->stream_res.stream_enc->id, true);
+			dc_link_allocate_mst_payload(pipe_ctx);
+		}
+	}
+	// NOTE(review): helper return values above are discarded; DC_OK is always returned
+	return DC_OK;
+}
+
 #if defined(CONFIG_DRM_AMD_DC_HDCP)
 static void update_psp_stream_config(struct pipe_ctx *pipe_ctx, bool dpms_off)
 {
@@ -3361,3 +3404,10 @@ const struct dc_link_settings *dc_link_get_link_cap(
 		return &link->preferred_link_setting;
 	return &link->verified_link_cap;
 }
+
+void dc_link_overwrite_extended_receiver_cap(
+		struct dc_link *link)
+{
+	dp_overwrite_extended_receiver_cap(link);
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 38b0f4347383..6ab298c65247 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -1217,24 +1217,33 @@ static void configure_lttpr_mode(struct dc_link *link)
 	uint8_t repeater_cnt;
 	uint32_t aux_interval_address;
 	uint8_t repeater_id;
+	enum dc_status result = DC_ERROR_UNEXPECTED;
 	uint8_t repeater_mode = DP_PHY_REPEATER_MODE_TRANSPARENT;
 
 	DC_LOG_HW_LINK_TRAINING("%s\n Set LTTPR to Transparent Mode\n", __func__);
-	core_link_write_dpcd(link,
+	result = core_link_write_dpcd(link,
 			DP_PHY_REPEATER_MODE,
 			(uint8_t *)&repeater_mode,
 			sizeof(repeater_mode));
 
+	if (result == DC_OK) {
+		link->dpcd_caps.lttpr_caps.mode = repeater_mode;
+	}
+
 	if (!link->is_lttpr_mode_transparent) {
 
 		DC_LOG_HW_LINK_TRAINING("%s\n Set LTTPR to Non Transparent Mode\n", __func__);
 
 		repeater_mode = DP_PHY_REPEATER_MODE_NON_TRANSPARENT;
-		core_link_write_dpcd(link,
+		result = core_link_write_dpcd(link,
 				DP_PHY_REPEATER_MODE,
 				(uint8_t *)&repeater_mode,
 				sizeof(repeater_mode));
 
+		if (result == DC_OK) {
+			link->dpcd_caps.lttpr_caps.mode = repeater_mode;
+		}
+
 		repeater_cnt = convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
 		for (repeater_id = repeater_cnt; repeater_id > 0; repeater_id--) {
 			aux_interval_address = DP_TRAINING_AUX_RD_INTERVAL_PHY_REPEATER1 +
@@ -1883,6 +1892,16 @@ bool dp_verify_link_cap(
 	/* disable PHY done possible by BIOS, will be done by driver itself */
 	dp_disable_link_phy(link, link->connector_signal);
 
+	/* Temporary Renoir-specific workaround for SWDEV-215184;
+	 * PHY will sometimes be in bad state on hotplugging display from certain USB-C dongle,
+	 * so add extra cycle of enabling and disabling the PHY before first link training.
+	 */
+	if (link->link_enc->features.flags.bits.DP_IS_USB_C &&
+			link->dc->debug.usbc_combo_phy_reset_wa) {
+		dp_enable_link_phy(link, link->connector_signal, dp_cs_id, cur);
+		dp_disable_link_phy(link, link->connector_signal);
+	}
+
 	dp_cs_id = get_clock_source_id(link);
 
 	/* link training starts with the maximum common settings
@@ -2876,18 +2895,14 @@ bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd
 			return false;
 
 		previous_link_settings = link->cur_link_settings;
-		dp_disable_link_phy(link, pipe_ctx->stream->signal);
 
 		perform_link_training_with_retries(&previous_link_settings,
 			true, LINK_TRAINING_ATTEMPTS,
 			pipe_ctx,
 			pipe_ctx->stream->signal);
 
-		if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link == link &&
-				pipe_ctx->stream->dpms_off == false &&
-				pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
-			dc_link_allocate_mst_payload(pipe_ctx);
-		}
+		if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
+			dc_link_reallocate_mst_payload(link);
 
 		status = false;
 		if (out_link_loss)
@@ -3269,7 +3284,7 @@ static bool retrieve_link_cap(struct dc_link *link)
 			dpcd_data[DP_TRAINING_AUX_RD_INTERVAL];
 
 		link->dpcd_caps.ext_receiver_cap_field_present =
-				aux_rd_interval.bits.EXT_RECEIVER_CAP_FIELD_PRESENT == 1 ? true:false;
+				aux_rd_interval.bits.EXT_RECEIVER_CAP_FIELD_PRESENT == 1;
 
 		if (aux_rd_interval.bits.EXT_RECEIVER_CAP_FIELD_PRESENT == 1) {
 			uint8_t ext_cap_data[16];
@@ -3428,6 +3443,68 @@ static bool retrieve_link_cap(struct dc_link *link)
 	return true;
 }
 
+/* Re-read the 16 byte DPCD block at DP_DPCD_REV and overwrite the cached
+ * link caps; false if the read fails or the max lane count reads as zero.
+ */
+bool dp_overwrite_extended_receiver_cap(struct dc_link *link)
+{
+	uint8_t dpcd_data[16];
+	uint32_t read_dpcd_retry_cnt = 3;
+	enum dc_status status = DC_ERROR_UNEXPECTED;
+	union dp_downstream_port_present ds_port = { 0 };
+	union down_stream_port_count down_strm_port_count;
+	union edp_configuration_cap edp_config_cap;
+	int i;
+
+	for (i = 0; i < read_dpcd_retry_cnt; i++) {
+		status = core_link_read_dpcd(link, DP_DPCD_REV, dpcd_data,
+				sizeof(dpcd_data));
+		if (status == DC_OK)
+			break;
+	}
+
+	/* dpcd_data is uninitialized if every read attempt failed */
+	if (status != DC_OK)
+		return false;
+
+	link->dpcd_caps.dpcd_rev.raw = dpcd_data[DP_DPCD_REV - DP_DPCD_REV];
+
+	if (dpcd_data[DP_MAX_LANE_COUNT - DP_DPCD_REV] == 0)
+		return false;
+
+	ds_port.byte = dpcd_data[DP_DOWNSTREAMPORT_PRESENT - DP_DPCD_REV];
+
+	get_active_converter_info(ds_port.byte, link);
+
+	down_strm_port_count.raw = dpcd_data[DP_DOWN_STREAM_PORT_COUNT -
+			DP_DPCD_REV];
+
+	link->dpcd_caps.allow_invalid_MSA_timing_param =
+		down_strm_port_count.bits.IGNORE_MSA_TIMING_PARAM;
+
+	link->dpcd_caps.max_ln_count.raw = dpcd_data[
+		DP_MAX_LANE_COUNT - DP_DPCD_REV];
+
+	link->dpcd_caps.max_down_spread.raw = dpcd_data[
+		DP_MAX_DOWNSPREAD - DP_DPCD_REV];
+
+	link->reported_link_cap.lane_count =
+		link->dpcd_caps.max_ln_count.bits.MAX_LANE_COUNT;
+	link->reported_link_cap.link_rate = dpcd_data[
+		DP_MAX_LINK_RATE - DP_DPCD_REV];
+	link->reported_link_cap.link_spread =
+		link->dpcd_caps.max_down_spread.bits.MAX_DOWN_SPREAD ?
+		LINK_SPREAD_05_DOWNSPREAD_30KHZ : LINK_SPREAD_DISABLED;
+
+	edp_config_cap.raw = dpcd_data[DP_EDP_CONFIGURATION_CAP - DP_DPCD_REV];
+	link->dpcd_caps.panel_mode_edp =
+		edp_config_cap.bits.ALT_SCRAMBLER_RESET;
+	link->dpcd_caps.dpcd_display_control_capable =
+		edp_config_cap.bits.DPCD_DISPLAY_CONTROL_CAPABLE;
+
+	return true;
+}
+
 bool detect_dp_sink_caps(struct dc_link *link)
 {
 	return retrieve_link_cap(link);
@@ -3603,6 +3680,7 @@ static void set_crtc_test_pattern(struct dc_link *link,
 			struct pipe_ctx *odm_pipe;
 			enum controller_dp_color_space controller_color_space;
 			int opp_cnt = 1;
+			uint8_t count = 0;
 
 			switch (test_pattern_color_space) {
 			case DP_TEST_PATTERN_COLOR_SPACE_RGB:
@@ -3646,6 +3724,12 @@ static void set_crtc_test_pattern(struct dc_link *link,
 				NULL,
 				width,
 				height);
+			/* wait for dpg to blank pixel data with test pattern */
+			for (count = 0; count < 1000; count++)
+				if (opp->funcs->dpg_is_blanked(opp))
+					break;
+				else
+					udelay(100);
 		}
 	}
 	break;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 64a0e08fd019..a0eb9e533a61 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -2066,6 +2066,13 @@ void dc_resource_state_construct(
 	dst_ctx->clk_mgr = dc->clk_mgr;
 }
 
+
+bool dc_resource_is_dsc_encoding_supported(const struct dc *dc)
+{
+	return dc->res_pool->res_cap->num_dsc > 0;
+}
+
+
 /**
  * dc_validate_global_state() - Determine if HW can support a given state
  * Checks HW resource availability and bandwidth requirement.
@@ -2897,6 +2904,3 @@ void get_audio_check(struct audio_info *aud_modes,
 	}
 }
 
-
-
-
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 039004344dc6..3fa85a54360f 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -39,7 +39,7 @@
 #include "inc/hw/dmcu.h"
 #include "dml/display_mode_lib.h"
 
-#define DC_VER "3.2.64"
+#define DC_VER "3.2.68"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
@@ -157,11 +157,14 @@ struct dc_surface_dcc_cap {
 	bool const_color_support;
 };
 
-struct dc_static_screen_events {
-	bool force_trigger;
-	bool cursor_update;
-	bool surface_update;
-	bool overlay_update;
+struct dc_static_screen_params {
+	struct {
+		bool force_trigger;
+		bool cursor_update;
+		bool surface_update;
+		bool overlay_update;
+	} triggers;
+	unsigned int num_frames;
 };
 
 
@@ -420,6 +423,8 @@ struct dc_debug_options {
 	bool nv12_iflip_vm_wa;
 	bool disable_dram_clock_change_vactive_support;
 	bool validate_dml_output;
+	bool enable_dmcub_surface_flip;
+	bool usbc_combo_phy_reset_wa;
 };
 
 struct dc_debug_data {
@@ -910,6 +915,8 @@ void dc_resource_state_copy_construct_current(
 
 void dc_resource_state_destruct(struct dc_state *context);
 
+bool dc_resource_is_dsc_encoding_supported(const struct dc *dc);
+
 /*
  * TODO update to make it about validation sets
  * Set up streams and links associated to drive sinks
@@ -1067,6 +1074,7 @@ unsigned int dc_get_current_backlight_pwm(struct dc *dc);
 unsigned int dc_get_target_backlight_pwm(struct dc *dc);
 
 bool dc_is_dmcu_initialized(struct dc *dc);
+bool dc_is_hw_initialized(struct dc *dc);
 
 enum dc_status dc_set_clock(struct dc *dc, enum dc_clock_type clock_type, uint32_t clk_khz, uint32_t stepping);
 void dc_get_clock(struct dc *dc, enum dc_clock_type clock_type, struct dc_clock_config *clock_cfg);
diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c
index 02a63e9cb62f..737048d8a96c 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c
@@ -552,6 +552,36 @@ uint32_t generic_read_indirect_reg(const struct dc_context *ctx,
 	return value;
 }
 
+/* Read an indirect register and extract n fields (shift, mask, dest triples) */
+uint32_t generic_indirect_reg_get(const struct dc_context *ctx,
+		uint32_t addr_index, uint32_t addr_data,
+		uint32_t index, int n,
+		uint8_t shift1, uint32_t mask1, uint32_t *field_value1,
+		...)
+{
+	uint32_t reg_val;
+	va_list args;
+	int field;
+
+	va_start(args, field_value1);
+
+	reg_val = generic_read_indirect_reg(ctx, addr_index, addr_data, index);
+	/* first field comes from the named parameters */
+	*field_value1 = get_reg_field_value_ex(reg_val, mask1, shift1);
+
+	/* remaining n - 1 fields are passed as variadic triples */
+	for (field = 1; field < n; field++) {
+		uint32_t shift = va_arg(args, uint32_t);
+		uint32_t mask = va_arg(args, uint32_t);
+		uint32_t *dest = va_arg(args, uint32_t *);
+
+		*dest = get_reg_field_value_ex(reg_val, mask, shift);
+	}
+
+	va_end(args);
+
+	return reg_val;
+}
 
 uint32_t generic_indirect_reg_update_ex(const struct dc_context *ctx,
 		uint32_t addr_index, uint32_t addr_data,
diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h
index f420aeac7fbd..d25603128394 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_link.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_link.h
@@ -205,6 +205,7 @@ enum dc_detect_reason {
 bool dc_link_detect(struct dc_link *dc_link, enum dc_detect_reason reason);
 bool dc_link_get_hpd_state(struct dc_link *dc_link);
 enum dc_status dc_link_allocate_mst_payload(struct pipe_ctx *pipe_ctx);
+enum dc_status dc_link_reallocate_mst_payload(struct dc_link *link);
 
 /* Notify DC about DP RX Interrupt (aka Short Pulse Interrupt).
  * Return:
@@ -301,6 +302,9 @@ uint32_t dc_link_bandwidth_kbps(
 const struct dc_link_settings *dc_link_get_link_cap(
 		const struct dc_link *link);
 
+void dc_link_overwrite_extended_receiver_cap(
+		struct dc_link *link);
+
 bool dc_submit_i2c(
 		struct dc *dc,
 		uint32_t link_index,
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index 37c10dbf269e..92096de79dec 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -439,10 +439,10 @@ bool dc_stream_get_crc(struct dc *dc,
 		       uint32_t *g_y,
 		       uint32_t *b_cb);
 
-void dc_stream_set_static_screen_events(struct dc *dc,
+void dc_stream_set_static_screen_params(struct dc *dc,
 					struct dc_stream_state **stream,
 					int num_streams,
-					const struct dc_static_screen_events *events);
+					const struct dc_static_screen_params *params);
 
 void dc_stream_set_dyn_expansion(struct dc *dc, struct dc_stream_state *stream,
 		enum dc_dynamic_expansion option);
diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index b1a372c8df83..e59532d98cb4 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -229,6 +229,7 @@ struct dc_panel_patch {
 	unsigned int extra_t12_ms;
 	unsigned int extra_delay_backlight_off;
 	unsigned int extra_t7_ms;
+	unsigned int manage_secondary_link;
 };
 
 struct dc_edid_caps {
@@ -728,7 +729,7 @@ struct psr_context {
 	/* The VSync rate in Hz used to calculate the
 	 * step size for smooth brightness feature
 	 */
-	unsigned int vsyncRateHz;
+	unsigned int vsync_rate_hz;
 	unsigned int skipPsrWaitForPllLock;
 	unsigned int numberOfControllers;
 	/* Unused, for future use. To indicate that first changed frame from
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
index e619e67e6b51..30d953acd016 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
@@ -537,9 +537,6 @@ static void dcn10_dmcu_set_psr_enable(struct dmcu *dmcu, bool enable, bool wait)
 	if (dmcu->dmcu_state != DMCU_RUNNING)
 		return;
 
-	dcn10_get_dmcu_psr_state(dmcu, &psr_state);
-	if (psr_state == 0 && !enable)
-		return;
 	/* waitDMCUReadyForCmd */
 	REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0,
 				dmcu_wait_reg_ready_interval,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
new file mode 100644
index 000000000000..225955ec6d39
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dmub_psr.h"
+#include "dc.h"
+#include "dc_dmub_srv.h"
+#include "../../dmub/inc/dmub_srv.h"
+#include "dmub_fw_state.h"
+#include "core_types.h"
+#include "ipp.h"
+
+#define MAX_PIPES 6
+
+/**
+ * Get PSR state from firmware. Currently a stub: *psr_state is never written.
+ */
+static void dmub_get_psr_state(uint32_t *psr_state)
+{
+	// Not yet implemented
+	// Trigger GPINT interrupt from firmware
+}
+
+/**
+ * Enable/Disable PSR by sending a header-only PSR command to the DMUB service.
+ */
+static void dmub_set_psr_enable(struct dmub_psr *dmub, bool enable)
+{
+	union dmub_rb_cmd cmd;
+	struct dc_context *dc = dmub->ctx;
+
+	cmd.psr_enable.header.type = DMUB_CMD__PSR;
+
+	if (enable)
+		cmd.psr_enable.header.sub_type = DMUB_CMD__PSR_ENABLE;
+	else
+		cmd.psr_enable.header.sub_type = DMUB_CMD__PSR_DISABLE;
+
+	cmd.psr_enable.header.payload_bytes = 0; // Send header only
+	// Queue the command, execute it, then wait on the DMUB service
+	dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd.psr_enable.header);
+	dc_dmub_srv_cmd_execute(dc->dmub_srv);
+	dc_dmub_srv_wait_idle(dc->dmub_srv);
+}
+
+/**
+ * Set PSR level with a DMUB_CMD__PSR_SET_LEVEL command; no-op when PSR state is 0.
+ */
+static void dmub_set_psr_level(struct dmub_psr *dmub, uint16_t psr_level)
+{
+	union dmub_rb_cmd cmd;
+	uint32_t psr_state = 0;
+	struct dc_context *dc = dmub->ctx;
+
+	dmub_get_psr_state(&psr_state);
+	// NOTE(review): dmub_get_psr_state() is a stub, so psr_state is still 0 here
+	if (psr_state == 0)
+		return;
+
+	cmd.psr_set_level.header.type = DMUB_CMD__PSR;
+	cmd.psr_set_level.header.sub_type = DMUB_CMD__PSR_SET_LEVEL;
+	cmd.psr_set_level.header.payload_bytes = sizeof(struct dmub_cmd_psr_set_level_data);
+	cmd.psr_set_level.psr_set_level_data.psr_level = psr_level;
+
+	dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd.psr_set_level.header);
+	dc_dmub_srv_cmd_execute(dc->dmub_srv);
+	dc_dmub_srv_wait_idle(dc->dmub_srv);
+}
+
+/**
+ * Setup PSR by programming phy registers and sending psr hw context values to
+ * firmware. Returns false if no pipe in the current state drives this eDP link.
+ */
+static bool dmub_setup_psr(struct dmub_psr *dmub,
+		struct dc_link *link,
+		struct psr_context *psr_context)
+{
+	union dmub_rb_cmd cmd;
+	struct dc_context *dc = dmub->ctx;
+	struct dmub_cmd_psr_copy_settings_data *copy_settings_data
+		= &cmd.psr_copy_settings.psr_copy_settings_data;
+	struct pipe_ctx *pipe_ctx = NULL;
+	struct resource_context *res_ctx = &link->ctx->dc->current_state->res_ctx;
+	int i;
+
+	for (i = 0; i < MAX_PIPES; i++) {
+		if (res_ctx &&
+				res_ctx->pipe_ctx[i].stream &&
+				res_ctx->pipe_ctx[i].stream->link &&
+				res_ctx->pipe_ctx[i].stream->link == link &&
+				res_ctx->pipe_ctx[i].stream->link->connector_signal == SIGNAL_TYPE_EDP) {
+			pipe_ctx = &res_ctx->pipe_ctx[i];
+			break;
+		}
+	}
+
+	if (!pipe_ctx)
+		return false;
+
+	// Program DP DPHY fast training registers
+	link->link_enc->funcs->psr_program_dp_dphy_fast_training(link->link_enc,
+			psr_context->psrExitLinkTrainingRequired);
+
+	// Program DP_SEC_CNTL1 register to set transmission GPS0 line num and priority to high
+	link->link_enc->funcs->psr_program_secondary_packet(link->link_enc,
+			psr_context->sdpTransmitLineNumDeadline);
+
+	cmd.psr_copy_settings.header.type = DMUB_CMD__PSR;
+	cmd.psr_copy_settings.header.sub_type = DMUB_CMD__PSR_COPY_SETTINGS;
+	cmd.psr_copy_settings.header.payload_bytes = sizeof(struct dmub_cmd_psr_copy_settings_data);
+
+	// Hw insts
+	copy_settings_data->dpphy_inst				= psr_context->phyType;
+	copy_settings_data->aux_inst				= psr_context->channel;
+	copy_settings_data->digfe_inst				= psr_context->engineId;
+	copy_settings_data->digbe_inst				= psr_context->transmitterId;
+
+	copy_settings_data->mpcc_inst				= pipe_ctx->plane_res.mpcc_inst;
+
+	if (pipe_ctx->plane_res.hubp)
+		copy_settings_data->hubp_inst			= pipe_ctx->plane_res.hubp->inst;
+	else
+		copy_settings_data->hubp_inst			= 0;
+	if (pipe_ctx->plane_res.dpp)
+		copy_settings_data->dpp_inst			= pipe_ctx->plane_res.dpp->inst;
+	else
+		copy_settings_data->dpp_inst			= 0;
+	if (pipe_ctx->stream_res.opp)
+		copy_settings_data->opp_inst			= pipe_ctx->stream_res.opp->inst;
+	else
+		copy_settings_data->opp_inst			= 0;
+	if (pipe_ctx->stream_res.tg)
+		copy_settings_data->otg_inst			= pipe_ctx->stream_res.tg->inst;
+	else
+		copy_settings_data->otg_inst			= 0;
+
+	// Misc
+	copy_settings_data->psr_level				= psr_context->psr_level.u32all;
+	copy_settings_data->hyst_frames				= psr_context->timehyst_frames;
+	copy_settings_data->hyst_lines				= psr_context->hyst_lines;
+	copy_settings_data->phy_type				= psr_context->phyType;
+	copy_settings_data->aux_repeat				= psr_context->aux_repeats;
+	copy_settings_data->smu_optimizations_en	= psr_context->allow_smu_optimizations;
+	copy_settings_data->skip_wait_for_pll_lock	= psr_context->skipPsrWaitForPllLock;
+	copy_settings_data->frame_delay				= psr_context->frame_delay;
+	copy_settings_data->smu_phy_id				= psr_context->smuPhyId;
+	copy_settings_data->num_of_controllers		= psr_context->numberOfControllers;
+	copy_settings_data->frame_cap_ind			= psr_context->psrFrameCaptureIndicationReq;
+	copy_settings_data->phy_num					= psr_context->frame_delay & 0x7;
+	copy_settings_data->link_rate				= psr_context->frame_delay & 0xF;
+
+	dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd.psr_copy_settings.header);
+	dc_dmub_srv_cmd_execute(dc->dmub_srv);
+	dc_dmub_srv_wait_idle(dc->dmub_srv);
+
+	return true;
+}
+
+static const struct dmub_psr_funcs psr_funcs = {
+	.set_psr_enable			= dmub_set_psr_enable,
+	.setup_psr				= dmub_setup_psr,
+	.get_psr_state			= dmub_get_psr_state,
+	.set_psr_level			= dmub_set_psr_level,
+};
+
+/**
+ * Construct PSR object: store @ctx and install the psr_funcs function table.
+ */
+static void dmub_psr_construct(struct dmub_psr *psr, struct dc_context *ctx)
+{
+	psr->ctx = ctx;
+	psr->funcs = &psr_funcs;
+}
+
+/**
+ * Allocate a zeroed PSR object and initialize it for @ctx; NULL on failure.
+ */
+struct dmub_psr *dmub_psr_create(struct dc_context *ctx)
+{
+	struct dmub_psr *psr_obj;
+
+	psr_obj = kzalloc(sizeof(*psr_obj), GFP_KERNEL);
+	if (!psr_obj) {
+		BREAK_TO_DEBUGGER();
+		return NULL;
+	}
+
+	dmub_psr_construct(psr_obj, ctx);
+	return psr_obj;
+}
+
+/**
+ * Deallocate the PSR object *dmub points to and clear the caller's pointer.
+ */
+void dmub_psr_destroy(struct dmub_psr **dmub)
+{
+	kfree(*dmub);
+	*dmub = NULL;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h
new file mode 100644
index 000000000000..229958de3035
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2012-16 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DMUB_PSR_H_
+#define _DMUB_PSR_H_
+
+#include "os_types.h"
+
+struct dmub_psr {
+	struct dc_context *ctx;
+	const struct dmub_psr_funcs *funcs;
+};
+
+struct dmub_psr_funcs {
+	void (*set_psr_enable)(struct dmub_psr *dmub, bool enable);
+	bool (*setup_psr)(struct dmub_psr *dmub, struct dc_link *link, struct psr_context *psr_context);
+	void (*get_psr_state)(uint32_t *psr_state);
+	void (*set_psr_level)(struct dmub_psr *dmub, uint16_t psr_level);
+};
+
+struct dmub_psr *dmub_psr_create(struct dc_context *ctx);
+void dmub_psr_destroy(struct dmub_psr **dmub);
+
+
+#endif /* _DMUB_PSR_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index 4939cf3b316f..5b689273ff44 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -1373,9 +1373,13 @@ static enum dc_status apply_single_controller_ctx_to_hw(
 	// DRR should set trigger event to monitor surface update event
 	if (stream->adjust.v_total_min != 0 && stream->adjust.v_total_max != 0)
 		event_triggers = 0x80;
+	/* Event triggers and num frames are initialized for DRR, but can be
+	 * updated later for PSR use. Note that DRR trigger events are
+	 * generated regardless of whether the frame count has been met.
+	 */
 	if (pipe_ctx->stream_res.tg->funcs->set_static_screen_control)
 		pipe_ctx->stream_res.tg->funcs->set_static_screen_control(
-				pipe_ctx->stream_res.tg, event_triggers);
+				pipe_ctx->stream_res.tg, event_triggers, 2);
 
 	if (!dc_is_virtual_signal(pipe_ctx->stream->signal))
 		pipe_ctx->stream_res.stream_enc->funcs->dig_connect_to_otg(
@@ -1706,6 +1710,8 @@ static void set_drr(struct pipe_ctx **pipe_ctx,
 	struct drr_params params = {0};
 	// DRR should set trigger event to monitor surface update event
 	unsigned int event_triggers = 0x80;
+	// Note DRR trigger events are generated regardless of whether num_frames is met.
+	unsigned int num_frames = 2;
 
 	params.vertical_total_max = vmax;
 	params.vertical_total_min = vmin;
@@ -1721,7 +1727,7 @@ static void set_drr(struct pipe_ctx **pipe_ctx,
 		if (vmax != 0 && vmin != 0)
 			pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control(
 					pipe_ctx[i]->stream_res.tg,
-					event_triggers);
+					event_triggers, num_frames);
 	}
 }
 
@@ -1738,30 +1744,31 @@ static void get_position(struct pipe_ctx **pipe_ctx,
 }
 
 static void set_static_screen_control(struct pipe_ctx **pipe_ctx,
-		int num_pipes, const struct dc_static_screen_events *events)
+		int num_pipes, const struct dc_static_screen_params *params)
 {
 	unsigned int i;
-	unsigned int value = 0;
+	unsigned int triggers = 0;
 
-	if (events->overlay_update)
-		value |= 0x100;
-	if (events->surface_update)
-		value |= 0x80;
-	if (events->cursor_update)
-		value |= 0x2;
-	if (events->force_trigger)
-		value |= 0x1;
+	if (params->triggers.overlay_update)
+		triggers |= 0x100;
+	if (params->triggers.surface_update)
+		triggers |= 0x80;
+	if (params->triggers.cursor_update)
+		triggers |= 0x2;
+	if (params->triggers.force_trigger)
+		triggers |= 0x1;
 
 	if (num_pipes) {
 		struct dc *dc = pipe_ctx[0]->stream->ctx->dc;
 
 		if (dc->fbc_compressor)
-			value |= 0x84;
+			triggers |= 0x84;
 	}
 
 	for (i = 0; i < num_pipes; i++)
 		pipe_ctx[i]->stream_res.tg->funcs->
-			set_static_screen_control(pipe_ctx[i]->stream_res.tg, value);
+			set_static_screen_control(pipe_ctx[i]->stream_res.tg,
+					triggers, params->num_frames);
 }
 
 /*
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
index 5f7c2c5641c4..1ea7db8eeb98 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
@@ -469,22 +469,27 @@ void dce110_timing_generator_set_drr(
 
 void dce110_timing_generator_set_static_screen_control(
 	struct timing_generator *tg,
-	uint32_t value)
+	uint32_t event_triggers,
+	uint32_t num_frames)
 {
 	struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg);
 	uint32_t static_screen_cntl = 0;
 	uint32_t addr = 0;
 
+	// By register spec, it only takes 8 bit value
+	if (num_frames > 0xFF)
+		num_frames = 0xFF;
+
 	addr = CRTC_REG(mmCRTC_STATIC_SCREEN_CONTROL);
 	static_screen_cntl = dm_read_reg(tg->ctx, addr);
 
 	set_reg_field_value(static_screen_cntl,
-				value,
+				event_triggers,
 				CRTC_STATIC_SCREEN_CONTROL,
 				CRTC_STATIC_SCREEN_EVENT_MASK);
 
 	set_reg_field_value(static_screen_cntl,
-				2,
+				num_frames,
 				CRTC_STATIC_SCREEN_CONTROL,
 				CRTC_STATIC_SCREEN_FRAME_COUNT);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h
index 768ccf27ada9..d8a5ed7b485d 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h
@@ -231,7 +231,8 @@ void dce110_timing_generator_set_drr(
 
 void dce110_timing_generator_set_static_screen_control(
 	struct timing_generator *tg,
-	uint32_t value);
+	uint32_t event_triggers,
+	uint32_t num_frames);
 
 void dce110_timing_generator_get_crtc_scanoutpos(
 	struct timing_generator *tg,
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
index 098e56962f2a..82bc4e192bbf 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
@@ -819,13 +819,18 @@ void dce120_tg_set_colors(struct timing_generator *tg,
 
 static void dce120_timing_generator_set_static_screen_control(
 	struct timing_generator *tg,
-	uint32_t value)
+	uint32_t event_triggers,
+	uint32_t num_frames)
 {
 	struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg);
 
+	// By register spec, it only takes 8 bit value
+	if (num_frames > 0xFF)
+		num_frames = 0xFF;
+
 	CRTC_REG_UPDATE_2(CRTC0_CRTC_STATIC_SCREEN_CONTROL,
-			CRTC_STATIC_SCREEN_EVENT_MASK, value,
-			CRTC_STATIC_SCREEN_FRAME_COUNT, 2);
+			CRTC_STATIC_SCREEN_EVENT_MASK, event_triggers,
+			CRTC_STATIC_SCREEN_FRAME_COUNT, num_frames);
 }
 
 void dce120_timing_generator_set_test_pattern(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c
index 935c892622a0..4d3f7d5e1473 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c
@@ -88,26 +88,6 @@ enum dscl_mode_sel {
 	DSCL_MODE_DSCL_BYPASS = 6
 };
 
-static const struct dpp_input_csc_matrix dpp_input_csc_matrix[] = {
-	{COLOR_SPACE_SRGB,
-		{0x2000, 0, 0, 0, 0, 0x2000, 0, 0, 0, 0, 0x2000, 0} },
-	{COLOR_SPACE_SRGB_LIMITED,
-		{0x2000, 0, 0, 0, 0, 0x2000, 0, 0, 0, 0, 0x2000, 0} },
-	{COLOR_SPACE_YCBCR601,
-		{0x2cdd, 0x2000, 0, 0xe991, 0xe926, 0x2000, 0xf4fd, 0x10ef,
-						0, 0x2000, 0x38b4, 0xe3a6} },
-	{COLOR_SPACE_YCBCR601_LIMITED,
-		{0x3353, 0x2568, 0, 0xe400, 0xe5dc, 0x2568, 0xf367, 0x1108,
-						0, 0x2568, 0x40de, 0xdd3a} },
-	{COLOR_SPACE_YCBCR709,
-		{0x3265, 0x2000, 0, 0xe6ce, 0xf105, 0x2000, 0xfa01, 0xa7d, 0,
-						0x2000, 0x3b61, 0xe24f} },
-
-	{COLOR_SPACE_YCBCR709_LIMITED,
-		{0x39a6, 0x2568, 0, 0xe0d6, 0xeedd, 0x2568, 0xf925, 0x9a8, 0,
-						0x2568, 0x43ee, 0xdbb2} }
-};
-
 static void program_gamut_remap(
 		struct dcn10_dpp *dpp,
 		const uint16_t *regval,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
index a02c10e23e0d..f36a0d8cedfe 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
@@ -930,6 +930,9 @@ static bool hubbub1_get_dcc_compression_cap(struct hubbub *hubbub,
 		output->grph.rgb.max_compressed_blk_size = 64;
 		output->grph.rgb.independent_64b_blks = true;
 		break;
+	default:
+		ASSERT(false);
+		break;
 	}
 
 	output->capable = true;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index 2baff3cd0ae5..f2127afb37b2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -1188,8 +1188,14 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context)
 		if (can_apply_seamless_boot &&
 			pipe_ctx->stream != NULL &&
 			pipe_ctx->stream_res.tg->funcs->is_tg_enabled(
-				pipe_ctx->stream_res.tg))
+				pipe_ctx->stream_res.tg)) {
+			// Enable double buffering for OTG_BLANK no matter if
+			// seamless boot is enabled or not to suppress global sync
+			// signals when OTG blanked. This is to prevent pipe from
+			// requesting data while in PSR.
+			tg->funcs->tg_init(tg);
 			continue;
+		}
 
 		/* Disable on the current state so the new one isn't cleared. */
 		pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
@@ -2704,6 +2710,8 @@ void dcn10_set_drr(struct pipe_ctx **pipe_ctx,
 	struct drr_params params = {0};
 	// DRR set trigger event mapped to OTG_TRIG_A (bit 11) for manual control flow
 	unsigned int event_triggers = 0x800;
+	// Note DRR trigger events are generated regardless of whether num_frames is met.
+	unsigned int num_frames = 2;
 
 	params.vertical_total_max = vmax;
 	params.vertical_total_min = vmin;
@@ -2720,7 +2728,7 @@ void dcn10_set_drr(struct pipe_ctx **pipe_ctx,
 		if (vmax != 0 && vmin != 0)
 			pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control(
 					pipe_ctx[i]->stream_res.tg,
-					event_triggers);
+					event_triggers, num_frames);
 	}
 }
 
@@ -2737,21 +2745,22 @@ void dcn10_get_position(struct pipe_ctx **pipe_ctx,
 }
 
 void dcn10_set_static_screen_control(struct pipe_ctx **pipe_ctx,
-		int num_pipes, const struct dc_static_screen_events *events)
+		int num_pipes, const struct dc_static_screen_params *params)
 {
 	unsigned int i;
-	unsigned int value = 0;
+	unsigned int triggers = 0;
 
-	if (events->surface_update)
-		value |= 0x80;
-	if (events->cursor_update)
-		value |= 0x2;
-	if (events->force_trigger)
-		value |= 0x1;
+	if (params->triggers.surface_update)
+		triggers |= 0x80;
+	if (params->triggers.cursor_update)
+		triggers |= 0x2;
+	if (params->triggers.force_trigger)
+		triggers |= 0x1;
 
 	for (i = 0; i < num_pipes; i++)
 		pipe_ctx[i]->stream_res.tg->funcs->
-			set_static_screen_control(pipe_ctx[i]->stream_res.tg, value);
+			set_static_screen_control(pipe_ctx[i]->stream_res.tg,
+					triggers, params->num_frames);
 }
 
 static void dcn10_config_stereo_parameters(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
index 55b8f3b2fc4e..4d20f6586bb5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
@@ -132,7 +132,7 @@ void dcn10_get_position(struct pipe_ctx **pipe_ctx,
 		int num_pipes,
 		struct crtc_position *position);
 void dcn10_set_static_screen_control(struct pipe_ctx **pipe_ctx,
-		int num_pipes, const struct dc_static_screen_events *events);
+		int num_pipes, const struct dc_static_screen_params *params);
 void dcn10_setup_stereo(struct pipe_ctx *pipe_ctx, struct dc *dc);
 void dce110_set_avmute(struct pipe_ctx *pipe_ctx, bool enable);
 void dcn10_log_hw_state(struct dc *dc,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
index cd7412dc42d1..a9a43b397db9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
@@ -789,21 +789,26 @@ void optc1_set_early_control(
 
 void optc1_set_static_screen_control(
 	struct timing_generator *optc,
-	uint32_t value)
+	uint32_t event_triggers,
+	uint32_t num_frames)
 {
 	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
+	// By register spec, it only takes 8 bit value
+	if (num_frames > 0xFF)
+		num_frames = 0xFF;
+
 	/* Bit 8 is no longer applicable in RV for PSR case,
 	 * set bit 8 to 0 if given
 	 */
-	if ((value & STATIC_SCREEN_EVENT_MASK_RANGETIMING_DOUBLE_BUFFER_UPDATE_EN)
+	if ((event_triggers & STATIC_SCREEN_EVENT_MASK_RANGETIMING_DOUBLE_BUFFER_UPDATE_EN)
 			!= 0)
-		value = value &
+		event_triggers = event_triggers &
 		~STATIC_SCREEN_EVENT_MASK_RANGETIMING_DOUBLE_BUFFER_UPDATE_EN;
 
 	REG_SET_2(OTG_STATIC_SCREEN_CONTROL, 0,
-			OTG_STATIC_SCREEN_EVENT_MASK, value,
-			OTG_STATIC_SCREEN_FRAME_COUNT, 2);
+			OTG_STATIC_SCREEN_EVENT_MASK, event_triggers,
+			OTG_STATIC_SCREEN_FRAME_COUNT, num_frames);
 }
 
 void optc1_setup_manual_trigger(struct timing_generator *optc)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
index 3afeb1a30f21..f277656d5464 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
@@ -625,7 +625,8 @@ void optc1_set_drr(
 
 void optc1_set_static_screen_control(
 	struct timing_generator *optc,
-	uint32_t value);
+	uint32_t event_triggers,
+	uint32_t num_frames);
 
 void optc1_program_stereo(struct timing_generator *optc,
 	const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c
index 4d7e45892f08..13e057d7ee93 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c
@@ -104,7 +104,7 @@ static void dpp2_cnv_setup (
 	uint32_t pixel_format = 0;
 	uint32_t alpha_en = 1;
 	enum dc_color_space color_space = COLOR_SPACE_SRGB;
-	enum dcn10_input_csc_select select = INPUT_CSC_SELECT_BYPASS;
+	enum dcn20_input_csc_select select = DCN2_ICSC_SELECT_BYPASS;
 	bool force_disable_cursor = false;
 	struct out_csc_color_matrix tbl_entry;
 	uint32_t is_2bit = 0;
@@ -145,25 +145,25 @@ static void dpp2_cnv_setup (
 		force_disable_cursor = false;
 		pixel_format = 65;
 		color_space = COLOR_SPACE_YCBCR709;
-		select = INPUT_CSC_SELECT_ICSC;
+		select = DCN2_ICSC_SELECT_ICSC_A;
 		break;
 	case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb:
 		force_disable_cursor = true;
 		pixel_format = 64;
 		color_space = COLOR_SPACE_YCBCR709;
-		select = INPUT_CSC_SELECT_ICSC;
+		select = DCN2_ICSC_SELECT_ICSC_A;
 		break;
 	case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr:
 		force_disable_cursor = true;
 		pixel_format = 67;
 		color_space = COLOR_SPACE_YCBCR709;
-		select = INPUT_CSC_SELECT_ICSC;
+		select = DCN2_ICSC_SELECT_ICSC_A;
 		break;
 	case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb:
 		force_disable_cursor = true;
 		pixel_format = 66;
 		color_space = COLOR_SPACE_YCBCR709;
-		select = INPUT_CSC_SELECT_ICSC;
+		select = DCN2_ICSC_SELECT_ICSC_A;
 		break;
 	case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
 		pixel_format = 22;
@@ -177,7 +177,7 @@ static void dpp2_cnv_setup (
 	case SURFACE_PIXEL_FORMAT_VIDEO_AYCrCb8888:
 		pixel_format = 12;
 		color_space = COLOR_SPACE_YCBCR709;
-		select = INPUT_CSC_SELECT_ICSC;
+		select = DCN2_ICSC_SELECT_ICSC_A;
 		break;
 	case SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FIX:
 		pixel_format = 112;
@@ -188,13 +188,13 @@ static void dpp2_cnv_setup (
 	case SURFACE_PIXEL_FORMAT_VIDEO_ACrYCb2101010:
 		pixel_format = 114;
 		color_space = COLOR_SPACE_YCBCR709;
-		select = INPUT_CSC_SELECT_ICSC;
+		select = DCN2_ICSC_SELECT_ICSC_A;
 		is_2bit = 1;
 		break;
 	case SURFACE_PIXEL_FORMAT_VIDEO_CrYCbA1010102:
 		pixel_format = 115;
 		color_space = COLOR_SPACE_YCBCR709;
-		select = INPUT_CSC_SELECT_ICSC;
+		select = DCN2_ICSC_SELECT_ICSC_A;
 		is_2bit = 1;
 		break;
 	case SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FLOAT:
@@ -227,13 +227,13 @@ static void dpp2_cnv_setup (
 		tbl_entry.color_space = input_color_space;
 
 		if (color_space >= COLOR_SPACE_YCBCR601)
-			select = INPUT_CSC_SELECT_ICSC;
+			select = DCN2_ICSC_SELECT_ICSC_A;
 		else
-			select = INPUT_CSC_SELECT_BYPASS;
+			select = DCN2_ICSC_SELECT_BYPASS;
 
-		dpp1_program_input_csc(dpp_base, color_space, select, &tbl_entry);
+		dpp2_program_input_csc(dpp_base, color_space, select, &tbl_entry);
 	} else
-	dpp1_program_input_csc(dpp_base, color_space, select, NULL);
+	dpp2_program_input_csc(dpp_base, color_space, select, NULL);
 
 	if (force_disable_cursor) {
 		REG_UPDATE(CURSOR_CONTROL,
@@ -458,7 +458,7 @@ static struct dpp_funcs dcn20_dpp_funcs = {
 	.dpp_reset = dpp_reset,
 	.dpp_set_scaler = dpp1_dscl_set_scaler_manual_scale,
 	.dpp_get_optimal_number_of_taps = dpp1_get_optimal_number_of_taps,
-	.dpp_set_gamut_remap = dpp1_cm_set_gamut_remap,
+	.dpp_set_gamut_remap = dpp2_cm_set_gamut_remap,
 	.dpp_set_csc_adjustment = NULL,
 	.dpp_set_csc_default = NULL,
 	.dpp_program_regamma_pwl = oppn20_dummy_program_regamma_pwl,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h
index 5b03b737b1d6..27610251c57f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h
@@ -150,6 +150,16 @@
 	SRI(CM_SHAPER_RAMA_REGION_32_33, CM, id), \
 	SRI(CM_SHAPER_LUT_INDEX, CM, id)
 
+#define TF_REG_LIST_DCN20_COMMON_APPEND(id) \
+	SRI(CM_GAMUT_REMAP_B_C11_C12, CM, id),\
+	SRI(CM_GAMUT_REMAP_B_C13_C14, CM, id),\
+	SRI(CM_GAMUT_REMAP_B_C21_C22, CM, id),\
+	SRI(CM_GAMUT_REMAP_B_C23_C24, CM, id),\
+	SRI(CM_GAMUT_REMAP_B_C31_C32, CM, id),\
+	SRI(CM_GAMUT_REMAP_B_C33_C34, CM, id),\
+	SRI(CM_ICSC_B_C11_C12, CM, id), \
+	SRI(CM_ICSC_B_C33_C34, CM, id)
+
 #define TF_REG_LIST_DCN20(id) \
 	TF_REG_LIST_DCN(id), \
 	TF_REG_LIST_DCN20_COMMON(id), \
@@ -572,10 +582,29 @@
 	TF_SF(DSCL0_OBUF_MEM_PWR_CTRL, OBUF_MEM_PWR_FORCE, mask_sh),\
 	TF_SF(DSCL0_DSCL_MEM_PWR_CTRL, LUT_MEM_PWR_FORCE, mask_sh)
 
+/* DPP CM debug status register:
+ *
+ *		Status index including current ICSC, Gamut Remap Mode is 9
+ *			ICSC Mode: [4..3]
+ *			Gamut Remap Mode: [10..9]
+ */
+#define CM_TEST_DEBUG_DATA_STATUS_IDX 9
+
+#define TF_DEBUG_REG_LIST_SH_DCN20 \
+	TF_DEBUG_REG_LIST_SH_DCN10, \
+	.CM_TEST_DEBUG_DATA_ICSC_MODE = 3, \
+	.CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE = 9
+
+#define TF_DEBUG_REG_LIST_MASK_DCN20 \
+	TF_DEBUG_REG_LIST_MASK_DCN10, \
+	.CM_TEST_DEBUG_DATA_ICSC_MODE = 0x18, \
+	.CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE = 0x600
 
 #define TF_REG_FIELD_LIST_DCN2_0(type) \
 	TF_REG_FIELD_LIST(type) \
 	type CM_BLNDGAM_LUT_DATA; \
+	type CM_TEST_DEBUG_DATA_ICSC_MODE; \
+	type CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE; \
 	type FORMAT_CNV16; \
 	type CNVC_BYPASS_MSB_ALIGN; \
 	type CLAMP_POSITIVE; \
@@ -630,11 +659,22 @@ struct dcn2_dpp_mask {
 	uint32_t COLOR_KEYER_RED; \
 	uint32_t COLOR_KEYER_GREEN; \
 	uint32_t COLOR_KEYER_BLUE; \
-	uint32_t OBUF_MEM_PWR_CTRL;\
+	uint32_t OBUF_MEM_PWR_CTRL; \
 	uint32_t DSCL_MEM_PWR_CTRL
 
+#define DPP_DCN2_REG_VARIABLE_LIST_CM_APPEND \
+	uint32_t CM_GAMUT_REMAP_B_C11_C12; \
+	uint32_t CM_GAMUT_REMAP_B_C13_C14; \
+	uint32_t CM_GAMUT_REMAP_B_C21_C22; \
+	uint32_t CM_GAMUT_REMAP_B_C23_C24; \
+	uint32_t CM_GAMUT_REMAP_B_C31_C32; \
+	uint32_t CM_GAMUT_REMAP_B_C33_C34; \
+	uint32_t CM_ICSC_B_C11_C12; \
+	uint32_t CM_ICSC_B_C33_C34
+
 struct dcn2_dpp_registers {
 	DPP_DCN2_REG_VARIABLE_LIST;
+	DPP_DCN2_REG_VARIABLE_LIST_CM_APPEND;
 };
 
 struct dcn20_dpp {
@@ -656,6 +696,18 @@ struct dcn20_dpp {
 	struct pwl_params pwl_data;
 };
 
+enum dcn20_input_csc_select {
+	DCN2_ICSC_SELECT_BYPASS = 0,
+	DCN2_ICSC_SELECT_ICSC_A = 1,
+	DCN2_ICSC_SELECT_ICSC_B = 2
+};
+
+enum dcn20_gamut_remap_select {
+	DCN2_GAMUT_REMAP_BYPASS = 0,
+	DCN2_GAMUT_REMAP_COEF_A = 1,
+	DCN2_GAMUT_REMAP_COEF_B = 2
+};
+
 void dpp20_read_state(struct dpp *dpp_base,
 		struct dcn_dpp_state *s);
 
@@ -667,6 +719,16 @@ void dpp2_set_degamma(
 		struct dpp *dpp_base,
 		enum ipp_degamma_mode mode);
 
+void dpp2_cm_set_gamut_remap(
+	struct dpp *dpp_base,
+	const struct dpp_grph_csc_adjustment *adjust);
+
+void dpp2_program_input_csc(
+		struct dpp *dpp_base,
+		enum dc_color_space color_space,
+		enum dcn20_input_csc_select input_select,
+		const struct out_csc_color_matrix *tbl_entry);
+
 bool dpp20_program_blnd_lut(
 	struct dpp *dpp_base, const struct pwl_params *params);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c
index 05a3e7f97ef0..8dc3d1f73984 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c
@@ -36,6 +36,9 @@
 #define REG(reg)\
 	dpp->tf_regs->reg
 
+#define IND_REG(index) \
+	(index)
+
 #define CTX \
 	dpp->base.ctx
 
@@ -44,9 +47,6 @@
 	dpp->tf_shift->field_name, dpp->tf_mask->field_name
 
 
-
-
-
 static void dpp2_enable_cm_block(
 		struct dpp *dpp_base)
 {
@@ -158,6 +158,155 @@ void dpp2_set_degamma(
 	}
 }
 
+/* Program the gamut remap block. With no coefficients or a bypass request,
+ * mode 0 is written. Otherwise regval is loaded into whichever coefficient
+ * set (A or B) is not currently in use and the mode is switched to that set;
+ * the caller's non-bypass 'select' value is not used to choose the set.
+ */
+static void program_gamut_remap(
+		struct dcn20_dpp *dpp,
+		const uint16_t *regval,
+		enum dcn20_gamut_remap_select select)
+{
+	struct color_matrices_reg gam_regs;
+	uint32_t active_set = 0;
+
+	if (!regval || select == DCN2_GAMUT_REMAP_BYPASS) {
+		REG_SET(CM_GAMUT_REMAP_CONTROL, 0, CM_GAMUT_REMAP_MODE, 0);
+		return;
+	}
+
+	/* Read back the coefficient set currently in use and pick the
+	 * alternate one, double buffering the update so that gamut_remap
+	 * changes on a frame boundary.
+	 */
+	IX_REG_GET(CM_TEST_DEBUG_INDEX, CM_TEST_DEBUG_DATA,
+			CM_TEST_DEBUG_DATA_STATUS_IDX,
+			CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE, &active_set);
+
+	/* NOTE(review): the original comment here said the debug register
+	 * value is 1 greater than the mode we want - confirm with HW spec.
+	 */
+	select = (active_set == DCN2_GAMUT_REMAP_COEF_A) ?
+			DCN2_GAMUT_REMAP_COEF_B : DCN2_GAMUT_REMAP_COEF_A;
+
+	/* Shift/mask of the C11 and C12 coefficient fields. */
+	gam_regs.shifts.csc_c11 = dpp->tf_shift->CM_GAMUT_REMAP_C11;
+	gam_regs.shifts.csc_c12 = dpp->tf_shift->CM_GAMUT_REMAP_C12;
+	gam_regs.masks.csc_c11 = dpp->tf_mask->CM_GAMUT_REMAP_C11;
+	gam_regs.masks.csc_c12 = dpp->tf_mask->CM_GAMUT_REMAP_C12;
+
+	if (select == DCN2_GAMUT_REMAP_COEF_B) {
+		gam_regs.csc_c11_c12 = REG(CM_GAMUT_REMAP_B_C11_C12);
+		gam_regs.csc_c33_c34 = REG(CM_GAMUT_REMAP_B_C33_C34);
+	} else {
+		gam_regs.csc_c11_c12 = REG(CM_GAMUT_REMAP_C11_C12);
+		gam_regs.csc_c33_c34 = REG(CM_GAMUT_REMAP_C33_C34);
+	}
+
+	cm_helper_program_color_matrices(dpp->base.ctx, regval, &gam_regs);
+
+	/* Select the set that was just programmed. */
+	REG_SET(CM_GAMUT_REMAP_CONTROL, 0, CM_GAMUT_REMAP_MODE, select);
+}
+
+/* Program gamut remap from adjust: a software adjustment loads its
+ * temperature matrix; every other adjustment type selects bypass.
+ */
+void dpp2_cm_set_gamut_remap(
+	struct dpp *dpp_base,
+	const struct dpp_grph_csc_adjustment *adjust)
+{
+	struct dcn20_dpp *dpp = TO_DCN20_DPP(dpp_base);
+	struct fixed31_32 coeffs[12];
+	uint16_t hw_coeffs[12];
+	int idx;
+
+	if (adjust->gamut_adjust_type != GRAPHICS_GAMUT_ADJUST_TYPE_SW) {
+		program_gamut_remap(dpp, NULL, DCN2_GAMUT_REMAP_BYPASS);
+		return;
+	}
+
+	/* Copy the matrix, then convert it to the 16-bit register values. */
+	for (idx = 0; idx < 12; idx++)
+		coeffs[idx] = adjust->temperature_matrix[idx];
+	convert_float_matrix(hw_coeffs, coeffs, 12);
+	program_gamut_remap(dpp, hw_coeffs, DCN2_GAMUT_REMAP_COEF_A);
+}
+
+/* Program the DPP input CSC. A bypass request writes mode 0. Otherwise the
+ * coefficients (tbl_entry if given, else the dpp_input_csc_matrix entry for
+ * color_space) are loaded into whichever set (A or B) is not currently in
+ * use, and CM_ICSC_MODE is switched to that set.
+ */
+void dpp2_program_input_csc(
+		struct dpp *dpp_base,
+		enum dc_color_space color_space,
+		enum dcn20_input_csc_select input_select,
+		const struct out_csc_color_matrix *tbl_entry)
+{
+	struct dcn20_dpp *dpp = TO_DCN20_DPP(dpp_base);
+	const int num_matrices =
+		sizeof(dpp_input_csc_matrix) / sizeof(struct dpp_input_csc_matrix);
+	enum dcn20_input_csc_select next_set;
+	struct color_matrices_reg icsc_regs;
+	const uint16_t *coeffs = NULL;
+	uint32_t active_set = 0;
+	int k;
+
+	if (input_select == DCN2_ICSC_SELECT_BYPASS) {
+		REG_SET(CM_ICSC_CONTROL, 0, CM_ICSC_MODE, 0);
+		return;
+	}
+
+	if (tbl_entry != NULL) {
+		coeffs = tbl_entry->regval;
+	} else {
+		for (k = 0; k < num_matrices; k++) {
+			if (dpp_input_csc_matrix[k].color_space != color_space)
+				continue;
+			coeffs = dpp_input_csc_matrix[k].regval;
+			break;
+		}
+
+		/* No default matrix for this color space: program nothing. */
+		if (coeffs == NULL) {
+			BREAK_TO_DEBUGGER();
+			return;
+		}
+	}
+
+	/* Read back the CSC coefficient set currently in use and pick the
+	 * alternate one, double buffering the update so that the CSC
+	 * changes on a frame boundary.
+	 */
+	IX_REG_GET(CM_TEST_DEBUG_INDEX, CM_TEST_DEBUG_DATA,
+			CM_TEST_DEBUG_DATA_STATUS_IDX,
+			CM_TEST_DEBUG_DATA_ICSC_MODE, &active_set);
+
+	next_set = (active_set == DCN2_ICSC_SELECT_ICSC_A) ?
+			DCN2_ICSC_SELECT_ICSC_B : DCN2_ICSC_SELECT_ICSC_A;
+
+	/* Shift/mask of the C11 and C12 coefficient fields. */
+	icsc_regs.shifts.csc_c11 = dpp->tf_shift->CM_ICSC_C11;
+	icsc_regs.shifts.csc_c12 = dpp->tf_shift->CM_ICSC_C12;
+	icsc_regs.masks.csc_c11 = dpp->tf_mask->CM_ICSC_C11;
+	icsc_regs.masks.csc_c12 = dpp->tf_mask->CM_ICSC_C12;
+
+	if (next_set == DCN2_ICSC_SELECT_ICSC_B) {
+		icsc_regs.csc_c11_c12 = REG(CM_ICSC_B_C11_C12);
+		icsc_regs.csc_c33_c34 = REG(CM_ICSC_B_C33_C34);
+	} else {
+		icsc_regs.csc_c11_c12 = REG(CM_ICSC_C11_C12);
+		icsc_regs.csc_c33_c34 = REG(CM_ICSC_C33_C34);
+	}
+
+	cm_helper_program_color_matrices(dpp->base.ctx, coeffs, &icsc_regs);
+
+	/* Select the set that was just programmed. */
+	REG_SET(CM_ICSC_CONTROL, 0, CM_ICSC_MODE, next_set);
+}
+
 static void dpp20_power_on_blnd_lut(
 	struct dpp *dpp_base,
 	bool power_on)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c
index 8b8438566101..9235f7d29454 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c
@@ -293,6 +293,9 @@ bool hubbub2_get_dcc_compression_cap(struct hubbub *hubbub,
 		output->grph.rgb.max_compressed_blk_size = 64;
 		output->grph.rgb.independent_64b_blks = true;
 		break;
+	default:
+		ASSERT(false);
+		break;
 	}
 	output->capable = true;
 	output->const_color_support = true;
@@ -601,7 +604,8 @@ static const struct hubbub_funcs hubbub2_funcs = {
 	.wm_read_state = hubbub2_wm_read_state,
 	.get_dchub_ref_freq = hubbub2_get_dchub_ref_freq,
 	.program_watermarks = hubbub2_program_watermarks,
-	.allow_self_refresh_control = hubbub1_allow_self_refresh_control
+	.is_allow_self_refresh_enabled = hubbub1_is_allow_self_refresh_enabled,
+	.allow_self_refresh_control = hubbub1_allow_self_refresh_control,
 };
 
 void hubbub2_construct(struct dcn20_hubbub *hubbub,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
index 5b9cbedaa0de..cfbbaffa8654 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
@@ -686,9 +686,13 @@ enum dc_status dcn20_enable_stream_timing(
 	// DRR should set trigger event to monitor surface update event
 	if (stream->adjust.v_total_min != 0 && stream->adjust.v_total_max != 0)
 		event_triggers = 0x80;
+	/* Event triggers and num frames are initialized for DRR, but can be
+	 * updated later for PSR use. Note that DRR trigger events are
+	 * generated regardless of whether the frame count has been met.
+	 */
 	if (pipe_ctx->stream_res.tg->funcs->set_static_screen_control)
 		pipe_ctx->stream_res.tg->funcs->set_static_screen_control(
-				pipe_ctx->stream_res.tg, event_triggers);
+				pipe_ctx->stream_res.tg, event_triggers, 2);
 
 	/* TODO program crtc source select for non-virtual signal*/
 	/* TODO program FMT */
@@ -941,6 +945,9 @@ void dcn20_blank_pixel_data(
 	int width = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right;
 	int height = stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top;
 
+	if (stream->link->test_pattern_enabled)
+		return;
+
 	/* get opp dpg blank color */
 	color_space_to_black_color(dc, color_space, &black_color);
 
@@ -1638,9 +1645,9 @@ void dcn20_program_front_end_for_ctx(
 			struct hubp *hubp = pipe->plane_res.hubp;
 			int j = 0;
 
-			for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_MS
+			for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_MS*1000
 					&& hubp->funcs->hubp_is_flip_pending(hubp); j++)
-				msleep(1);
+				mdelay(1);
 		}
 	}
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
index f90031ed58a6..de9c857ab3e9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
@@ -33,6 +33,9 @@
 #define REG(reg)\
 	mpc20->mpc_regs->reg
 
+#define IND_REG(index) \
+	(index)
+
 #define CTX \
 	mpc20->base.ctx
 
@@ -132,19 +135,33 @@ void mpc2_set_output_csc(
 		const uint16_t *regval,
 		enum mpc_output_csc_mode ocsc_mode)
 {
+	uint32_t cur_mode;
 	struct dcn20_mpc *mpc20 = TO_DCN20_MPC(mpc);
 	struct color_matrices_reg ocsc_regs;
 
-	REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode);
-
-	if (ocsc_mode == MPC_OUTPUT_CSC_DISABLE)
+	if (ocsc_mode == MPC_OUTPUT_CSC_DISABLE) {
+		REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode);
 		return;
+	}
 
 	if (regval == NULL) {
 		BREAK_TO_DEBUGGER();
 		return;
 	}
 
+	/* determine which CSC coefficients (A or B) we are using
+	 * currently.  select the alternate set to double buffer
+	 * the CSC update so CSC is updated on frame boundary
+	 */
+	IX_REG_GET(MPC_OCSC_TEST_DEBUG_INDEX, MPC_OCSC_TEST_DEBUG_DATA,
+						MPC_OCSC_TEST_DEBUG_DATA_STATUS_IDX,
+						MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE, &cur_mode);
+
+	if (cur_mode != MPC_OUTPUT_CSC_COEF_A)
+		ocsc_mode = MPC_OUTPUT_CSC_COEF_A;
+	else
+		ocsc_mode = MPC_OUTPUT_CSC_COEF_B;
+
 	ocsc_regs.shifts.csc_c11 = mpc20->mpc_shift->MPC_OCSC_C11_A;
 	ocsc_regs.masks.csc_c11  = mpc20->mpc_mask->MPC_OCSC_C11_A;
 	ocsc_regs.shifts.csc_c12 = mpc20->mpc_shift->MPC_OCSC_C12_A;
@@ -157,10 +174,13 @@ void mpc2_set_output_csc(
 		ocsc_regs.csc_c11_c12 = REG(CSC_C11_C12_B[opp_id]);
 		ocsc_regs.csc_c33_c34 = REG(CSC_C33_C34_B[opp_id]);
 	}
+
 	cm_helper_program_color_matrices(
 			mpc20->base.ctx,
 			regval,
 			&ocsc_regs);
+
+	REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode);
 }
 
 void mpc2_set_ocsc_default(
@@ -169,14 +189,16 @@ void mpc2_set_ocsc_default(
 		enum dc_color_space color_space,
 		enum mpc_output_csc_mode ocsc_mode)
 {
+	uint32_t cur_mode;
 	struct dcn20_mpc *mpc20 = TO_DCN20_MPC(mpc);
 	uint32_t arr_size;
 	struct color_matrices_reg ocsc_regs;
 	const uint16_t *regval = NULL;
 
-	REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode);
-	if (ocsc_mode == MPC_OUTPUT_CSC_DISABLE)
+	if (ocsc_mode == MPC_OUTPUT_CSC_DISABLE) {
+		REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode);
 		return;
+	}
 
 	regval = find_color_matrix(color_space, &arr_size);
 
@@ -185,6 +207,19 @@ void mpc2_set_ocsc_default(
 		return;
 	}
 
+	/* determine which CSC coefficients (A or B) we are using
+	 * currently.  select the alternate set to double buffer
+	 * the CSC update so CSC is updated on frame boundary
+	 */
+	IX_REG_GET(MPC_OCSC_TEST_DEBUG_INDEX, MPC_OCSC_TEST_DEBUG_DATA,
+						MPC_OCSC_TEST_DEBUG_DATA_STATUS_IDX,
+						MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE, &cur_mode);
+
+	if (cur_mode != MPC_OUTPUT_CSC_COEF_A)
+		ocsc_mode = MPC_OUTPUT_CSC_COEF_A;
+	else
+		ocsc_mode = MPC_OUTPUT_CSC_COEF_B;
+
 	ocsc_regs.shifts.csc_c11 = mpc20->mpc_shift->MPC_OCSC_C11_A;
 	ocsc_regs.masks.csc_c11  = mpc20->mpc_mask->MPC_OCSC_C11_A;
 	ocsc_regs.shifts.csc_c12 = mpc20->mpc_shift->MPC_OCSC_C12_A;
@@ -203,6 +238,8 @@ void mpc2_set_ocsc_default(
 			mpc20->base.ctx,
 			regval,
 			&ocsc_regs);
+
+	REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode);
 }
 
 static void mpc2_ogam_get_reg_field(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h
index 9f53192da2dc..c78fd5123497 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h
@@ -80,6 +80,10 @@
 	SRII(DENORM_CLAMP_G_Y, MPC_OUT, inst),\
 	SRII(DENORM_CLAMP_B_CB, MPC_OUT, inst)
 
+#define MPC_DBG_REG_LIST_DCN2_0() \
+	SR(MPC_OCSC_TEST_DEBUG_DATA),\
+	SR(MPC_OCSC_TEST_DEBUG_INDEX)
+
 #define MPC_REG_VARIABLE_LIST_DCN2_0 \
 	MPC_COMMON_REG_VARIABLE_LIST \
 	uint32_t MPCC_TOP_GAIN[MAX_MPCC]; \
@@ -118,6 +122,8 @@
 	uint32_t MPCC_OGAM_LUT_RAM_CONTROL[MAX_MPCC];\
 	uint32_t MPCC_OGAM_LUT_DATA[MAX_MPCC];\
 	uint32_t MPCC_OGAM_MODE[MAX_MPCC];\
+	uint32_t MPC_OCSC_TEST_DEBUG_DATA;\
+	uint32_t MPC_OCSC_TEST_DEBUG_INDEX;\
 	uint32_t CSC_MODE[MAX_OPP]; \
 	uint32_t CSC_C11_C12_A[MAX_OPP]; \
 	uint32_t CSC_C33_C34_A[MAX_OPP]; \
@@ -134,6 +140,7 @@
 	SF(MPCC0_MPCC_TOP_GAIN, MPCC_TOP_GAIN, mask_sh),\
 	SF(MPCC0_MPCC_BOT_GAIN_INSIDE, MPCC_BOT_GAIN_INSIDE, mask_sh),\
 	SF(MPCC0_MPCC_BOT_GAIN_OUTSIDE, MPCC_BOT_GAIN_OUTSIDE, mask_sh),\
+	SF(MPC_OCSC_TEST_DEBUG_INDEX, MPC_OCSC_TEST_DEBUG_INDEX, mask_sh),\
 	SF(MPC_OUT0_CSC_MODE, MPC_OCSC_MODE, mask_sh),\
 	SF(MPC_OUT0_CSC_C11_C12_A, MPC_OCSC_C11_A, mask_sh),\
 	SF(MPC_OUT0_CSC_C11_C12_A, MPC_OCSC_C12_A, mask_sh),\
@@ -174,6 +181,19 @@
 	SF(MPC_OUT0_DENORM_CLAMP_B_CB, MPC_OUT_DENORM_CLAMP_MAX_B_CB, mask_sh),\
 	SF(MPC_OUT0_DENORM_CLAMP_B_CB, MPC_OUT_DENORM_CLAMP_MIN_B_CB, mask_sh)
 
+/*
+ *	DCN2 MPC_OCSC debug status register:
+ *
+ *		Status index including current OCSC Mode is 1
+ *			OCSC Mode: [1..0]
+ */
+#define MPC_OCSC_TEST_DEBUG_DATA_STATUS_IDX 1
+
+#define MPC_DEBUG_REG_LIST_SH_DCN20 \
+	.MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE = 0
+
+#define MPC_DEBUG_REG_LIST_MASK_DCN20 \
+	.MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE = 0x3
 
 #define MPC_REG_FIELD_LIST_DCN2_0(type) \
 	MPC_REG_FIELD_LIST(type)\
@@ -182,6 +202,8 @@
 	type MPCC_TOP_GAIN;\
 	type MPCC_BOT_GAIN_INSIDE;\
 	type MPCC_BOT_GAIN_OUTSIDE;\
+	type MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE;\
+	type MPC_OCSC_TEST_DEBUG_INDEX;\
 	type MPC_OCSC_MODE;\
 	type MPC_OCSC_C11_A;\
 	type MPC_OCSC_C12_A;\
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index 2dafa20d769d..85f90f3e24cb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -638,6 +638,7 @@ static const struct dce110_aux_registers aux_engine_regs[] = {
 #define tf_regs(id)\
 [id] = {\
 	TF_REG_LIST_DCN20(id),\
+	TF_REG_LIST_DCN20_COMMON_APPEND(id),\
 }
 
 static const struct dcn2_dpp_registers tf_regs[] = {
@@ -651,12 +652,12 @@ static const struct dcn2_dpp_registers tf_regs[] = {
 
 static const struct dcn2_dpp_shift tf_shift = {
 		TF_REG_LIST_SH_MASK_DCN20(__SHIFT),
-		TF_DEBUG_REG_LIST_SH_DCN10
+		TF_DEBUG_REG_LIST_SH_DCN20
 };
 
 static const struct dcn2_dpp_mask tf_mask = {
 		TF_REG_LIST_SH_MASK_DCN20(_MASK),
-		TF_DEBUG_REG_LIST_MASK_DCN10
+		TF_DEBUG_REG_LIST_MASK_DCN20
 };
 
 #define dwbc_regs_dcn2(id)\
@@ -706,14 +707,17 @@ static const struct dcn20_mpc_registers mpc_regs = {
 		MPC_OUT_MUX_REG_LIST_DCN2_0(3),
 		MPC_OUT_MUX_REG_LIST_DCN2_0(4),
 		MPC_OUT_MUX_REG_LIST_DCN2_0(5),
+		MPC_DBG_REG_LIST_DCN2_0()
 };
 
 static const struct dcn20_mpc_shift mpc_shift = {
-	MPC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT)
+	MPC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT),
+	MPC_DEBUG_REG_LIST_SH_DCN20
 };
 
 static const struct dcn20_mpc_mask mpc_mask = {
-	MPC_COMMON_MASK_SH_LIST_DCN2_0(_MASK)
+	MPC_COMMON_MASK_SH_LIST_DCN2_0(_MASK),
+	MPC_DEBUG_REG_LIST_MASK_DCN20
 };
 
 #define tg_regs(id)\
@@ -1857,6 +1861,22 @@ void dcn20_populate_dml_writeback_from_context(
 
 }
 
+/* Count the pipes in the ODM chain that @pipe belongs to: 0 when it has no
+ * ODM neighbours, otherwise the number of linked neighbours plus @pipe itself.
+ */
+static int get_num_odm_heads(struct pipe_ctx *pipe)
+{
+	struct pipe_ctx *cursor;
+	int neighbours = 0;
+
+	for (cursor = pipe->next_odm_pipe; cursor; cursor = cursor->next_odm_pipe)
+		neighbours++;
+	for (cursor = pipe->prev_odm_pipe; cursor; cursor = cursor->prev_odm_pipe)
+		neighbours++;
+
+	return neighbours ? neighbours + 1 : 0;
+}
+
 int dcn20_populate_dml_pipes_from_context(
 		struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes)
 {
@@ -1883,17 +1903,21 @@ int dcn20_populate_dml_pipes_from_context(
 	for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
 		struct dc_crtc_timing *timing = &res_ctx->pipe_ctx[i].stream->timing;
 		unsigned int v_total;
+		unsigned int front_porch;
 		int output_bpc;
 
 		if (!res_ctx->pipe_ctx[i].stream)
 			continue;
 
 		v_total = timing->v_total;
+		front_porch = timing->v_front_porch;
 		/* todo:
 		pipes[pipe_cnt].pipe.src.dynamic_metadata_enable = 0;
 		pipes[pipe_cnt].pipe.src.dcc = 0;
 		pipes[pipe_cnt].pipe.src.vm = 0;*/
 
+		pipes[pipe_cnt].clks_cfg.refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0;
+
 		pipes[pipe_cnt].dout.dsc_enable = res_ctx->pipe_ctx[i].stream->timing.flags.DSC;
 		/* todo: rotation?*/
 		pipes[pipe_cnt].dout.dsc_slices = res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.num_slices_h;
@@ -1915,7 +1939,7 @@ int dcn20_populate_dml_pipes_from_context(
 				- timing->h_addressable
 				- timing->h_border_left
 				- timing->h_border_right;
-		pipes[pipe_cnt].pipe.dest.vblank_start = v_total - timing->v_front_porch;
+		pipes[pipe_cnt].pipe.dest.vblank_start = v_total - front_porch;
 		pipes[pipe_cnt].pipe.dest.vblank_end = pipes[pipe_cnt].pipe.dest.vblank_start
 				- timing->v_addressable
 				- timing->v_border_top
@@ -1932,8 +1956,13 @@ int dcn20_populate_dml_pipes_from_context(
 		pipes[pipe_cnt].dout.dp_lanes = 4;
 		pipes[pipe_cnt].pipe.dest.vtotal_min = res_ctx->pipe_ctx[i].stream->adjust.v_total_min;
 		pipes[pipe_cnt].pipe.dest.vtotal_max = res_ctx->pipe_ctx[i].stream->adjust.v_total_max;
-		pipes[pipe_cnt].pipe.dest.odm_combine = res_ctx->pipe_ctx[i].prev_odm_pipe
-							|| res_ctx->pipe_ctx[i].next_odm_pipe;
+		switch (get_num_odm_heads(&res_ctx->pipe_ctx[i])) {
+		case 2:
+			pipes[pipe_cnt].pipe.dest.odm_combine = dm_odm_combine_mode_2to1;
+			break;
+		default:
+			pipes[pipe_cnt].pipe.dest.odm_combine = dm_odm_combine_mode_disabled;
+		}
 		pipes[pipe_cnt].pipe.src.hsplit_grp = res_ctx->pipe_ctx[i].pipe_idx;
 		if (res_ctx->pipe_ctx[i].top_pipe && res_ctx->pipe_ctx[i].top_pipe->plane_state
 				== res_ctx->pipe_ctx[i].plane_state)
@@ -2043,6 +2072,9 @@ int dcn20_populate_dml_pipes_from_context(
 			if (pipes[pipe_cnt].pipe.src.viewport_height > 1080)
 				pipes[pipe_cnt].pipe.src.viewport_height = 1080;
 			pipes[pipe_cnt].pipe.src.surface_height_y = pipes[pipe_cnt].pipe.src.viewport_height;
+			pipes[pipe_cnt].pipe.src.surface_width_y = pipes[pipe_cnt].pipe.src.viewport_width;
+			pipes[pipe_cnt].pipe.src.surface_height_c = pipes[pipe_cnt].pipe.src.viewport_height;
+			pipes[pipe_cnt].pipe.src.surface_width_c = pipes[pipe_cnt].pipe.src.viewport_width;
 			pipes[pipe_cnt].pipe.src.data_pitch = ((pipes[pipe_cnt].pipe.src.viewport_width + 63) / 64) * 64; /* linear sw only */
 			pipes[pipe_cnt].pipe.src.source_format = dm_444_32;
 			pipes[pipe_cnt].pipe.dest.recout_width = pipes[pipe_cnt].pipe.src.viewport_width; /*vp_width/hratio*/
@@ -2076,7 +2108,10 @@ int dcn20_populate_dml_pipes_from_context(
 			pipes[pipe_cnt].pipe.src.viewport_width_c = scl->viewport_c.width;
 			pipes[pipe_cnt].pipe.src.viewport_height = scl->viewport.height;
 			pipes[pipe_cnt].pipe.src.viewport_height_c = scl->viewport_c.height;
+			pipes[pipe_cnt].pipe.src.surface_width_y = pln->plane_size.surface_size.width;
 			pipes[pipe_cnt].pipe.src.surface_height_y = pln->plane_size.surface_size.height;
+			pipes[pipe_cnt].pipe.src.surface_width_c = pln->plane_size.chroma_size.width;
+			pipes[pipe_cnt].pipe.src.surface_height_c = pln->plane_size.chroma_size.height;
 			if (pln->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) {
 				pipes[pipe_cnt].pipe.src.data_pitch = pln->plane_size.surface_pitch;
 				pipes[pipe_cnt].pipe.src.data_pitch_c = pln->plane_size.chroma_pitch;
@@ -2490,7 +2525,7 @@ int dcn20_validate_apply_pipe_split_flags(
 			split[i] = true;
 		if (dc->debug.force_odm_combine & (1 << pipe->stream_res.tg->inst)) {
 			split[i] = true;
-			context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel][pipe_idx] = true;
+			context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel][pipe_idx] = dm_odm_combine_mode_2to1;
 		}
 		context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx] =
 			context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel][pipe_idx];
@@ -2915,7 +2950,7 @@ bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context,
 
 	if (context->bw_ctx.dml.soc.dummy_pstate_latency_us == 0 ||
 		(voltage_supported && full_pstate_supported)) {
-		context->bw_ctx.bw.dcn.clk.p_state_change_support = true;
+		context->bw_ctx.bw.dcn.clk.p_state_change_support = full_pstate_supported;
 		goto restore_dml_state;
 	}
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c
index 216ae170bc50..da63fc53cc4a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c
@@ -29,6 +29,8 @@
 #include "dm_services.h"
 #include "reg_helper.h"
 
+#include "dc_dmub_srv.h"
+
 #define DC_LOGGER_INIT(logger)
 
 #define REG(reg)\
@@ -251,10 +253,20 @@ static void hubp21_apply_PLAT_54186_wa(
 			ROTATION_ANGLE, &rotation_angle,
 			H_MIRROR_EN, &h_mirror_en);
 
-	/* apply wa only for NV12 surface with scatter gather enabled with view port > 512 */
+	/* reset persistent cached data */
+	hubp21->PLAT_54186_wa_chroma_addr_offset = 0;
+	/* apply wa only for NV12 surface with scatter gather enabled with chroma viewport > 512
+	 * along the vertical direction (viewport_c_height at 0/180 rotation, viewport_c_width at 90/270) */
 	if (address->type != PLN_ADDR_TYPE_VIDEO_PROGRESSIVE ||
-			address->video_progressive.luma_addr.high_part == 0xf4
-			|| viewport_c_height <= 512)
+			address->video_progressive.luma_addr.high_part == 0xf4)
+		return;
+
+	if ((rotation_angle == 0 || rotation_angle == 180)
+			&& viewport_c_height <= 512)
+		return;
+
+	if ((rotation_angle == 90 || rotation_angle == 270)
+				&& viewport_c_width <= 512)
 		return;
 
 	switch (rotation_angle) {
@@ -678,123 +690,167 @@ void hubp21_validate_dml_output(struct hubp *hubp,
 				dml_dlg_attr->refcyc_per_meta_chunk_flip_l, dlg_attr.refcyc_per_meta_chunk_flip_l);
 }
 
-bool hubp21_program_surface_flip_and_addr(
-	struct hubp *hubp,
-	const struct dc_plane_address *address,
-	bool flip_immediate)
+static void program_surface_flip_and_addr(struct hubp *hubp, struct surface_flip_registers *flip_regs)
 {
 	struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp);
-	struct dc_debug_options *debug = &hubp->ctx->dc->debug;
 
-	//program flip type
-	REG_UPDATE(DCSURF_FLIP_CONTROL,
-			SURFACE_FLIP_TYPE, flip_immediate);
+	REG_UPDATE_3(DCSURF_FLIP_CONTROL,
+					SURFACE_FLIP_TYPE, flip_regs->immediate,
+					SURFACE_FLIP_MODE_FOR_STEREOSYNC, flip_regs->grph_stereo,
+					SURFACE_FLIP_IN_STEREOSYNC, flip_regs->grph_stereo);
 
-	// Program VMID reg
 	REG_UPDATE(VMID_SETTINGS_0,
-			VMID, address->vmid);
+				VMID, flip_regs->vmid);
 
-	if (address->type == PLN_ADDR_TYPE_GRPH_STEREO) {
-		REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0x1);
-		REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x1);
+	REG_UPDATE_8(DCSURF_SURFACE_CONTROL,
+			PRIMARY_SURFACE_TMZ, flip_regs->tmz_surface,
+			PRIMARY_SURFACE_TMZ_C, flip_regs->tmz_surface,
+			PRIMARY_META_SURFACE_TMZ, flip_regs->tmz_surface,
+			PRIMARY_META_SURFACE_TMZ_C, flip_regs->tmz_surface,
+			SECONDARY_SURFACE_TMZ, flip_regs->tmz_surface,
+			SECONDARY_SURFACE_TMZ_C, flip_regs->tmz_surface,
+			SECONDARY_META_SURFACE_TMZ, flip_regs->tmz_surface,
+			SECONDARY_META_SURFACE_TMZ_C, flip_regs->tmz_surface);
 
-	} else {
-		// turn off stereo if not in stereo
-		REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0x0);
-		REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x0);
-	}
+	REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C, 0,
+			PRIMARY_META_SURFACE_ADDRESS_HIGH_C,
+			flip_regs->DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C);
 
+	REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_C, 0,
+			PRIMARY_META_SURFACE_ADDRESS_C,
+			flip_regs->DCSURF_PRIMARY_META_SURFACE_ADDRESS_C);
 
+	REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH, 0,
+			PRIMARY_META_SURFACE_ADDRESS_HIGH,
+			flip_regs->DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH);
+
+	REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS, 0,
+			PRIMARY_META_SURFACE_ADDRESS,
+			flip_regs->DCSURF_PRIMARY_META_SURFACE_ADDRESS);
+
+	REG_SET(DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH, 0,
+			SECONDARY_META_SURFACE_ADDRESS_HIGH,
+			flip_regs->DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH);
+
+	REG_SET(DCSURF_SECONDARY_META_SURFACE_ADDRESS, 0,
+			SECONDARY_META_SURFACE_ADDRESS,
+			flip_regs->DCSURF_SECONDARY_META_SURFACE_ADDRESS);
+
+	/* secondary surface address: high part before low */
+	REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH, 0,
+			SECONDARY_SURFACE_ADDRESS_HIGH,
+			flip_regs->DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH);
+
+	REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS, 0,
+			SECONDARY_SURFACE_ADDRESS,
+			flip_regs->DCSURF_SECONDARY_SURFACE_ADDRESS);
+
+	/* primary surface address: _C registers first, high part before low */
+	REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, 0,
+			PRIMARY_SURFACE_ADDRESS_HIGH_C,
+			flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C);
+
+	REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_C, 0,
+			PRIMARY_SURFACE_ADDRESS_C,
+			flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_C);
+
+	REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0,
+			PRIMARY_SURFACE_ADDRESS_HIGH,
+			flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH);
+	/* must stay last: HW latches the other address registers on this write if SURFACE_UPDATE_LOCK is not used */
+	REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0,
+			PRIMARY_SURFACE_ADDRESS,
+			flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS);
+}
+
+void dmcub_PLAT_54186_wa(struct hubp *hubp, struct surface_flip_registers *flip_regs)
+{
+	struct dc_dmub_srv *dmcub = hubp->ctx->dmub_srv;
+	struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp); /* NOTE(review): not referenced directly below; presumably consumed by a macro such as PERF_TRACE() - confirm before removing */
+	struct dmub_rb_cmd_PLAT_54186_wa PLAT_54186_wa = { 0 };
+	/* Build a DMUB_CMD__PLAT_54186_WA command: only the primary surface addresses and flip params are forwarded; meta/secondary fields of flip_regs are not copied. */
+	PLAT_54186_wa.header.type = DMUB_CMD__PLAT_54186_WA;
+	PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS = flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS;
+	PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_C = flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_C;
+	PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH = flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH;
+	PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C = flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C;
+	PLAT_54186_wa.flip.flip_params.grph_stereo = flip_regs->grph_stereo;
+	PLAT_54186_wa.flip.flip_params.hubp_inst = hubp->inst;
+	PLAT_54186_wa.flip.flip_params.immediate = flip_regs->immediate;
+	PLAT_54186_wa.flip.flip_params.tmz_surface = flip_regs->tmz_surface;
+	PLAT_54186_wa.flip.flip_params.vmid = flip_regs->vmid;
+	/* Queue the command, execute the queue, then wait via dc_dmub_srv_wait_idle(); a PERF_TRACE() brackets each step. */
+	PERF_TRACE();  // TODO: remove after performance is stable.
+	dc_dmub_srv_cmd_queue(dmcub, &PLAT_54186_wa.header);
+	PERF_TRACE();  // TODO: remove after performance is stable.
+	dc_dmub_srv_cmd_execute(dmcub);
+	PERF_TRACE();  // TODO: remove after performance is stable.
+	dc_dmub_srv_wait_idle(dmcub);
+	PERF_TRACE();  // TODO: remove after performance is stable.
+}
+
+bool hubp21_program_surface_flip_and_addr(
+		struct hubp *hubp,
+		const struct dc_plane_address *address,
+		bool flip_immediate)
+{
+	struct dc_debug_options *debug = &hubp->ctx->dc->debug;
+	struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp);
+	struct surface_flip_registers flip_regs = { 0 };
+
+	flip_regs.vmid = address->vmid;
 
-	/* HW automatically latch rest of address register on write to
-	 * DCSURF_PRIMARY_SURFACE_ADDRESS if SURFACE_UPDATE_LOCK is not used
-	 *
-	 * program high first and then the low addr, order matters!
-	 */
 	switch (address->type) {
 	case PLN_ADDR_TYPE_GRAPHICS:
-		/* DCN1.0 does not support const color
-		 * TODO: program DCHUBBUB_RET_PATH_DCC_CFGx_0/1
-		 * base on address->grph.dcc_const_color
-		 * x = 0, 2, 4, 6 for pipe 0, 1, 2, 3 for rgb and luma
-		 * x = 1, 3, 5, 7 for pipe 0, 1, 2, 3 for chroma
-		 */
-
-		if (address->grph.addr.quad_part == 0)
+		if (address->grph.addr.quad_part == 0) {
+			BREAK_TO_DEBUGGER();
 			break;
-
-		REG_UPDATE_2(DCSURF_SURFACE_CONTROL,
-				PRIMARY_SURFACE_TMZ, address->tmz_surface,
-				PRIMARY_META_SURFACE_TMZ, address->tmz_surface);
+		}
 
 		if (address->grph.meta_addr.quad_part != 0) {
-			REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH, 0,
-					PRIMARY_META_SURFACE_ADDRESS_HIGH,
-					address->grph.meta_addr.high_part);
-
-			REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS, 0,
-					PRIMARY_META_SURFACE_ADDRESS,
-					address->grph.meta_addr.low_part);
+			flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS =
+					address->grph.meta_addr.low_part;
+			flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH =
+					address->grph.meta_addr.high_part;
 		}
 
-		REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0,
-				PRIMARY_SURFACE_ADDRESS_HIGH,
-				address->grph.addr.high_part);
-
-		REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0,
-				PRIMARY_SURFACE_ADDRESS,
-				address->grph.addr.low_part);
+		flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS =
+				address->grph.addr.low_part;
+		flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH =
+				address->grph.addr.high_part;
 		break;
 	case PLN_ADDR_TYPE_VIDEO_PROGRESSIVE:
 		if (address->video_progressive.luma_addr.quad_part == 0
 				|| address->video_progressive.chroma_addr.quad_part == 0)
 			break;
 
-		REG_UPDATE_4(DCSURF_SURFACE_CONTROL,
-				PRIMARY_SURFACE_TMZ, address->tmz_surface,
-				PRIMARY_SURFACE_TMZ_C, address->tmz_surface,
-				PRIMARY_META_SURFACE_TMZ, address->tmz_surface,
-				PRIMARY_META_SURFACE_TMZ_C, address->tmz_surface);
-
 		if (address->video_progressive.luma_meta_addr.quad_part != 0) {
-			REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C, 0,
-					PRIMARY_META_SURFACE_ADDRESS_HIGH_C,
-					address->video_progressive.chroma_meta_addr.high_part);
-
-			REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_C, 0,
-					PRIMARY_META_SURFACE_ADDRESS_C,
-					address->video_progressive.chroma_meta_addr.low_part);
-
-			REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH, 0,
-					PRIMARY_META_SURFACE_ADDRESS_HIGH,
-					address->video_progressive.luma_meta_addr.high_part);
-
-			REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS, 0,
-					PRIMARY_META_SURFACE_ADDRESS,
-					address->video_progressive.luma_meta_addr.low_part);
+			flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS =
+					address->video_progressive.luma_meta_addr.low_part;
+			flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH =
+					address->video_progressive.luma_meta_addr.high_part;
+
+			flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS_C =
+					address->video_progressive.chroma_meta_addr.low_part;
+			flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C =
+					address->video_progressive.chroma_meta_addr.high_part;
 		}
 
-		REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, 0,
-				PRIMARY_SURFACE_ADDRESS_HIGH_C,
-				address->video_progressive.chroma_addr.high_part);
+		flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS =
+				address->video_progressive.luma_addr.low_part;
+		flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH =
+				address->video_progressive.luma_addr.high_part;
 
 		if (debug->nv12_iflip_vm_wa) {
-			REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_C, 0,
-					PRIMARY_SURFACE_ADDRESS_C,
-					address->video_progressive.chroma_addr.low_part + hubp21->PLAT_54186_wa_chroma_addr_offset);
-		} else {
-			REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_C, 0,
-					PRIMARY_SURFACE_ADDRESS_C,
-					address->video_progressive.chroma_addr.low_part);
-		}
+			flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_C =
+					address->video_progressive.chroma_addr.low_part + hubp21->PLAT_54186_wa_chroma_addr_offset;
+		} else
+			flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_C =
+					address->video_progressive.chroma_addr.low_part;
 
-		REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0,
-				PRIMARY_SURFACE_ADDRESS_HIGH,
-				address->video_progressive.luma_addr.high_part);
+		flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C =
+				address->video_progressive.chroma_addr.high_part;
 
-		REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0,
-				PRIMARY_SURFACE_ADDRESS,
-				address->video_progressive.luma_addr.low_part);
 		break;
 	case PLN_ADDR_TYPE_GRPH_STEREO:
 		if (address->grph_stereo.left_addr.quad_part == 0)
@@ -802,58 +858,46 @@ bool hubp21_program_surface_flip_and_addr(
 		if (address->grph_stereo.right_addr.quad_part == 0)
 			break;
 
-		REG_UPDATE_8(DCSURF_SURFACE_CONTROL,
-				PRIMARY_SURFACE_TMZ, address->tmz_surface,
-				PRIMARY_SURFACE_TMZ_C, address->tmz_surface,
-				PRIMARY_META_SURFACE_TMZ, address->tmz_surface,
-				PRIMARY_META_SURFACE_TMZ_C, address->tmz_surface,
-				SECONDARY_SURFACE_TMZ, address->tmz_surface,
-				SECONDARY_SURFACE_TMZ_C, address->tmz_surface,
-				SECONDARY_META_SURFACE_TMZ, address->tmz_surface,
-				SECONDARY_META_SURFACE_TMZ_C, address->tmz_surface);
+		flip_regs.grph_stereo = true;
 
 		if (address->grph_stereo.right_meta_addr.quad_part != 0) {
-
-			REG_SET(DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH, 0,
-					SECONDARY_META_SURFACE_ADDRESS_HIGH,
-					address->grph_stereo.right_meta_addr.high_part);
-
-			REG_SET(DCSURF_SECONDARY_META_SURFACE_ADDRESS, 0,
-					SECONDARY_META_SURFACE_ADDRESS,
-					address->grph_stereo.right_meta_addr.low_part);
+			flip_regs.DCSURF_SECONDARY_META_SURFACE_ADDRESS =
+					address->grph_stereo.right_meta_addr.low_part;
+			flip_regs.DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH =
+					address->grph_stereo.right_meta_addr.high_part;
 		}
-		if (address->grph_stereo.left_meta_addr.quad_part != 0) {
-
-			REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH, 0,
-					PRIMARY_META_SURFACE_ADDRESS_HIGH,
-					address->grph_stereo.left_meta_addr.high_part);
 
-			REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS, 0,
-					PRIMARY_META_SURFACE_ADDRESS,
-					address->grph_stereo.left_meta_addr.low_part);
+		if (address->grph_stereo.left_meta_addr.quad_part != 0) {
+			flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS =
+					address->grph_stereo.left_meta_addr.low_part;
+			flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH =
+					address->grph_stereo.left_meta_addr.high_part;
 		}
 
-		REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH, 0,
-				SECONDARY_SURFACE_ADDRESS_HIGH,
-				address->grph_stereo.right_addr.high_part);
+		flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS =
+				address->grph_stereo.left_addr.low_part;
+		flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH =
+				address->grph_stereo.left_addr.high_part;
 
-		REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS, 0,
-				SECONDARY_SURFACE_ADDRESS,
-				address->grph_stereo.right_addr.low_part);
+		flip_regs.DCSURF_SECONDARY_SURFACE_ADDRESS =
+				address->grph_stereo.right_addr.low_part;
+		flip_regs.DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH =
+				address->grph_stereo.right_addr.high_part;
 
-		REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0,
-				PRIMARY_SURFACE_ADDRESS_HIGH,
-				address->grph_stereo.left_addr.high_part);
-
-		REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0,
-				PRIMARY_SURFACE_ADDRESS,
-				address->grph_stereo.left_addr.low_part);
 		break;
 	default:
 		BREAK_TO_DEBUGGER();
 		break;
 	}
 
+	flip_regs.tmz_surface = address->tmz_surface;
+	flip_regs.immediate = flip_immediate;
+
+	if (hubp->ctx->dc->debug.enable_dmcub_surface_flip && address->type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE)
+		dmcub_PLAT_54186_wa(hubp, &flip_regs);
+	else
+		program_surface_flip_and_addr(hubp, &flip_regs);
+
 	hubp->request_address = *address;
 
 	return true;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
index c76449f58064..1d741bca2211 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
@@ -83,6 +83,7 @@
 #include "dcn21_resource.h"
 #include "vm_helper.h"
 #include "dcn20/dcn20_vmid.h"
+#include "../dce/dmub_psr.h"
 
 #define SOC_BOUNDING_BOX_VALID false
 #define DC_LOGGER_INIT(logger)
@@ -465,15 +466,18 @@ static const struct dcn20_mpc_registers mpc_regs = {
 		MPC_OUT_MUX_REG_LIST_DCN2_0(0),
 		MPC_OUT_MUX_REG_LIST_DCN2_0(1),
 		MPC_OUT_MUX_REG_LIST_DCN2_0(2),
-		MPC_OUT_MUX_REG_LIST_DCN2_0(3)
+		MPC_OUT_MUX_REG_LIST_DCN2_0(3),
+		MPC_DBG_REG_LIST_DCN2_0()
 };
 
 static const struct dcn20_mpc_shift mpc_shift = {
-	MPC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT)
+	MPC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT),
+	MPC_DEBUG_REG_LIST_SH_DCN20
 };
 
 static const struct dcn20_mpc_mask mpc_mask = {
-	MPC_COMMON_MASK_SH_LIST_DCN2_0(_MASK)
+	MPC_COMMON_MASK_SH_LIST_DCN2_0(_MASK),
+	MPC_DEBUG_REG_LIST_MASK_DCN20
 };
 
 #define hubp_regs(id)\
@@ -608,6 +612,7 @@ static const struct dce110_aux_registers aux_engine_regs[] = {
 #define tf_regs(id)\
 [id] = {\
 	TF_REG_LIST_DCN20(id),\
+	TF_REG_LIST_DCN20_COMMON_APPEND(id),\
 }
 
 static const struct dcn2_dpp_registers tf_regs[] = {
@@ -618,11 +623,13 @@ static const struct dcn2_dpp_registers tf_regs[] = {
 };
 
 static const struct dcn2_dpp_shift tf_shift = {
-		TF_REG_LIST_SH_MASK_DCN20(__SHIFT)
+		TF_REG_LIST_SH_MASK_DCN20(__SHIFT),
+		TF_DEBUG_REG_LIST_SH_DCN20
 };
 
 static const struct dcn2_dpp_mask tf_mask = {
-		TF_REG_LIST_SH_MASK_DCN20(_MASK)
+		TF_REG_LIST_SH_MASK_DCN20(_MASK),
+		TF_DEBUG_REG_LIST_MASK_DCN20
 };
 
 #define stream_enc_regs(id)\
@@ -828,7 +835,8 @@ static const struct dc_debug_options debug_defaults_drv = {
 		.scl_reset_length10 = true,
 		.sanity_checks = true,
 		.disable_48mhz_pwrdwn = false,
-		.nv12_iflip_vm_wa = true
+		.nv12_iflip_vm_wa = true,
+		.usbc_combo_phy_reset_wa = true
 };
 
 static const struct dc_debug_options debug_defaults_diags = {
@@ -1341,6 +1349,10 @@ static void update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
 	}
 	dcn2_1_soc.clock_limits[i] = dcn2_1_soc.clock_limits[i - 1];
 	dcn2_1_soc.num_states = i;
+
+	// diags does not retrieve proper values from SMU, do not update DML instance for diags
+	if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment) && !IS_DIAG_DC(dc->ctx->dce_environment))
+		dml_init_instance(&dc->dml, &dcn2_1_soc, &dcn2_1_ip, DML_PROJECT_DCN21);
 }
 
 /* Temporary Place holder until we can get them from fuse */
@@ -1740,6 +1752,10 @@ static bool dcn21_resource_construct(
 		goto create_fail;
 	}
 
+	// Leave as NULL to not affect current dmcu psr programming sequence
+	// Will be uncommented when functionality is confirmed to be working
+	pool->base.psr = NULL;
+
 	pool->base.abm = dce_abm_create(ctx,
 			&abm_regs,
 			&abm_shift,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
index 3b224b155e8c..e7a8ac7a1f22 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
@@ -937,7 +937,7 @@ static unsigned int CalculateVMAndRowBytes(
 		*MetaRowByte = 0;
 	}
 
-	if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_lvp) {
+	if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_l_vp) {
 		MacroTileSizeBytes = 256;
 		MacroTileHeight = BlockHeight256Bytes;
 	} else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x
@@ -1335,11 +1335,11 @@ static void dml20_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPer
 		else
 			mode_lib->vba.SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k];
 
-		if (mode_lib->vba.ODMCombineEnabled[k] == true)
+		if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
 			MainPlaneDoesODMCombine = true;
 		for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j)
 			if (mode_lib->vba.BlendingAndTiming[k] == j
-					&& mode_lib->vba.ODMCombineEnabled[j] == true)
+					&& mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
 				MainPlaneDoesODMCombine = true;
 
 		if (MainPlaneDoesODMCombine == true)
@@ -2848,12 +2848,12 @@ static void dml20_DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
 			SwathWidth = mode_lib->vba.ViewportHeight[k];
 		}
 
-		if (mode_lib->vba.ODMCombineEnabled[k] == true) {
+		if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
 			MainPlaneDoesODMCombine = true;
 		}
 		for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) {
 			if (mode_lib->vba.BlendingAndTiming[k] == j
-					&& mode_lib->vba.ODMCombineEnabled[j] == true) {
+					&& mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
 				MainPlaneDoesODMCombine = true;
 			}
 		}
@@ -3348,7 +3348,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 										== dm_420_10))
 				|| (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl
 						|| mode_lib->vba.SurfaceTiling[k]
-								== dm_sw_gfx7_2d_thin_lvp)
+								== dm_sw_gfx7_2d_thin_l_vp)
 						&& !((mode_lib->vba.SourcePixelFormat[k]
 								== dm_444_64
 								|| mode_lib->vba.SourcePixelFormat[k]
@@ -3446,10 +3446,10 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				locals->FabricAndDRAMBandwidthPerState[i] * 1000)
 				* locals->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100;
 
-		locals->ReturnBWPerState[i] = locals->ReturnBWToDCNPerState;
+		locals->ReturnBWPerState[i][0] = locals->ReturnBWToDCNPerState;
 
 		if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) {
-			locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i],
+			locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0],
 					locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency /
 					((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024
 					/ (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i]
@@ -3460,7 +3460,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				+ (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024);
 
 		if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) {
-			locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i],
+			locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0],
 				4 * locals->ReturnBWToDCNPerState *
 				(locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024
 				* locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency /
@@ -3472,7 +3472,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				locals->DCFCLKPerState[i], locals->FabricAndDRAMBandwidthPerState[i] * 1000);
 
 		if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) {
-			locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i],
+			locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0],
 					locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency /
 					((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024
 					/ (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i]
@@ -3483,7 +3483,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				+ (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024);
 
 		if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) {
-			locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i],
+			locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0],
 				4 * locals->ReturnBWToDCNPerState *
 				(locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024
 				* locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency /
@@ -3521,12 +3521,12 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 	for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
 		locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i] =
 				(mode_lib->vba.RoundTripPingLatencyCycles + 32.0) / mode_lib->vba.DCFCLKPerState[i]
-				+ locals->UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i];
-		if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i]
+				+ locals->UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i][0];
+		if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i][0]
 				> locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) {
-			locals->ROBSupport[i] = true;
+			locals->ROBSupport[i][0] = true;
 		} else {
-			locals->ROBSupport[i] = false;
+			locals->ROBSupport[i][0] = false;
 		}
 	}
 	/*Writeback Mode Support Check*/
@@ -3903,7 +3903,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				}
 				if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity
 						&& locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]
-						&& locals->ODMCombineEnablePerState[i][k] == false) {
+						&& locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
 					locals->NoOfDPP[i][j][k] = 1;
 					locals->RequiredDPPCLK[i][j][k] =
 						locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
@@ -3992,16 +3992,16 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 	/*Viewport Size Check*/
 
 	for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
-		locals->ViewportSizeSupport[i] = true;
+		locals->ViewportSizeSupport[i][0] = true;
 		for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
-			if (locals->ODMCombineEnablePerState[i][k] == true) {
+			if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
 				if (dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]))
 						> locals->MaximumSwathWidth[k]) {
-					locals->ViewportSizeSupport[i] = false;
+					locals->ViewportSizeSupport[i][0] = false;
 				}
 			} else {
 				if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) {
-					locals->ViewportSizeSupport[i] = false;
+					locals->ViewportSizeSupport[i][0] = false;
 				}
 			}
 		}
@@ -4183,8 +4183,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 						mode_lib->vba.DSCFormatFactor = 1;
 					}
 					if (locals->RequiresDSC[i][k] == true) {
-						if (locals->ODMCombineEnablePerState[i][k]
-								== true) {
+						if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
 							if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor
 									> (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) {
 								locals->DSCCLKRequiredMoreThanSupported[i] =
@@ -4207,7 +4206,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 		mode_lib->vba.TotalDSCUnitsRequired = 0.0;
 		for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 			if (locals->RequiresDSC[i][k] == true) {
-				if (locals->ODMCombineEnablePerState[i][k] == true) {
+				if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
 					mode_lib->vba.TotalDSCUnitsRequired =
 							mode_lib->vba.TotalDSCUnitsRequired + 2.0;
 				} else {
@@ -4249,7 +4248,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				mode_lib->vba.bpp = locals->OutputBppPerState[i][k];
 			}
 			if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) {
-				if (locals->ODMCombineEnablePerState[i][k] == false) {
+				if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
 					locals->DSCDelayPerState[i][k] =
 							dscceComputeDelay(
 									mode_lib->vba.DSCInputBitPerComponent[k],
@@ -4292,7 +4291,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 	for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
 		for (j = 0; j < 2; j++) {
 			for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
-				if (locals->ODMCombineEnablePerState[i][k] == true)
+				if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1)
 					locals->SwathWidthYPerState[i][j][k] = dml_min(locals->SwathWidthYSingleDPP[k], dml_round(locals->HActive[k] / 2 * locals->HRatio[k]));
 				else
 					locals->SwathWidthYPerState[i][j][k] = locals->SwathWidthYSingleDPP[k] / locals->NoOfDPP[i][j][k];
@@ -4345,28 +4344,28 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 
 				locals->EffectiveDETLBLinesLuma = dml_floor(locals->LinesInDETLuma +  dml_min(
 						locals->LinesInDETLuma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETY[k] *
-						locals->PSCL_FACTOR[k] / locals->ReturnBWPerState[i],
+						locals->PSCL_FACTOR[k] / locals->ReturnBWPerState[i][0],
 						locals->EffectiveLBLatencyHidingSourceLinesLuma),
 						locals->SwathHeightYPerState[i][j][k]);
 
 				locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + dml_min(
 						locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] *
-						locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i],
+						locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i][0],
 						locals->EffectiveLBLatencyHidingSourceLinesChroma),
 						locals->SwathHeightCPerState[i][j][k]);
 
 				if (locals->BytePerPixelInDETC[k] == 0) {
 					locals->UrgentLatencySupportUsPerState[i][j][k] = locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k])
 							/ locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] *
-								dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]);
+								dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]);
 				} else {
 					locals->UrgentLatencySupportUsPerState[i][j][k] = dml_min(
 						locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k])
 						/ locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] *
-						dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]),
+						dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]),
 							locals->EffectiveDETLBLinesChroma * (locals->HTotal[k] / locals->PixelClock[k]) / (locals->VRatio[k] / 2) -
 							locals->EffectiveDETLBLinesChroma * locals->SwathWidthYPerState[i][j][k] / 2 *
-							dml_ceil(locals->BytePerPixelInDETC[k], 2) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]));
+							dml_ceil(locals->BytePerPixelInDETC[k], 2) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]));
 				}
 			}
 		}
@@ -4406,14 +4405,14 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				locals->SwathHeightYThisState[k] = locals->SwathHeightYPerState[i][j][k];
 				locals->SwathHeightCThisState[k] = locals->SwathHeightCPerState[i][j][k];
 				locals->SwathWidthYThisState[k] = locals->SwathWidthYPerState[i][j][k];
-				mode_lib->vba.ProjectedDCFCLKDeepSleep = dml_max(
-						mode_lib->vba.ProjectedDCFCLKDeepSleep,
+				mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max(
+						mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 						mode_lib->vba.PixelClock[k] / 16.0);
 				if (mode_lib->vba.BytePerPixelInDETC[k] == 0.0) {
 					if (mode_lib->vba.VRatio[k] <= 1.0) {
-						mode_lib->vba.ProjectedDCFCLKDeepSleep =
+						mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] =
 								dml_max(
-										mode_lib->vba.ProjectedDCFCLKDeepSleep,
+										mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 										1.1
 												* dml_ceil(
 														mode_lib->vba.BytePerPixelInDETY[k],
@@ -4423,9 +4422,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 												* mode_lib->vba.PixelClock[k]
 												/ mode_lib->vba.NoOfDPP[i][j][k]);
 					} else {
-						mode_lib->vba.ProjectedDCFCLKDeepSleep =
+						mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] =
 								dml_max(
-										mode_lib->vba.ProjectedDCFCLKDeepSleep,
+										mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 										1.1
 												* dml_ceil(
 														mode_lib->vba.BytePerPixelInDETY[k],
@@ -4436,9 +4435,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 					}
 				} else {
 					if (mode_lib->vba.VRatio[k] <= 1.0) {
-						mode_lib->vba.ProjectedDCFCLKDeepSleep =
+						mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] =
 								dml_max(
-										mode_lib->vba.ProjectedDCFCLKDeepSleep,
+										mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 										1.1
 												* dml_ceil(
 														mode_lib->vba.BytePerPixelInDETY[k],
@@ -4448,9 +4447,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 												* mode_lib->vba.PixelClock[k]
 												/ mode_lib->vba.NoOfDPP[i][j][k]);
 					} else {
-						mode_lib->vba.ProjectedDCFCLKDeepSleep =
+						mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] =
 								dml_max(
-										mode_lib->vba.ProjectedDCFCLKDeepSleep,
+										mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 										1.1
 												* dml_ceil(
 														mode_lib->vba.BytePerPixelInDETY[k],
@@ -4460,9 +4459,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 												* mode_lib->vba.RequiredDPPCLK[i][j][k]);
 					}
 					if (mode_lib->vba.VRatio[k] / 2.0 <= 1.0) {
-						mode_lib->vba.ProjectedDCFCLKDeepSleep =
+						mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] =
 								dml_max(
-										mode_lib->vba.ProjectedDCFCLKDeepSleep,
+										mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 										1.1
 												* dml_ceil(
 														mode_lib->vba.BytePerPixelInDETC[k],
@@ -4473,9 +4472,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 												* mode_lib->vba.PixelClock[k]
 												/ mode_lib->vba.NoOfDPP[i][j][k]);
 					} else {
-						mode_lib->vba.ProjectedDCFCLKDeepSleep =
+						mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] =
 								dml_max(
-										mode_lib->vba.ProjectedDCFCLKDeepSleep,
+										mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 										1.1
 												* dml_ceil(
 														mode_lib->vba.BytePerPixelInDETC[k],
@@ -4511,7 +4510,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 						&mode_lib->vba.PTEBufferSizeNotExceededY[i][j][k],
 						&mode_lib->vba.dpte_row_height[k],
 						&mode_lib->vba.meta_row_height[k]);
-				mode_lib->vba.PrefetchLinesY[k] = CalculatePrefetchSourceLines(
+				mode_lib->vba.PrefetchLinesY[0][0][k] = CalculatePrefetchSourceLines(
 						mode_lib,
 						mode_lib->vba.VRatio[k],
 						mode_lib->vba.vtaps[k],
@@ -4550,7 +4549,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 							&mode_lib->vba.PTEBufferSizeNotExceededC[i][j][k],
 							&mode_lib->vba.dpte_row_height_chroma[k],
 							&mode_lib->vba.meta_row_height_chroma[k]);
-					mode_lib->vba.PrefetchLinesC[k] = CalculatePrefetchSourceLines(
+					mode_lib->vba.PrefetchLinesC[0][0][k] = CalculatePrefetchSourceLines(
 							mode_lib,
 							mode_lib->vba.VRatio[k] / 2.0,
 							mode_lib->vba.VTAPsChroma[k],
@@ -4564,14 +4563,14 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 					mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0;
 					mode_lib->vba.MetaRowBytesC = 0.0;
 					mode_lib->vba.DPTEBytesPerRowC = 0.0;
-					locals->PrefetchLinesC[k] = 0.0;
+					locals->PrefetchLinesC[0][0][k] = 0.0;
 					locals->PTEBufferSizeNotExceededC[i][j][k] = true;
 					locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma;
 				}
-				locals->PDEAndMetaPTEBytesPerFrame[k] =
+				locals->PDEAndMetaPTEBytesPerFrame[0][0][k] =
 						mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC;
-				locals->MetaRowBytes[k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC;
-				locals->DPTEBytesPerRow[k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC;
+				locals->MetaRowBytes[0][0][k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC;
+				locals->DPTEBytesPerRow[0][0][k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC;
 
 				CalculateActiveRowBandwidth(
 						mode_lib->vba.GPUVMEnable,
@@ -4598,14 +4597,14 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 									+ mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j]
 											* mode_lib->vba.MetaChunkSize)
 									* 1024.0
-									/ mode_lib->vba.ReturnBWPerState[i];
+									/ mode_lib->vba.ReturnBWPerState[i][0];
 			if (mode_lib->vba.GPUVMEnable == true) {
 				mode_lib->vba.ExtraLatency = mode_lib->vba.ExtraLatency
 						+ mode_lib->vba.TotalNumberOfActiveDPP[i][j]
 								* mode_lib->vba.PTEGroupSize
-								/ mode_lib->vba.ReturnBWPerState[i];
+								/ mode_lib->vba.ReturnBWPerState[i][0];
 			}
-			mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep;
+			mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0];
 
 			for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 				if (mode_lib->vba.BlendingAndTiming[k] == k) {
@@ -4655,7 +4654,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 			}
 
 			for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
-				locals->MaximumVStartup[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]
+				locals->MaximumVStartup[0][0][k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]
 					- dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0));
 			}
 
@@ -4700,7 +4699,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 									mode_lib->vba.RequiredDPPCLK[i][j][k],
 									mode_lib->vba.RequiredDISPCLK[i][j],
 									mode_lib->vba.PixelClock[k],
-									mode_lib->vba.ProjectedDCFCLKDeepSleep,
+									mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 									mode_lib->vba.DSCDelayPerState[i][k],
 									mode_lib->vba.NoOfDPP[i][j][k],
 									mode_lib->vba.ScalerEnabled[k],
@@ -4718,7 +4717,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 											- mode_lib->vba.VActive[k],
 									mode_lib->vba.HTotal[k],
 									mode_lib->vba.MaxInterDCNTileRepeaters,
-									mode_lib->vba.MaximumVStartup[k],
+									mode_lib->vba.MaximumVStartup[0][0][k],
 									mode_lib->vba.GPUVMMaxPageTableLevels,
 									mode_lib->vba.GPUVMEnable,
 									mode_lib->vba.DynamicMetadataEnable[k],
@@ -4728,15 +4727,15 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 									mode_lib->vba.UrgentLatencyPixelDataOnly,
 									mode_lib->vba.ExtraLatency,
 									mode_lib->vba.TimeCalc,
-									mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k],
-									mode_lib->vba.MetaRowBytes[k],
-									mode_lib->vba.DPTEBytesPerRow[k],
-									mode_lib->vba.PrefetchLinesY[k],
+									mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k],
+									mode_lib->vba.MetaRowBytes[0][0][k],
+									mode_lib->vba.DPTEBytesPerRow[0][0][k],
+									mode_lib->vba.PrefetchLinesY[0][0][k],
 									mode_lib->vba.SwathWidthYPerState[i][j][k],
 									mode_lib->vba.BytePerPixelInDETY[k],
 									mode_lib->vba.PrefillY[k],
 									mode_lib->vba.MaxNumSwY[k],
-									mode_lib->vba.PrefetchLinesC[k],
+									mode_lib->vba.PrefetchLinesC[0][0][k],
 									mode_lib->vba.BytePerPixelInDETC[k],
 									mode_lib->vba.PrefillC[k],
 									mode_lib->vba.MaxNumSwC[k],
@@ -4767,19 +4766,19 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				locals->prefetch_vm_bw_valid = true;
 				locals->prefetch_row_bw_valid = true;
 				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
-					if (locals->PDEAndMetaPTEBytesPerFrame[k] == 0)
+					if (locals->PDEAndMetaPTEBytesPerFrame[0][0][k] == 0)
 						locals->prefetch_vm_bw[k] = 0;
 					else if (locals->LinesForMetaPTE[k] > 0)
-						locals->prefetch_vm_bw[k] = locals->PDEAndMetaPTEBytesPerFrame[k]
+						locals->prefetch_vm_bw[k] = locals->PDEAndMetaPTEBytesPerFrame[0][0][k]
 							/ (locals->LinesForMetaPTE[k] * locals->HTotal[k] / locals->PixelClock[k]);
 					else {
 						locals->prefetch_vm_bw[k] = 0;
 						locals->prefetch_vm_bw_valid = false;
 					}
-					if (locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k] == 0)
+					if (locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k] == 0)
 						locals->prefetch_row_bw[k] = 0;
 					else if (locals->LinesForMetaAndDPTERow[k] > 0)
-						locals->prefetch_row_bw[k] = (locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k])
+						locals->prefetch_row_bw[k] = (locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k])
 							/ (locals->LinesForMetaAndDPTERow[k] * locals->HTotal[k] / locals->PixelClock[k]);
 					else {
 						locals->prefetch_row_bw[k] = 0;
@@ -4798,13 +4797,13 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 											mode_lib->vba.RequiredPrefetchPixelDataBWLuma[i][j][k])
 											+ mode_lib->vba.meta_row_bw[k] + mode_lib->vba.dpte_row_bw[k]);
 				}
-				locals->BandwidthWithoutPrefetchSupported[i] = true;
-				if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i]) {
-					locals->BandwidthWithoutPrefetchSupported[i] = false;
+				locals->BandwidthWithoutPrefetchSupported[i][0] = true;
+				if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i][0]) {
+					locals->BandwidthWithoutPrefetchSupported[i][0] = false;
 				}
 
 				locals->PrefetchSupported[i][j] = true;
-				if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i]) {
+				if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i][0]) {
 					locals->PrefetchSupported[i][j] = false;
 				}
 				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
@@ -4829,7 +4828,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 			if (mode_lib->vba.PrefetchSupported[i][j] == true
 					&& mode_lib->vba.VRatioInPrefetchSupported[i][j] == true) {
 				mode_lib->vba.BandwidthAvailableForImmediateFlip =
-						mode_lib->vba.ReturnBWPerState[i];
+						mode_lib->vba.ReturnBWPerState[i][0];
 				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 					mode_lib->vba.BandwidthAvailableForImmediateFlip =
 							mode_lib->vba.BandwidthAvailableForImmediateFlip
@@ -4843,9 +4842,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 					if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8
 							&& mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) {
 						mode_lib->vba.ImmediateFlipBytes[k] =
-								mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k]
-										+ mode_lib->vba.MetaRowBytes[k]
-										+ mode_lib->vba.DPTEBytesPerRow[k];
+								mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k]
+										+ mode_lib->vba.MetaRowBytes[0][0][k]
+										+ mode_lib->vba.DPTEBytesPerRow[0][0][k];
 					}
 				}
 				mode_lib->vba.TotImmediateFlipBytes = 0.0;
@@ -4873,9 +4872,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 									/ mode_lib->vba.PixelClock[k],
 							mode_lib->vba.VRatio[k],
 							mode_lib->vba.Tno_bw[k],
-							mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k],
-							mode_lib->vba.MetaRowBytes[k],
-							mode_lib->vba.DPTEBytesPerRow[k],
+							mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k],
+							mode_lib->vba.MetaRowBytes[0][0][k],
+							mode_lib->vba.DPTEBytesPerRow[0][0][k],
 							mode_lib->vba.DCCEnable[k],
 							mode_lib->vba.dpte_row_height[k],
 							mode_lib->vba.meta_row_height[k],
@@ -4900,7 +4899,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				}
 				mode_lib->vba.ImmediateFlipSupportedForState[i][j] = true;
 				if (mode_lib->vba.total_dcn_read_bw_with_flip
-						> mode_lib->vba.ReturnBWPerState[i]) {
+						> mode_lib->vba.ReturnBWPerState[i][0]) {
 					mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false;
 				}
 				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
@@ -4919,13 +4918,13 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 	for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; k++)
 		mode_lib->vba.MaxTotalVActiveRDBandwidth = mode_lib->vba.MaxTotalVActiveRDBandwidth + mode_lib->vba.ReadBandwidth[k];
 	for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
-		mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i] = dml_min(mode_lib->vba.ReturnBusWidth *
+		mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][0] = dml_min(mode_lib->vba.ReturnBusWidth *
 				mode_lib->vba.DCFCLKPerState[i], mode_lib->vba.FabricAndDRAMBandwidthPerState[i] * 1000) *
 				mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100;
-		if (mode_lib->vba.MaxTotalVActiveRDBandwidth <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i])
-			mode_lib->vba.TotalVerticalActiveBandwidthSupport[i] = true;
+		if (mode_lib->vba.MaxTotalVActiveRDBandwidth <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][0])
+			mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][0] = true;
 		else
-			mode_lib->vba.TotalVerticalActiveBandwidthSupport[i] = false;
+			mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][0] = false;
 	}
 
 	/*PTE Buffer Size Check*/
@@ -5013,7 +5012,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				status = DML_FAIL_SCALE_RATIO_TAP;
 			} else if (mode_lib->vba.SourceFormatPixelAndScanSupport != true) {
 				status = DML_FAIL_SOURCE_PIXEL_FORMAT;
-			} else if (locals->ViewportSizeSupport[i] != true) {
+			} else if (locals->ViewportSizeSupport[i][0] != true) {
 				status = DML_FAIL_VIEWPORT_SIZE;
 			} else if (locals->DIOSupport[i] != true) {
 				status = DML_FAIL_DIO_SUPPORT;
@@ -5023,7 +5022,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				status = DML_FAIL_DSC_CLK_REQUIRED;
 			} else if (locals->UrgentLatencySupport[i][j] != true) {
 				status = DML_FAIL_URGENT_LATENCY;
-			} else if (locals->ROBSupport[i] != true) {
+			} else if (locals->ROBSupport[i][0] != true) {
 				status = DML_FAIL_REORDERING_BUFFER;
 			} else if (locals->DISPCLK_DPPCLK_Support[i][j] != true) {
 				status = DML_FAIL_DISPCLK_DPPCLK;
@@ -5043,7 +5042,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				status = DML_FAIL_PITCH_SUPPORT;
 			} else if (locals->PrefetchSupported[i][j] != true) {
 				status = DML_FAIL_PREFETCH_SUPPORT;
-			} else if (locals->TotalVerticalActiveBandwidthSupport[i] != true) {
+			} else if (locals->TotalVerticalActiveBandwidthSupport[i][0] != true) {
 				status = DML_FAIL_TOTAL_V_ACTIVE_BW;
 			} else if (locals->VRatioInPrefetchSupported[i][j] != true) {
 				status = DML_FAIL_V_RATIO_PREFETCH;
@@ -5089,7 +5088,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 	mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel];
 	mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel];
 	mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel];
-	mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel];
+	mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel][0];
 	mode_lib->vba.FabricAndDRAMBandwidth = locals->FabricAndDRAMBandwidthPerState[mode_lib->vba.VoltageLevel];
 	for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 		if (mode_lib->vba.BlendingAndTiming[k] == k) {
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
index 6482d7b99bae..22f3b5a4b3b9 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
@@ -997,7 +997,7 @@ static unsigned int CalculateVMAndRowBytes(
 		*MetaRowByte = 0;
 	}
 
-	if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_lvp) {
+	if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_l_vp) {
 		MacroTileSizeBytes = 256;
 		MacroTileHeight = BlockHeight256Bytes;
 	} else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x
@@ -1395,11 +1395,11 @@ static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndP
 		else
 			mode_lib->vba.SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k];
 
-		if (mode_lib->vba.ODMCombineEnabled[k] == true)
+		if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
 			MainPlaneDoesODMCombine = true;
 		for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j)
 			if (mode_lib->vba.BlendingAndTiming[k] == j
-					&& mode_lib->vba.ODMCombineEnabled[j] == true)
+					&& mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
 				MainPlaneDoesODMCombine = true;
 
 		if (MainPlaneDoesODMCombine == true)
@@ -2885,12 +2885,12 @@ static void dml20v2_DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
 			SwathWidth = mode_lib->vba.ViewportHeight[k];
 		}
 
-		if (mode_lib->vba.ODMCombineEnabled[k] == true) {
+		if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
 			MainPlaneDoesODMCombine = true;
 		}
 		for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) {
 			if (mode_lib->vba.BlendingAndTiming[k] == j
-					&& mode_lib->vba.ODMCombineEnabled[j] == true) {
+					&& mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
 				MainPlaneDoesODMCombine = true;
 			}
 		}
@@ -3385,7 +3385,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 										== dm_420_10))
 				|| (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl
 						|| mode_lib->vba.SurfaceTiling[k]
-								== dm_sw_gfx7_2d_thin_lvp)
+								== dm_sw_gfx7_2d_thin_l_vp)
 						&& !((mode_lib->vba.SourcePixelFormat[k]
 								== dm_444_64
 								|| mode_lib->vba.SourcePixelFormat[k]
@@ -3483,10 +3483,10 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 				locals->FabricAndDRAMBandwidthPerState[i] * 1000)
 				* locals->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100;
 
-		locals->ReturnBWPerState[i] = locals->ReturnBWToDCNPerState;
+		locals->ReturnBWPerState[i][0] = locals->ReturnBWToDCNPerState;
 
 		if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) {
-			locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i],
+			locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0],
 					locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency /
 					((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024
 					/ (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i]
@@ -3497,7 +3497,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 				+ (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024);
 
 		if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) {
-			locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i],
+			locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0],
 				4 * locals->ReturnBWToDCNPerState *
 				(locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024
 				* locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency /
@@ -3509,7 +3509,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 				locals->DCFCLKPerState[i], locals->FabricAndDRAMBandwidthPerState[i] * 1000);
 
 		if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) {
-			locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i],
+			locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0],
 					locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency /
 					((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024
 					/ (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i]
@@ -3520,7 +3520,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 				+ (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024);
 
 		if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) {
-			locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i],
+			locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0],
 				4 * locals->ReturnBWToDCNPerState *
 				(locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024
 				* locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency /
@@ -3558,12 +3558,12 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 	for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
 		locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i] =
 				(mode_lib->vba.RoundTripPingLatencyCycles + 32.0) / mode_lib->vba.DCFCLKPerState[i]
-				+ locals->UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i];
-		if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i]
+				+ locals->UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i][0];
+		if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i][0]
 				> locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) {
-			locals->ROBSupport[i] = true;
+			locals->ROBSupport[i][0] = true;
 		} else {
-			locals->ROBSupport[i] = false;
+			locals->ROBSupport[i][0] = false;
 		}
 	}
 	/*Writeback Mode Support Check*/
@@ -3946,7 +3946,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 				}
 				if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity
 						&& locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]
-						&& locals->ODMCombineEnablePerState[i][k] == false) {
+						&& locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
 					locals->NoOfDPP[i][j][k] = 1;
 					locals->RequiredDPPCLK[i][j][k] =
 						locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
@@ -4035,16 +4035,16 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 	/*Viewport Size Check*/
 
 	for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
-		locals->ViewportSizeSupport[i] = true;
+		locals->ViewportSizeSupport[i][0] = true;
 		for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
-			if (locals->ODMCombineEnablePerState[i][k] == true) {
+			if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
 				if (dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]))
 						> locals->MaximumSwathWidth[k]) {
-					locals->ViewportSizeSupport[i] = false;
+					locals->ViewportSizeSupport[i][0] = false;
 				}
 			} else {
 				if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) {
-					locals->ViewportSizeSupport[i] = false;
+					locals->ViewportSizeSupport[i][0] = false;
 				}
 			}
 		}
@@ -4226,8 +4226,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 						mode_lib->vba.DSCFormatFactor = 1;
 					}
 					if (locals->RequiresDSC[i][k] == true) {
-						if (locals->ODMCombineEnablePerState[i][k]
-								== true) {
+						if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
 							if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor
 									> (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) {
 								locals->DSCCLKRequiredMoreThanSupported[i] =
@@ -4250,7 +4249,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 		mode_lib->vba.TotalDSCUnitsRequired = 0.0;
 		for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 			if (locals->RequiresDSC[i][k] == true) {
-				if (locals->ODMCombineEnablePerState[i][k] == true) {
+				if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
 					mode_lib->vba.TotalDSCUnitsRequired =
 							mode_lib->vba.TotalDSCUnitsRequired + 2.0;
 				} else {
@@ -4292,7 +4291,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 				mode_lib->vba.bpp = locals->OutputBppPerState[i][k];
 			}
 			if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) {
-				if (locals->ODMCombineEnablePerState[i][k] == false) {
+				if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
 					locals->DSCDelayPerState[i][k] =
 							dscceComputeDelay(
 									mode_lib->vba.DSCInputBitPerComponent[k],
@@ -4335,7 +4334,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 	for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
 		for (j = 0; j < 2; j++) {
 			for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
-				if (locals->ODMCombineEnablePerState[i][k] == true)
+				if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1)
 					locals->SwathWidthYPerState[i][j][k] = dml_min(locals->SwathWidthYSingleDPP[k], dml_round(locals->HActive[k] / 2 * locals->HRatio[k]));
 				else
 					locals->SwathWidthYPerState[i][j][k] = locals->SwathWidthYSingleDPP[k] / locals->NoOfDPP[i][j][k];
@@ -4388,28 +4387,28 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 
 				locals->EffectiveDETLBLinesLuma = dml_floor(locals->LinesInDETLuma +  dml_min(
 						locals->LinesInDETLuma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETY[k] *
-						locals->PSCL_FACTOR[k] / locals->ReturnBWPerState[i],
+						locals->PSCL_FACTOR[k] / locals->ReturnBWPerState[i][0],
 						locals->EffectiveLBLatencyHidingSourceLinesLuma),
 						locals->SwathHeightYPerState[i][j][k]);
 
 				locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + dml_min(
 						locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] *
-						locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i],
+						locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i][0],
 						locals->EffectiveLBLatencyHidingSourceLinesChroma),
 						locals->SwathHeightCPerState[i][j][k]);
 
 				if (locals->BytePerPixelInDETC[k] == 0) {
 					locals->UrgentLatencySupportUsPerState[i][j][k] = locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k])
 							/ locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] *
-								dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]);
+								dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]);
 				} else {
 					locals->UrgentLatencySupportUsPerState[i][j][k] = dml_min(
 						locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k])
 						/ locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] *
-						dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]),
+						dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]),
 							locals->EffectiveDETLBLinesChroma * (locals->HTotal[k] / locals->PixelClock[k]) / (locals->VRatio[k] / 2) -
 							locals->EffectiveDETLBLinesChroma * locals->SwathWidthYPerState[i][j][k] / 2 *
-							dml_ceil(locals->BytePerPixelInDETC[k], 2) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]));
+							dml_ceil(locals->BytePerPixelInDETC[k], 2) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]));
 				}
 			}
 		}
@@ -4454,14 +4453,14 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 				locals->SwathHeightYThisState[k] = locals->SwathHeightYPerState[i][j][k];
 				locals->SwathHeightCThisState[k] = locals->SwathHeightCPerState[i][j][k];
 				locals->SwathWidthYThisState[k] = locals->SwathWidthYPerState[i][j][k];
-				mode_lib->vba.ProjectedDCFCLKDeepSleep = dml_max(
-						mode_lib->vba.ProjectedDCFCLKDeepSleep,
+				mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max(
+						mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 						mode_lib->vba.PixelClock[k] / 16.0);
 				if (mode_lib->vba.BytePerPixelInDETC[k] == 0.0) {
 					if (mode_lib->vba.VRatio[k] <= 1.0) {
-						mode_lib->vba.ProjectedDCFCLKDeepSleep =
+						mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] =
 								dml_max(
-										mode_lib->vba.ProjectedDCFCLKDeepSleep,
+										mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 										1.1
 												* dml_ceil(
 														mode_lib->vba.BytePerPixelInDETY[k],
@@ -4471,9 +4470,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 												* mode_lib->vba.PixelClock[k]
 												/ mode_lib->vba.NoOfDPP[i][j][k]);
 					} else {
-						mode_lib->vba.ProjectedDCFCLKDeepSleep =
+						mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] =
 								dml_max(
-										mode_lib->vba.ProjectedDCFCLKDeepSleep,
+										mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 										1.1
 												* dml_ceil(
 														mode_lib->vba.BytePerPixelInDETY[k],
@@ -4484,9 +4483,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 					}
 				} else {
 					if (mode_lib->vba.VRatio[k] <= 1.0) {
-						mode_lib->vba.ProjectedDCFCLKDeepSleep =
+						mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] =
 								dml_max(
-										mode_lib->vba.ProjectedDCFCLKDeepSleep,
+										mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 										1.1
 												* dml_ceil(
 														mode_lib->vba.BytePerPixelInDETY[k],
@@ -4496,9 +4495,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 												* mode_lib->vba.PixelClock[k]
 												/ mode_lib->vba.NoOfDPP[i][j][k]);
 					} else {
-						mode_lib->vba.ProjectedDCFCLKDeepSleep =
+						mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] =
 								dml_max(
-										mode_lib->vba.ProjectedDCFCLKDeepSleep,
+										mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 										1.1
 												* dml_ceil(
 														mode_lib->vba.BytePerPixelInDETY[k],
@@ -4508,9 +4507,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 												* mode_lib->vba.RequiredDPPCLK[i][j][k]);
 					}
 					if (mode_lib->vba.VRatio[k] / 2.0 <= 1.0) {
-						mode_lib->vba.ProjectedDCFCLKDeepSleep =
+						mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] =
 								dml_max(
-										mode_lib->vba.ProjectedDCFCLKDeepSleep,
+										mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 										1.1
 												* dml_ceil(
 														mode_lib->vba.BytePerPixelInDETC[k],
@@ -4521,9 +4520,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 												* mode_lib->vba.PixelClock[k]
 												/ mode_lib->vba.NoOfDPP[i][j][k]);
 					} else {
-						mode_lib->vba.ProjectedDCFCLKDeepSleep =
+						mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] =
 								dml_max(
-										mode_lib->vba.ProjectedDCFCLKDeepSleep,
+										mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 										1.1
 												* dml_ceil(
 														mode_lib->vba.BytePerPixelInDETC[k],
@@ -4559,7 +4558,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 						&mode_lib->vba.PTEBufferSizeNotExceededY[i][j][k],
 						&mode_lib->vba.dpte_row_height[k],
 						&mode_lib->vba.meta_row_height[k]);
-				mode_lib->vba.PrefetchLinesY[k] = CalculatePrefetchSourceLines(
+				mode_lib->vba.PrefetchLinesY[0][0][k] = CalculatePrefetchSourceLines(
 						mode_lib,
 						mode_lib->vba.VRatio[k],
 						mode_lib->vba.vtaps[k],
@@ -4598,7 +4597,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 							&mode_lib->vba.PTEBufferSizeNotExceededC[i][j][k],
 							&mode_lib->vba.dpte_row_height_chroma[k],
 							&mode_lib->vba.meta_row_height_chroma[k]);
-					mode_lib->vba.PrefetchLinesC[k] = CalculatePrefetchSourceLines(
+					mode_lib->vba.PrefetchLinesC[0][0][k] = CalculatePrefetchSourceLines(
 							mode_lib,
 							mode_lib->vba.VRatio[k] / 2.0,
 							mode_lib->vba.VTAPsChroma[k],
@@ -4612,14 +4611,14 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 					mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0;
 					mode_lib->vba.MetaRowBytesC = 0.0;
 					mode_lib->vba.DPTEBytesPerRowC = 0.0;
-					locals->PrefetchLinesC[k] = 0.0;
+					locals->PrefetchLinesC[0][0][k] = 0.0;
 					locals->PTEBufferSizeNotExceededC[i][j][k] = true;
 					locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma;
 				}
-				locals->PDEAndMetaPTEBytesPerFrame[k] =
+				locals->PDEAndMetaPTEBytesPerFrame[0][0][k] =
 						mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC;
-				locals->MetaRowBytes[k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC;
-				locals->DPTEBytesPerRow[k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC;
+				locals->MetaRowBytes[0][0][k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC;
+				locals->DPTEBytesPerRow[0][0][k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC;
 
 				CalculateActiveRowBandwidth(
 						mode_lib->vba.GPUVMEnable,
@@ -4646,14 +4645,14 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 									+ mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j]
 											* mode_lib->vba.MetaChunkSize)
 									* 1024.0
-									/ mode_lib->vba.ReturnBWPerState[i];
+									/ mode_lib->vba.ReturnBWPerState[i][0];
 			if (mode_lib->vba.GPUVMEnable == true) {
 				mode_lib->vba.ExtraLatency = mode_lib->vba.ExtraLatency
 						+ mode_lib->vba.TotalNumberOfActiveDPP[i][j]
 								* mode_lib->vba.PTEGroupSize
-								/ mode_lib->vba.ReturnBWPerState[i];
+								/ mode_lib->vba.ReturnBWPerState[i][0];
 			}
-			mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep;
+			mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0];
 
 			for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 				if (mode_lib->vba.BlendingAndTiming[k] == k) {
@@ -4703,7 +4702,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 			}
 
 			for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
-				locals->MaximumVStartup[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]
+				locals->MaximumVStartup[0][0][k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]
 					- dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0));
 			}
 
@@ -4743,7 +4742,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 						mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0.0;
 					}
 
-					CalculateDelayAfterScaler(mode_lib, mode_lib->vba.ReturnBWPerState[i], mode_lib->vba.ReadBandwidthLuma[k], mode_lib->vba.ReadBandwidthChroma[k], mode_lib->vba.MaxTotalVActiveRDBandwidth,
+					CalculateDelayAfterScaler(mode_lib, mode_lib->vba.ReturnBWPerState[i][0], mode_lib->vba.ReadBandwidthLuma[k], mode_lib->vba.ReadBandwidthChroma[k], mode_lib->vba.MaxTotalVActiveRDBandwidth,
 						mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k], mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k],
 						mode_lib->vba.RequiredDPPCLK[i][j][k], mode_lib->vba.RequiredDISPCLK[i][j], mode_lib->vba.PixelClock[k], mode_lib->vba.DSCDelayPerState[i][k], mode_lib->vba.NoOfDPP[i][j][k], mode_lib->vba.ScalerEnabled[k], mode_lib->vba.NumberOfCursors[k],
 						mode_lib->vba.DPPCLKDelaySubtotal, mode_lib->vba.DPPCLKDelaySCL, mode_lib->vba.DPPCLKDelaySCLLBOnly, mode_lib->vba.DPPCLKDelayCNVCFormater, mode_lib->vba.DPPCLKDelayCNVCCursor, mode_lib->vba.DISPCLKDelaySubtotal,
@@ -4757,14 +4756,14 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 									mode_lib->vba.RequiredDPPCLK[i][j][k],
 									mode_lib->vba.RequiredDISPCLK[i][j],
 									mode_lib->vba.PixelClock[k],
-									mode_lib->vba.ProjectedDCFCLKDeepSleep,
+									mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 									mode_lib->vba.NoOfDPP[i][j][k],
 									mode_lib->vba.NumberOfCursors[k],
 									mode_lib->vba.VTotal[k]
 											- mode_lib->vba.VActive[k],
 									mode_lib->vba.HTotal[k],
 									mode_lib->vba.MaxInterDCNTileRepeaters,
-									mode_lib->vba.MaximumVStartup[k],
+									mode_lib->vba.MaximumVStartup[0][0][k],
 									mode_lib->vba.GPUVMMaxPageTableLevels,
 									mode_lib->vba.GPUVMEnable,
 									mode_lib->vba.DynamicMetadataEnable[k],
@@ -4774,15 +4773,15 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 									mode_lib->vba.UrgentLatencyPixelDataOnly,
 									mode_lib->vba.ExtraLatency,
 									mode_lib->vba.TimeCalc,
-									mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k],
-									mode_lib->vba.MetaRowBytes[k],
-									mode_lib->vba.DPTEBytesPerRow[k],
-									mode_lib->vba.PrefetchLinesY[k],
+									mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k],
+									mode_lib->vba.MetaRowBytes[0][0][k],
+									mode_lib->vba.DPTEBytesPerRow[0][0][k],
+									mode_lib->vba.PrefetchLinesY[0][0][k],
 									mode_lib->vba.SwathWidthYPerState[i][j][k],
 									mode_lib->vba.BytePerPixelInDETY[k],
 									mode_lib->vba.PrefillY[k],
 									mode_lib->vba.MaxNumSwY[k],
-									mode_lib->vba.PrefetchLinesC[k],
+									mode_lib->vba.PrefetchLinesC[0][0][k],
 									mode_lib->vba.BytePerPixelInDETC[k],
 									mode_lib->vba.PrefillC[k],
 									mode_lib->vba.MaxNumSwC[k],
@@ -4812,19 +4811,19 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 				locals->prefetch_vm_bw_valid = true;
 				locals->prefetch_row_bw_valid = true;
 				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
-					if (locals->PDEAndMetaPTEBytesPerFrame[k] == 0)
+					if (locals->PDEAndMetaPTEBytesPerFrame[0][0][k] == 0)
 						locals->prefetch_vm_bw[k] = 0;
 					else if (locals->LinesForMetaPTE[k] > 0)
-						locals->prefetch_vm_bw[k] = locals->PDEAndMetaPTEBytesPerFrame[k]
+						locals->prefetch_vm_bw[k] = locals->PDEAndMetaPTEBytesPerFrame[0][0][k]
 							/ (locals->LinesForMetaPTE[k] * locals->HTotal[k] / locals->PixelClock[k]);
 					else {
 						locals->prefetch_vm_bw[k] = 0;
 						locals->prefetch_vm_bw_valid = false;
 					}
-					if (locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k] == 0)
+					if (locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k] == 0)
 						locals->prefetch_row_bw[k] = 0;
 					else if (locals->LinesForMetaAndDPTERow[k] > 0)
-						locals->prefetch_row_bw[k] = (locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k])
+						locals->prefetch_row_bw[k] = (locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k])
 							/ (locals->LinesForMetaAndDPTERow[k] * locals->HTotal[k] / locals->PixelClock[k]);
 					else {
 						locals->prefetch_row_bw[k] = 0;
@@ -4843,13 +4842,13 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 											mode_lib->vba.RequiredPrefetchPixelDataBWLuma[i][j][k])
 											+ mode_lib->vba.meta_row_bw[k] + mode_lib->vba.dpte_row_bw[k]);
 				}
-				locals->BandwidthWithoutPrefetchSupported[i] = true;
-				if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i]) {
-					locals->BandwidthWithoutPrefetchSupported[i] = false;
+				locals->BandwidthWithoutPrefetchSupported[i][0] = true;
+				if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i][0]) {
+					locals->BandwidthWithoutPrefetchSupported[i][0] = false;
 				}
 
 				locals->PrefetchSupported[i][j] = true;
-				if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i]) {
+				if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i][0]) {
 					locals->PrefetchSupported[i][j] = false;
 				}
 				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
@@ -4874,7 +4873,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 			if (mode_lib->vba.PrefetchSupported[i][j] == true
 					&& mode_lib->vba.VRatioInPrefetchSupported[i][j] == true) {
 				mode_lib->vba.BandwidthAvailableForImmediateFlip =
-						mode_lib->vba.ReturnBWPerState[i];
+						mode_lib->vba.ReturnBWPerState[i][0];
 				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 					mode_lib->vba.BandwidthAvailableForImmediateFlip =
 							mode_lib->vba.BandwidthAvailableForImmediateFlip
@@ -4888,9 +4887,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 					if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8
 							&& mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) {
 						mode_lib->vba.ImmediateFlipBytes[k] =
-								mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k]
-										+ mode_lib->vba.MetaRowBytes[k]
-										+ mode_lib->vba.DPTEBytesPerRow[k];
+								mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k]
+										+ mode_lib->vba.MetaRowBytes[0][0][k]
+										+ mode_lib->vba.DPTEBytesPerRow[0][0][k];
 					}
 				}
 				mode_lib->vba.TotImmediateFlipBytes = 0.0;
@@ -4918,9 +4917,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 									/ mode_lib->vba.PixelClock[k],
 							mode_lib->vba.VRatio[k],
 							mode_lib->vba.Tno_bw[k],
-							mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k],
-							mode_lib->vba.MetaRowBytes[k],
-							mode_lib->vba.DPTEBytesPerRow[k],
+							mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k],
+							mode_lib->vba.MetaRowBytes[0][0][k],
+							mode_lib->vba.DPTEBytesPerRow[0][0][k],
 							mode_lib->vba.DCCEnable[k],
 							mode_lib->vba.dpte_row_height[k],
 							mode_lib->vba.meta_row_height[k],
@@ -4945,7 +4944,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 				}
 				mode_lib->vba.ImmediateFlipSupportedForState[i][j] = true;
 				if (mode_lib->vba.total_dcn_read_bw_with_flip
-						> mode_lib->vba.ReturnBWPerState[i]) {
+						> mode_lib->vba.ReturnBWPerState[i][0]) {
 					mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false;
 				}
 				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
@@ -4961,13 +4960,13 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 
 	/*Vertical Active BW support*/
 	for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
-		mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i] = dml_min(mode_lib->vba.ReturnBusWidth *
+		mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][0] = dml_min(mode_lib->vba.ReturnBusWidth *
 				mode_lib->vba.DCFCLKPerState[i], mode_lib->vba.FabricAndDRAMBandwidthPerState[i] * 1000) *
 				mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100;
-		if (mode_lib->vba.MaxTotalVActiveRDBandwidth <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i])
-			mode_lib->vba.TotalVerticalActiveBandwidthSupport[i] = true;
+		if (mode_lib->vba.MaxTotalVActiveRDBandwidth <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][0])
+			mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][0] = true;
 		else
-			mode_lib->vba.TotalVerticalActiveBandwidthSupport[i] = false;
+			mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][0] = false;
 	}
 
 	/*PTE Buffer Size Check*/
@@ -5055,7 +5054,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 				status = DML_FAIL_SCALE_RATIO_TAP;
 			} else if (mode_lib->vba.SourceFormatPixelAndScanSupport != true) {
 				status = DML_FAIL_SOURCE_PIXEL_FORMAT;
-			} else if (locals->ViewportSizeSupport[i] != true) {
+			} else if (locals->ViewportSizeSupport[i][0] != true) {
 				status = DML_FAIL_VIEWPORT_SIZE;
 			} else if (locals->DIOSupport[i] != true) {
 				status = DML_FAIL_DIO_SUPPORT;
@@ -5065,7 +5064,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 				status = DML_FAIL_DSC_CLK_REQUIRED;
 			} else if (locals->UrgentLatencySupport[i][j] != true) {
 				status = DML_FAIL_URGENT_LATENCY;
-			} else if (locals->ROBSupport[i] != true) {
+			} else if (locals->ROBSupport[i][0] != true) {
 				status = DML_FAIL_REORDERING_BUFFER;
 			} else if (locals->DISPCLK_DPPCLK_Support[i][j] != true) {
 				status = DML_FAIL_DISPCLK_DPPCLK;
@@ -5085,7 +5084,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 				status = DML_FAIL_PITCH_SUPPORT;
 			} else if (locals->PrefetchSupported[i][j] != true) {
 				status = DML_FAIL_PREFETCH_SUPPORT;
-			} else if (locals->TotalVerticalActiveBandwidthSupport[i] != true) {
+			} else if (locals->TotalVerticalActiveBandwidthSupport[i][0] != true) {
 				status = DML_FAIL_TOTAL_V_ACTIVE_BW;
 			} else if (locals->VRatioInPrefetchSupported[i][j] != true) {
 				status = DML_FAIL_V_RATIO_PREFETCH;
@@ -5131,7 +5130,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
 	mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel];
 	mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel];
 	mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel];
-	mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel];
+	mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel][0];
 	mode_lib->vba.FabricAndDRAMBandwidth = locals->FabricAndDRAMBandwidthPerState[mode_lib->vba.VoltageLevel];
 	for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 		if (mode_lib->vba.BlendingAndTiming[k] == k) {
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
index b6d34669cddf..af35b3bea909 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
@@ -197,7 +197,7 @@ static unsigned int CalculateVMAndRowBytes(
 		unsigned int *meta_row_width,
 		unsigned int *meta_row_height,
 		unsigned int *vm_group_bytes,
-		long         *dpte_group_bytes,
+		unsigned int *dpte_group_bytes,
 		unsigned int *PixelPTEReqWidth,
 		unsigned int *PixelPTEReqHeight,
 		unsigned int *PTERequestSize,
@@ -295,7 +295,7 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
 		double UrgentOutOfOrderReturn,
 		double ReturnBW,
 		bool GPUVMEnable,
-		long dpte_group_bytes[],
+		int dpte_group_bytes[],
 		unsigned int MetaChunkSize,
 		double UrgentLatency,
 		double ExtraLatency,
@@ -309,13 +309,13 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
 		int DPPPerPlane[],
 		bool DCCEnable[],
 		double DPPCLK[],
-		unsigned int SwathWidthSingleDPPY[],
+		double SwathWidthSingleDPPY[],
 		unsigned int SwathHeightY[],
 		double ReadBandwidthPlaneLuma[],
 		unsigned int SwathHeightC[],
 		double ReadBandwidthPlaneChroma[],
 		unsigned int LBBitPerPixel[],
-		unsigned int SwathWidthY[],
+		double SwathWidthY[],
 		double HRatio[],
 		unsigned int vtaps[],
 		unsigned int VTAPsChroma[],
@@ -344,7 +344,7 @@ static void CalculateDCFCLKDeepSleep(
 		double BytePerPixelDETY[],
 		double BytePerPixelDETC[],
 		double VRatio[],
-		unsigned int SwathWidthY[],
+		double SwathWidthY[],
 		int DPPPerPlane[],
 		double HRatio[],
 		double PixelClock[],
@@ -435,7 +435,7 @@ static void CalculateMetaAndPTETimes(
 		unsigned int           meta_row_height[],
 		unsigned int           meta_req_width[],
 		unsigned int           meta_req_height[],
-		long                   dpte_group_bytes[],
+		int                   dpte_group_bytes[],
 		unsigned int           PTERequestSizeY[],
 		unsigned int           PTERequestSizeC[],
 		unsigned int           PixelPTEReqWidthY[],
@@ -477,7 +477,7 @@ static double CalculateExtraLatency(
 		bool HostVMEnable,
 		int NumberOfActivePlanes,
 		int NumberOfDPP[],
-		long dpte_group_bytes[],
+		int dpte_group_bytes[],
 		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
 		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
 		int HostVMMaxPageTableLevels,
@@ -1280,7 +1280,7 @@ static unsigned int CalculateVMAndRowBytes(
 		unsigned int *meta_row_width,
 		unsigned int *meta_row_height,
 		unsigned int *vm_group_bytes,
-		long         *dpte_group_bytes,
+		unsigned int *dpte_group_bytes,
 		unsigned int *PixelPTEReqWidth,
 		unsigned int *PixelPTEReqHeight,
 		unsigned int *PTERequestSize,
@@ -1338,7 +1338,7 @@ static unsigned int CalculateVMAndRowBytes(
 		*MetaRowByte = 0;
 	}
 
-	if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_lvp) {
+	if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_l_vp) {
 		MacroTileSizeBytes = 256;
 		MacroTileHeight = BlockHeight256Bytes;
 	} else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x
@@ -1683,11 +1683,11 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
 		else
 			locals->SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k];
 
-		if (mode_lib->vba.ODMCombineEnabled[k] == true)
+		if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
 			MainPlaneDoesODMCombine = true;
 		for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j)
 			if (mode_lib->vba.BlendingAndTiming[k] == j
-					&& mode_lib->vba.ODMCombineEnabled[j] == true)
+					&& mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
 				MainPlaneDoesODMCombine = true;
 
 		if (MainPlaneDoesODMCombine == true)
@@ -2940,12 +2940,12 @@ static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
 			SwathWidth = mode_lib->vba.ViewportHeight[k];
 		}
 
-		if (mode_lib->vba.ODMCombineEnabled[k] == true) {
+		if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
 			MainPlaneDoesODMCombine = true;
 		}
 		for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) {
 			if (mode_lib->vba.BlendingAndTiming[k] == j
-					&& mode_lib->vba.ODMCombineEnabled[j] == true) {
+					&& mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
 				MainPlaneDoesODMCombine = true;
 			}
 		}
@@ -3453,7 +3453,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 										== dm_420_10))
 				|| (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl
 						|| mode_lib->vba.SurfaceTiling[k]
-								== dm_sw_gfx7_2d_thin_lvp)
+								== dm_sw_gfx7_2d_thin_l_vp)
 						&& !((mode_lib->vba.SourcePixelFormat[k]
 								== dm_444_64
 								|| mode_lib->vba.SourcePixelFormat[k]
@@ -3542,17 +3542,17 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 		}
 	}
 	for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
-		locals->IdealSDPPortBandwidthPerState[i] = dml_min3(
+		locals->IdealSDPPortBandwidthPerState[i][0] = dml_min3(
 				mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLKPerState[i],
 				mode_lib->vba.DRAMSpeedPerState[i] * mode_lib->vba.NumberOfChannels
 						* mode_lib->vba.DRAMChannelWidth,
 				mode_lib->vba.FabricClockPerState[i]
 						* mode_lib->vba.FabricDatapathToDCNDataReturn);
 		if (mode_lib->vba.HostVMEnable == false) {
-			locals->ReturnBWPerState[i] = locals->IdealSDPPortBandwidthPerState[i]
+			locals->ReturnBWPerState[i][0] = locals->IdealSDPPortBandwidthPerState[i][0]
 					* mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100.0;
 		} else {
-			locals->ReturnBWPerState[i] = locals->IdealSDPPortBandwidthPerState[i]
+			locals->ReturnBWPerState[i][0] = locals->IdealSDPPortBandwidthPerState[i][0]
 					* mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0;
 		}
 	}
@@ -3589,12 +3589,12 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				+ dml_max3(mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly,
 						mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
 						mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly)
-					* mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i];
-		if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i]
+					* mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i][0];
+		if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i][0]
 				> locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) {
-			locals->ROBSupport[i] = true;
+			locals->ROBSupport[i][0] = true;
 		} else {
-			locals->ROBSupport[i] = false;
+			locals->ROBSupport[i][0] = false;
 		}
 	}
 	/*Writeback Mode Support Check*/
@@ -3982,7 +3982,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				}
 				if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity
 						&& locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]
-						&& locals->ODMCombineEnablePerState[i][k] == false) {
+						&& locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
 					locals->NoOfDPP[i][j][k] = 1;
 					locals->RequiredDPPCLK[i][j][k] =
 						locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
@@ -4071,16 +4071,16 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 	/*Viewport Size Check*/
 
 	for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
-		locals->ViewportSizeSupport[i] = true;
+		locals->ViewportSizeSupport[i][0] = true;
 		for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
-			if (locals->ODMCombineEnablePerState[i][k] == true) {
+			if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
 				if (dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]))
 						> locals->MaximumSwathWidth[k]) {
-					locals->ViewportSizeSupport[i] = false;
+					locals->ViewportSizeSupport[i][0] = false;
 				}
 			} else {
 				if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) {
-					locals->ViewportSizeSupport[i] = false;
+					locals->ViewportSizeSupport[i][0] = false;
 				}
 			}
 		}
@@ -4269,8 +4269,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 						mode_lib->vba.DSCFormatFactor = 1;
 					}
 					if (locals->RequiresDSC[i][k] == true) {
-						if (locals->ODMCombineEnablePerState[i][k]
-								== true) {
+						if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
 							if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor
 									> (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) {
 								locals->DSCCLKRequiredMoreThanSupported[i] =
@@ -4293,7 +4292,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 		mode_lib->vba.TotalDSCUnitsRequired = 0.0;
 		for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 			if (locals->RequiresDSC[i][k] == true) {
-				if (locals->ODMCombineEnablePerState[i][k] == true) {
+				if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
 					mode_lib->vba.TotalDSCUnitsRequired =
 							mode_lib->vba.TotalDSCUnitsRequired + 2.0;
 				} else {
@@ -4335,7 +4334,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				mode_lib->vba.bpp = locals->OutputBppPerState[i][k];
 			}
 			if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) {
-				if (locals->ODMCombineEnablePerState[i][k] == false) {
+				if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
 					locals->DSCDelayPerState[i][k] =
 							dscceComputeDelay(
 									mode_lib->vba.DSCInputBitPerComponent[k],
@@ -4399,7 +4398,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 			for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 				locals->RequiredDPPCLKThisState[k] = locals->RequiredDPPCLK[i][j][k];
 				locals->NoOfDPPThisState[k]        = locals->NoOfDPP[i][j][k];
-				if (locals->ODMCombineEnablePerState[i][k] == true) {
+				if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
 					locals->SwathWidthYThisState[k] =
 						dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]));
 				} else {
@@ -4451,7 +4450,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 					locals->PSCL_FACTOR,
 					locals->PSCL_FACTOR_CHROMA,
 					locals->RequiredDPPCLKThisState,
-					&mode_lib->vba.ProjectedDCFCLKDeepSleep);
+					&mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0]);
 
 			for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 				if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64
@@ -4496,7 +4495,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 							locals->PTERequestSizeC,
 							locals->dpde0_bytes_per_frame_ub_c,
 							locals->meta_pte_bytes_per_frame_ub_c);
-					locals->PrefetchLinesC[k] = CalculatePrefetchSourceLines(
+					locals->PrefetchLinesC[0][0][k] = CalculatePrefetchSourceLines(
 							mode_lib,
 							mode_lib->vba.VRatio[k]/2,
 							mode_lib->vba.VTAPsChroma[k],
@@ -4511,7 +4510,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 					mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0;
 					mode_lib->vba.MetaRowBytesC = 0.0;
 					mode_lib->vba.DPTEBytesPerRowC = 0.0;
-					locals->PrefetchLinesC[k] = 0.0;
+					locals->PrefetchLinesC[0][0][k] = 0.0;
 					locals->PTEBufferSizeNotExceededC[i][j][k] = true;
 					locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma;
 				}
@@ -4552,7 +4551,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 						locals->PTERequestSizeY,
 						locals->dpde0_bytes_per_frame_ub_l,
 						locals->meta_pte_bytes_per_frame_ub_l);
-				locals->PrefetchLinesY[k] = CalculatePrefetchSourceLines(
+				locals->PrefetchLinesY[0][0][k] = CalculatePrefetchSourceLines(
 						mode_lib,
 						mode_lib->vba.VRatio[k],
 						mode_lib->vba.vtaps[k],
@@ -4562,10 +4561,10 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 						mode_lib->vba.ViewportYStartY[k],
 						&locals->PrefillY[k],
 						&locals->MaxNumSwY[k]);
-				locals->PDEAndMetaPTEBytesPerFrame[k] =
+				locals->PDEAndMetaPTEBytesPerFrame[0][0][k] =
 						mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC;
-				locals->MetaRowBytes[k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC;
-				locals->DPTEBytesPerRow[k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC;
+				locals->MetaRowBytes[0][0][k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC;
+				locals->DPTEBytesPerRow[0][0][k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC;
 
 				CalculateActiveRowBandwidth(
 						mode_lib->vba.GPUVMEnable,
@@ -4591,7 +4590,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 					mode_lib->vba.PixelChunkSizeInKByte,
 					locals->TotalNumberOfDCCActiveDPP[i][j],
 					mode_lib->vba.MetaChunkSize,
-					locals->ReturnBWPerState[i],
+					locals->ReturnBWPerState[i][0],
 					mode_lib->vba.GPUVMEnable,
 					mode_lib->vba.HostVMEnable,
 					mode_lib->vba.NumberOfActivePlanes,
@@ -4602,7 +4601,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 					mode_lib->vba.HostVMMaxPageTableLevels,
 					mode_lib->vba.HostVMCachedPageTableLevels);
 
-			mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep;
+			mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0];
 			for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 				if (mode_lib->vba.BlendingAndTiming[k] == k) {
 					if (mode_lib->vba.WritebackEnable[k] == true) {
@@ -4644,15 +4643,15 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 					}
 				}
 			}
-			mode_lib->vba.MaxMaxVStartup = 0;
+			mode_lib->vba.MaxMaxVStartup[0][0] = 0;
 			for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
-				locals->MaximumVStartup[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]
+				locals->MaximumVStartup[0][0][k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]
 					- dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0));
-				mode_lib->vba.MaxMaxVStartup = dml_max(mode_lib->vba.MaxMaxVStartup, locals->MaximumVStartup[k]);
+				mode_lib->vba.MaxMaxVStartup[0][0] = dml_max(mode_lib->vba.MaxMaxVStartup[0][0], locals->MaximumVStartup[0][0][k]);
 			}
 
 			mode_lib->vba.NextPrefetchMode = mode_lib->vba.MinPrefetchMode;
-			mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup;
+			mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[0][0];
 			do {
 				mode_lib->vba.PrefetchMode[i][j] = mode_lib->vba.NextPrefetchMode;
 				mode_lib->vba.MaxVStartup = mode_lib->vba.NextMaxVStartup;
@@ -4693,7 +4692,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 					myPipe.DPPCLK = locals->RequiredDPPCLK[i][j][k];
 					myPipe.DISPCLK = locals->RequiredDISPCLK[i][j];
 					myPipe.PixelClock = mode_lib->vba.PixelClock[k];
-					myPipe.DCFCLKDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep;
+					myPipe.DCFCLKDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0];
 					myPipe.DPPPerPlane = locals->NoOfDPP[i][j][k];
 					myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k];
 					myPipe.SourceScan = mode_lib->vba.SourceScan[k];
@@ -4727,8 +4726,8 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 							locals->SwathWidthYThisState[k] / mode_lib->vba.HRatio[k],
 							mode_lib->vba.OutputFormat[k],
 							mode_lib->vba.MaxInterDCNTileRepeaters,
-							dml_min(mode_lib->vba.MaxVStartup, locals->MaximumVStartup[k]),
-							locals->MaximumVStartup[k],
+							dml_min(mode_lib->vba.MaxVStartup, locals->MaximumVStartup[0][0][k]),
+							locals->MaximumVStartup[0][0][k],
 							mode_lib->vba.GPUVMMaxPageTableLevels,
 							mode_lib->vba.GPUVMEnable,
 							&myHostVM,
@@ -4739,15 +4738,15 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 							mode_lib->vba.UrgentLatency,
 							mode_lib->vba.ExtraLatency,
 							mode_lib->vba.TimeCalc,
-							locals->PDEAndMetaPTEBytesPerFrame[k],
-							locals->MetaRowBytes[k],
-							locals->DPTEBytesPerRow[k],
-							locals->PrefetchLinesY[k],
+							locals->PDEAndMetaPTEBytesPerFrame[0][0][k],
+							locals->MetaRowBytes[0][0][k],
+							locals->DPTEBytesPerRow[0][0][k],
+							locals->PrefetchLinesY[0][0][k],
 							locals->SwathWidthYThisState[k],
 							locals->BytePerPixelInDETY[k],
 							locals->PrefillY[k],
 							locals->MaxNumSwY[k],
-							locals->PrefetchLinesC[k],
+							locals->PrefetchLinesC[0][0][k],
 							locals->BytePerPixelInDETC[k],
 							locals->PrefillC[k],
 							locals->MaxNumSwC[k],
@@ -4836,14 +4835,14 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 						+ locals->RequiredPrefetchPixelDataBWChroma[i][j][k] * locals->UrgentBurstFactorChromaPre[k]
 						+ locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]);
 				}
-				locals->BandwidthWithoutPrefetchSupported[i] = true;
-				if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i]
+				locals->BandwidthWithoutPrefetchSupported[i][0] = true;
+				if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i][0]
 						|| locals->NotEnoughUrgentLatencyHiding == 1) {
-					locals->BandwidthWithoutPrefetchSupported[i] = false;
+					locals->BandwidthWithoutPrefetchSupported[i][0] = false;
 				}
 
 				locals->PrefetchSupported[i][j] = true;
-				if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i]
+				if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i][0]
 						|| locals->NotEnoughUrgentLatencyHiding == 1
 						|| locals->NotEnoughUrgentLatencyHidingPre == 1) {
 					locals->PrefetchSupported[i][j] = false;
@@ -4872,17 +4871,17 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				}
 
 				if (mode_lib->vba.MaxVStartup <= 13 || mode_lib->vba.AnyLinesForVMOrRowTooLarge == false) {
-					mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup;
+					mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[0][0];
 					mode_lib->vba.NextPrefetchMode = mode_lib->vba.NextPrefetchMode + 1;
 				} else {
 					mode_lib->vba.NextMaxVStartup = mode_lib->vba.NextMaxVStartup - 1;
 				}
 			} while ((locals->PrefetchSupported[i][j] != true || locals->VRatioInPrefetchSupported[i][j] != true)
-					&& (mode_lib->vba.NextMaxVStartup != mode_lib->vba.MaxMaxVStartup
+					&& (mode_lib->vba.NextMaxVStartup != mode_lib->vba.MaxMaxVStartup[0][0]
 						|| mode_lib->vba.NextPrefetchMode < mode_lib->vba.MaxPrefetchMode));
 
 			if (locals->PrefetchSupported[i][j] == true && locals->VRatioInPrefetchSupported[i][j] == true) {
-				mode_lib->vba.BandwidthAvailableForImmediateFlip = locals->ReturnBWPerState[i];
+				mode_lib->vba.BandwidthAvailableForImmediateFlip = locals->ReturnBWPerState[i][0];
 				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 					mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.BandwidthAvailableForImmediateFlip
 						- dml_max(locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k]
@@ -4895,7 +4894,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				mode_lib->vba.TotImmediateFlipBytes = 0.0;
 				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 					mode_lib->vba.TotImmediateFlipBytes = mode_lib->vba.TotImmediateFlipBytes
-						+ locals->PDEAndMetaPTEBytesPerFrame[k] + locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k];
+						+ locals->PDEAndMetaPTEBytesPerFrame[0][0][k] + locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k];
 				}
 
 				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
@@ -4910,9 +4909,9 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 							mode_lib->vba.HostVMMaxPageTableLevels,
 							mode_lib->vba.HostVMCachedPageTableLevels,
 							mode_lib->vba.GPUVMEnable,
-							locals->PDEAndMetaPTEBytesPerFrame[k],
-							locals->MetaRowBytes[k],
-							locals->DPTEBytesPerRow[k],
+							locals->PDEAndMetaPTEBytesPerFrame[0][0][k],
+							locals->MetaRowBytes[0][0][k],
+							locals->DPTEBytesPerRow[0][0][k],
 							mode_lib->vba.BandwidthAvailableForImmediateFlip,
 							mode_lib->vba.TotImmediateFlipBytes,
 							mode_lib->vba.SourcePixelFormat[k],
@@ -4943,7 +4942,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				}
 				locals->ImmediateFlipSupportedForState[i][j] = true;
 				if (mode_lib->vba.total_dcn_read_bw_with_flip
-						> locals->ReturnBWPerState[i]) {
+						> locals->ReturnBWPerState[i][0]) {
 					locals->ImmediateFlipSupportedForState[i][j] = false;
 				}
 				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
@@ -4970,7 +4969,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 					mode_lib->vba.WritebackInterfaceChromaBufferSize,
 					mode_lib->vba.DCFCLKPerState[i],
 					mode_lib->vba.UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels,
-					locals->ReturnBWPerState[i],
+					locals->ReturnBWPerState[i][0],
 					mode_lib->vba.GPUVMEnable,
 					locals->dpte_group_bytes,
 					mode_lib->vba.MetaChunkSize,
@@ -4982,7 +4981,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 					mode_lib->vba.DRAMClockChangeLatency,
 					mode_lib->vba.SRExitTime,
 					mode_lib->vba.SREnterPlusExitTime,
-					mode_lib->vba.ProjectedDCFCLKDeepSleep,
+					mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
 					locals->NoOfDPPThisState,
 					mode_lib->vba.DCCEnable,
 					locals->RequiredDPPCLKThisState,
@@ -5025,8 +5024,8 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + locals->ReadBandwidth[k];
 		}
 		for (i = 0; i <= mode_lib->vba.soc.num_states; ++i) {
-			locals->MaxTotalVerticalActiveAvailableBandwidth[i] = dml_min(
-				locals->IdealSDPPortBandwidthPerState[i] *
+			locals->MaxTotalVerticalActiveAvailableBandwidth[i][0] = dml_min(
+				locals->IdealSDPPortBandwidthPerState[i][0] *
 				mode_lib->vba.MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation
 				/ 100.0, mode_lib->vba.DRAMSpeedPerState[i] *
 				mode_lib->vba.NumberOfChannels *
@@ -5034,10 +5033,10 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation
 				/ 100.0);
 
-			if (MaxTotalVActiveRDBandwidth <= locals->MaxTotalVerticalActiveAvailableBandwidth[i]) {
-				locals->TotalVerticalActiveBandwidthSupport[i] = true;
+			if (MaxTotalVActiveRDBandwidth <= locals->MaxTotalVerticalActiveAvailableBandwidth[i][0]) {
+				locals->TotalVerticalActiveBandwidthSupport[i][0] = true;
 			} else {
-				locals->TotalVerticalActiveBandwidthSupport[i] = false;
+				locals->TotalVerticalActiveBandwidthSupport[i][0] = false;
 			}
 		}
 	}
@@ -5116,7 +5115,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				status = DML_FAIL_SCALE_RATIO_TAP;
 			} else if (mode_lib->vba.SourceFormatPixelAndScanSupport != true) {
 				status = DML_FAIL_SOURCE_PIXEL_FORMAT;
-			} else if (locals->ViewportSizeSupport[i] != true) {
+			} else if (locals->ViewportSizeSupport[i][0] != true) {
 				status = DML_FAIL_VIEWPORT_SIZE;
 			} else if (locals->DIOSupport[i] != true) {
 				status = DML_FAIL_DIO_SUPPORT;
@@ -5124,7 +5123,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				status = DML_FAIL_NOT_ENOUGH_DSC;
 			} else if (locals->DSCCLKRequiredMoreThanSupported[i] != false) {
 				status = DML_FAIL_DSC_CLK_REQUIRED;
-			} else if (locals->ROBSupport[i] != true) {
+			} else if (locals->ROBSupport[i][0] != true) {
 				status = DML_FAIL_REORDERING_BUFFER;
 			} else if (locals->DISPCLK_DPPCLK_Support[i][j] != true) {
 				status = DML_FAIL_DISPCLK_DPPCLK;
@@ -5142,7 +5141,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				status = DML_FAIL_CURSOR_SUPPORT;
 			} else if (mode_lib->vba.PitchSupport != true) {
 				status = DML_FAIL_PITCH_SUPPORT;
-			} else if (locals->TotalVerticalActiveBandwidthSupport[i] != true) {
+			} else if (locals->TotalVerticalActiveBandwidthSupport[i][0] != true) {
 				status = DML_FAIL_TOTAL_V_ACTIVE_BW;
 			} else if (locals->PTEBufferSizeNotExceeded[i][j] != true) {
 				status = DML_FAIL_PTE_BUFFER_SIZE;
@@ -5198,7 +5197,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 	mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel];
 	mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel];
 	mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel];
-	mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel];
+	mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel][0];
 	for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
 		if (mode_lib->vba.BlendingAndTiming[k] == k) {
 			mode_lib->vba.ODMCombineEnabled[k] =
@@ -5227,7 +5226,7 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
 		double UrgentOutOfOrderReturn,
 		double ReturnBW,
 		bool GPUVMEnable,
-		long dpte_group_bytes[],
+		int dpte_group_bytes[],
 		unsigned int MetaChunkSize,
 		double UrgentLatency,
 		double ExtraLatency,
@@ -5241,13 +5240,13 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
 		int DPPPerPlane[],
 		bool DCCEnable[],
 		double DPPCLK[],
-		unsigned int SwathWidthSingleDPPY[],
+		double SwathWidthSingleDPPY[],
 		unsigned int SwathHeightY[],
 		double ReadBandwidthPlaneLuma[],
 		unsigned int SwathHeightC[],
 		double ReadBandwidthPlaneChroma[],
 		unsigned int LBBitPerPixel[],
-		unsigned int SwathWidthY[],
+		double SwathWidthY[],
 		double HRatio[],
 		unsigned int vtaps[],
 		unsigned int VTAPsChroma[],
@@ -5503,7 +5502,7 @@ static void CalculateDCFCLKDeepSleep(
 		double BytePerPixelDETY[],
 		double BytePerPixelDETC[],
 		double VRatio[],
-		unsigned int SwathWidthY[],
+		double SwathWidthY[],
 		int DPPPerPlane[],
 		double HRatio[],
 		double PixelClock[],
@@ -5831,7 +5830,7 @@ static void CalculateMetaAndPTETimes(
 		unsigned int meta_row_height[],
 		unsigned int meta_req_width[],
 		unsigned int meta_req_height[],
-		long dpte_group_bytes[],
+		int dpte_group_bytes[],
 		unsigned int PTERequestSizeY[],
 		unsigned int PTERequestSizeC[],
 		unsigned int PixelPTEReqWidthY[],
@@ -6087,7 +6086,7 @@ static double CalculateExtraLatency(
 		bool HostVMEnable,
 		int NumberOfActivePlanes,
 		int NumberOfDPP[],
-		long dpte_group_bytes[],
+		int dpte_group_bytes[],
 		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
 		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
 		int HostVMMaxPageTableLevels,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
index 55d4cb23a073..bfc2f39bd1ef 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
@@ -85,7 +85,7 @@ enum dm_swizzle_mode {
 	dm_sw_var_s_x = 29,
 	dm_sw_var_d_x = 30,
 	dm_sw_64kb_r_x,
-	dm_sw_gfx7_2d_thin_lvp,
+	dm_sw_gfx7_2d_thin_l_vp,
 	dm_sw_gfx7_2d_thin_gl,
 };
 enum lb_depth {
@@ -119,6 +119,10 @@ enum mpc_combine_affinity {
 	dm_mpc_never
 };
 
+enum RequestType {
+	REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
+};
+
 enum self_refresh_affinity {
 	dm_try_to_allow_self_refresh_and_mclk_switch,
 	dm_allow_self_refresh_and_mclk_switch,
@@ -165,4 +169,16 @@ enum odm_combine_mode {
 	dm_odm_combine_mode_4to1,
 };
 
+enum odm_combine_policy {
+	dm_odm_combine_policy_dal,
+	dm_odm_combine_policy_none,
+	dm_odm_combine_policy_2to1,
+	dm_odm_combine_policy_4to1,
+};
+
+enum immediate_flip_requirement {
+	dm_immediate_flip_not_required,
+	dm_immediate_flip_required,
+};
+
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
index dbf6a021d0d8..658f81e757e9 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
@@ -63,6 +63,7 @@ struct _vcs_dpi_voltage_scaling_st {
 	double dispclk_mhz;
 	double phyclk_mhz;
 	double dppclk_mhz;
+	double dtbclk_mhz;
 };
 
 struct _vcs_dpi_soc_bounding_box_st {
@@ -214,6 +215,7 @@ struct _vcs_dpi_display_pipe_source_params_st {
 	int source_format;
 	unsigned char dcc;
 	unsigned int dcc_rate;
+	unsigned int dcc_rate_chroma;
 	unsigned char dcc_use_global;
 	unsigned char vm;
 	bool gpuvm;    // gpuvm enabled
@@ -225,7 +227,10 @@ struct _vcs_dpi_display_pipe_source_params_st {
 	int source_scan;
 	int sw_mode;
 	int macro_tile_size;
+	unsigned int surface_width_y;
 	unsigned int surface_height_y;
+	unsigned int surface_width_c;
+	unsigned int surface_height_c;
 	unsigned int viewport_width;
 	unsigned int viewport_height;
 	unsigned int viewport_y_y;
@@ -324,7 +329,7 @@ struct _vcs_dpi_display_pipe_dest_params_st {
 	double pixel_rate_mhz;
 	unsigned char synchronized_vblank_all_planes;
 	unsigned char otg_inst;
-	unsigned char odm_combine;
+	unsigned int odm_combine;
 	unsigned char use_maximum_vstartup;
 	unsigned int vtotal_max;
 	unsigned int vtotal_min;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
index 66ca014a6b92..b3c96d9b472f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
@@ -264,7 +264,10 @@ static void fetch_socbb_params(struct display_mode_lib *mode_lib)
 		mode_lib->vba.DRAMSpeedPerState[i] = soc->clock_limits[i].dram_speed_mts;
 		//mode_lib->vba.DRAMSpeedPerState[i] = soc->clock_limits[i].dram_speed_mhz;
 		mode_lib->vba.MaxDispclk[i] = soc->clock_limits[i].dispclk_mhz;
+		mode_lib->vba.DTBCLKPerState[i] = soc->clock_limits[i].dtbclk_mhz;
 	}
+	mode_lib->vba.MinVoltageLevel = 0;
+	mode_lib->vba.MaxVoltageLevel = mode_lib->vba.soc.num_states;
 
 	mode_lib->vba.DoUrgentLatencyAdjustment =
 		soc->do_urgent_latency_adjustment;
@@ -306,8 +309,6 @@ static void fetch_ip_params(struct display_mode_lib *mode_lib)
 
 	mode_lib->vba.WritebackInterfaceBufferSize = ip->writeback_interface_buffer_size_kbytes;
 	mode_lib->vba.WritebackLineBufferSize = ip->writeback_line_buffer_buffer_size;
-	mode_lib->vba.MinVoltageLevel = 0;
-	mode_lib->vba.MaxVoltageLevel = 5;
 
 	mode_lib->vba.WritebackChromaLineBufferWidth =
 			ip->writeback_chroma_line_buffer_width_pixels;
@@ -423,8 +424,8 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
 						ip->dcc_supported : src->dcc && ip->dcc_supported;
 		mode_lib->vba.DCCRate[mode_lib->vba.NumberOfActivePlanes] = src->dcc_rate;
 		/* TODO: Needs to be set based on src->dcc_rate_luma/chroma */
-		mode_lib->vba.DCCRateLuma[mode_lib->vba.NumberOfActivePlanes] = 0;
-		mode_lib->vba.DCCRateChroma[mode_lib->vba.NumberOfActivePlanes] = 0;
+		mode_lib->vba.DCCRateLuma[mode_lib->vba.NumberOfActivePlanes] = src->dcc_rate;
+		mode_lib->vba.DCCRateChroma[mode_lib->vba.NumberOfActivePlanes] = src->dcc_rate_chroma;
 
 		mode_lib->vba.SourcePixelFormat[mode_lib->vba.NumberOfActivePlanes] =
 				(enum source_format_class) (src->source_format);
@@ -436,8 +437,6 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
 				dst->recout_width; // TODO: or should this be full_recout_width???...maybe only when in hsplit mode?
 		mode_lib->vba.ODMCombineEnabled[mode_lib->vba.NumberOfActivePlanes] =
 				dst->odm_combine;
-		mode_lib->vba.ODMCombineTypeEnabled[mode_lib->vba.NumberOfActivePlanes] =
-				dst->odm_combine;
 		mode_lib->vba.OutputFormat[mode_lib->vba.NumberOfActivePlanes] =
 				(enum output_format_class) (dout->output_format);
 		mode_lib->vba.OutputBpp[mode_lib->vba.NumberOfActivePlanes] =
@@ -590,6 +589,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
 			for (k = j + 1; k < mode_lib->vba.cache_num_pipes; ++k) {
 				display_pipe_source_params_st *src_k = &pipes[k].pipe.src;
 				display_pipe_dest_params_st *dst_k = &pipes[k].pipe.dest;
+				display_output_params_st *dout_k = &pipes[k].dout;
 
 				if (src_k->is_hsplit && !visited[k]
 						&& src->hsplit_grp == src_k->hsplit_grp) {
@@ -600,12 +600,18 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
 							== dm_horz) {
 						mode_lib->vba.ViewportWidth[mode_lib->vba.NumberOfActivePlanes] +=
 								src_k->viewport_width;
+						mode_lib->vba.ViewportWidthChroma[mode_lib->vba.NumberOfActivePlanes] +=
+								src_k->viewport_width;
 						mode_lib->vba.ScalerRecoutWidth[mode_lib->vba.NumberOfActivePlanes] +=
 								dst_k->recout_width;
 					} else {
 						mode_lib->vba.ViewportHeight[mode_lib->vba.NumberOfActivePlanes] +=
 								src_k->viewport_height;
+						mode_lib->vba.ViewportHeightChroma[mode_lib->vba.NumberOfActivePlanes] +=
+								src_k->viewport_height;
 					}
+					mode_lib->vba.NumberOfDSCSlices[mode_lib->vba.NumberOfActivePlanes] +=
+							dout_k->dsc_slices;
 
 					visited[k] = true;
 				}
@@ -811,7 +817,9 @@ void ModeSupportAndSystemConfiguration(struct display_mode_lib *mode_lib)
 	unsigned int total_pipes = 0;
 
 	mode_lib->vba.VoltageLevel = mode_lib->vba.cache_pipes[0].clks_cfg.voltage;
-	mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBWPerState[mode_lib->vba.VoltageLevel];
+	mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBWPerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb];
+	if (mode_lib->vba.ReturnBW == 0)
+		mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBWPerState[mode_lib->vba.VoltageLevel][0];
 	mode_lib->vba.FabricAndDRAMBandwidth = mode_lib->vba.FabricAndDRAMBandwidthPerState[mode_lib->vba.VoltageLevel];
 
 	fetch_socbb_params(mode_lib);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
index 3eb657ed5714..e7a44df676ca 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
@@ -157,6 +157,7 @@ struct vba_vars_st {
 	bool DummyPStateCheck;
 	bool DRAMClockChangeSupportsVActive;
 	bool PrefetchModeSupported;
+	bool PrefetchAndImmediateFlipSupported;
 	enum self_refresh_affinity AllowDRAMSelfRefreshOrDRAMClockChangeInVblank; // Mode Support only
 	double XFCRemoteSurfaceFlipDelay;
 	double TInitXFill;
@@ -318,8 +319,7 @@ struct vba_vars_st {
 	unsigned int DynamicMetadataTransmittedBytes[DC__NUM_DPP__MAX];
 	double DCCRate[DC__NUM_DPP__MAX];
 	double AverageDCCCompressionRate;
-	bool ODMCombineEnabled[DC__NUM_DPP__MAX];
-	enum odm_combine_mode ODMCombineTypeEnabled[DC__NUM_DPP__MAX];
+	enum odm_combine_mode ODMCombineEnabled[DC__NUM_DPP__MAX];
 	double OutputBpp[DC__NUM_DPP__MAX];
 	bool DSCEnabled[DC__NUM_DPP__MAX];
 	unsigned int DSCInputBitPerComponent[DC__NUM_DPP__MAX];
@@ -347,6 +347,7 @@ struct vba_vars_st {
 	unsigned int EffectiveLBLatencyHidingSourceLinesChroma;
 	double BandwidthAvailableForImmediateFlip;
 	unsigned int PrefetchMode[DC__VOLTAGE_STATES + 1][2];
+	unsigned int PrefetchModePerState[DC__VOLTAGE_STATES + 1][2];
 	unsigned int MinPrefetchMode;
 	unsigned int MaxPrefetchMode;
 	bool AnyLinesForVMOrRowTooLarge;
@@ -396,6 +397,7 @@ struct vba_vars_st {
 	bool WritebackLumaAndChromaScalingSupported;
 	bool Cursor64BppSupport;
 	double DCFCLKPerState[DC__VOLTAGE_STATES + 1];
+	double DCFCLKState[DC__VOLTAGE_STATES + 1][2];
 	double FabricClockPerState[DC__VOLTAGE_STATES + 1];
 	double SOCCLKPerState[DC__VOLTAGE_STATES + 1];
 	double PHYCLKPerState[DC__VOLTAGE_STATES + 1];
@@ -444,7 +446,7 @@ struct vba_vars_st {
 	double OutputLinkDPLanes[DC__NUM_DPP__MAX];
 	double ForcedOutputLinkBPP[DC__NUM_DPP__MAX]; // Mode Support only
 	double ImmediateFlipBW[DC__NUM_DPP__MAX];
-	double MaxMaxVStartup;
+	double MaxMaxVStartup[DC__VOLTAGE_STATES + 1][2];
 
 	double WritebackLumaVExtra;
 	double WritebackChromaVExtra;
@@ -471,7 +473,7 @@ struct vba_vars_st {
 	double RoundedUpMaxSwathSizeBytesC;
 	double EffectiveDETLBLinesLuma;
 	double EffectiveDETLBLinesChroma;
-	double ProjectedDCFCLKDeepSleep;
+	double ProjectedDCFCLKDeepSleep[DC__VOLTAGE_STATES + 1][2];
 	double PDEAndMetaPTEBytesPerFrameY;
 	double PDEAndMetaPTEBytesPerFrameC;
 	unsigned int MetaRowBytesY;
@@ -489,12 +491,11 @@ struct vba_vars_st {
 	double FractionOfUrgentBandwidthImmediateFlip; // Mode Support debugging output
 
 	/* ms locals */
-	double IdealSDPPortBandwidthPerState[DC__VOLTAGE_STATES + 1];
+	double IdealSDPPortBandwidthPerState[DC__VOLTAGE_STATES + 1][2];
 	unsigned int NoOfDPP[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
 	int NoOfDPPThisState[DC__NUM_DPP__MAX];
-	bool ODMCombineEnablePerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX];
-	enum odm_combine_mode ODMCombineTypeEnablePerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX];
-	unsigned int SwathWidthYThisState[DC__NUM_DPP__MAX];
+	enum odm_combine_mode ODMCombineEnablePerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX];
+	double SwathWidthYThisState[DC__NUM_DPP__MAX];
 	unsigned int SwathHeightCPerState[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
 	unsigned int SwathHeightYThisState[DC__NUM_DPP__MAX];
 	unsigned int SwathHeightCThisState[DC__NUM_DPP__MAX];
@@ -506,7 +507,7 @@ struct vba_vars_st {
 	double RequiredDPPCLKThisState[DC__NUM_DPP__MAX];
 	bool PTEBufferSizeNotExceededY[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
 	bool PTEBufferSizeNotExceededC[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
-	bool BandwidthWithoutPrefetchSupported[DC__VOLTAGE_STATES + 1];
+	bool BandwidthWithoutPrefetchSupported[DC__VOLTAGE_STATES + 1][2];
 	bool PrefetchSupported[DC__VOLTAGE_STATES + 1][2];
 	bool VRatioInPrefetchSupported[DC__VOLTAGE_STATES + 1][2];
 	double RequiredDISPCLK[DC__VOLTAGE_STATES + 1][2];
@@ -515,22 +516,22 @@ struct vba_vars_st {
 	unsigned int TotalNumberOfActiveDPP[DC__VOLTAGE_STATES + 1][2];
 	unsigned int TotalNumberOfDCCActiveDPP[DC__VOLTAGE_STATES + 1][2];
 	bool ModeSupport[DC__VOLTAGE_STATES + 1][2];
-	double ReturnBWPerState[DC__VOLTAGE_STATES + 1];
+	double ReturnBWPerState[DC__VOLTAGE_STATES + 1][2];
 	bool DIOSupport[DC__VOLTAGE_STATES + 1];
 	bool NotEnoughDSCUnits[DC__VOLTAGE_STATES + 1];
 	bool DSCCLKRequiredMoreThanSupported[DC__VOLTAGE_STATES + 1];
 	bool DTBCLKRequiredMoreThanSupported[DC__VOLTAGE_STATES + 1];
 	double UrgentRoundTripAndOutOfOrderLatencyPerState[DC__VOLTAGE_STATES + 1];
-	bool ROBSupport[DC__VOLTAGE_STATES + 1];
+	bool ROBSupport[DC__VOLTAGE_STATES + 1][2];
 	bool PTEBufferSizeNotExceeded[DC__VOLTAGE_STATES + 1][2];
-	bool TotalVerticalActiveBandwidthSupport[DC__VOLTAGE_STATES + 1];
-	double MaxTotalVerticalActiveAvailableBandwidth[DC__VOLTAGE_STATES + 1];
+	bool TotalVerticalActiveBandwidthSupport[DC__VOLTAGE_STATES + 1][2];
+	double MaxTotalVerticalActiveAvailableBandwidth[DC__VOLTAGE_STATES + 1][2];
 	double PrefetchBW[DC__NUM_DPP__MAX];
-	double PDEAndMetaPTEBytesPerFrame[DC__NUM_DPP__MAX];
-	double MetaRowBytes[DC__NUM_DPP__MAX];
-	double DPTEBytesPerRow[DC__NUM_DPP__MAX];
-	double PrefetchLinesY[DC__NUM_DPP__MAX];
-	double PrefetchLinesC[DC__NUM_DPP__MAX];
+	double PDEAndMetaPTEBytesPerFrame[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	double MetaRowBytes[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	double DPTEBytesPerRow[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	double PrefetchLinesY[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	double PrefetchLinesC[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
 	unsigned int MaxNumSwY[DC__NUM_DPP__MAX];
 	unsigned int MaxNumSwC[DC__NUM_DPP__MAX];
 	double PrefillY[DC__NUM_DPP__MAX];
@@ -539,7 +540,7 @@ struct vba_vars_st {
 	double LinesForMetaPTE[DC__NUM_DPP__MAX];
 	double LinesForMetaAndDPTERow[DC__NUM_DPP__MAX];
 	double MinDPPCLKUsingSingleDPP[DC__NUM_DPP__MAX];
-	unsigned int SwathWidthYSingleDPP[DC__NUM_DPP__MAX];
+	double SwathWidthYSingleDPP[DC__NUM_DPP__MAX];
 	double BytePerPixelInDETY[DC__NUM_DPP__MAX];
 	double BytePerPixelInDETC[DC__NUM_DPP__MAX];
 	bool RequiresDSC[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX];
@@ -547,7 +548,7 @@ struct vba_vars_st {
 	double RequiresFEC[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX];
 	double OutputBppPerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX];
 	double DSCDelayPerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX];
-	bool ViewportSizeSupport[DC__VOLTAGE_STATES + 1];
+	bool ViewportSizeSupport[DC__VOLTAGE_STATES + 1][2];
 	unsigned int Read256BlockHeightY[DC__NUM_DPP__MAX];
 	unsigned int Read256BlockWidthY[DC__NUM_DPP__MAX];
 	unsigned int Read256BlockHeightC[DC__NUM_DPP__MAX];
@@ -562,7 +563,7 @@ struct vba_vars_st {
 	double WriteBandwidth[DC__NUM_DPP__MAX];
 	double PSCL_FACTOR[DC__NUM_DPP__MAX];
 	double PSCL_FACTOR_CHROMA[DC__NUM_DPP__MAX];
-	double MaximumVStartup[DC__NUM_DPP__MAX];
+	double MaximumVStartup[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
 	unsigned int MacroTileWidthY[DC__NUM_DPP__MAX];
 	unsigned int MacroTileWidthC[DC__NUM_DPP__MAX];
 	double AlignedDCCMetaPitch[DC__NUM_DPP__MAX];
@@ -579,7 +580,7 @@ struct vba_vars_st {
 	bool ImmediateFlipSupportedForState[DC__VOLTAGE_STATES + 1][2];
 	double WritebackDelay[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX];
 	unsigned int vm_group_bytes[DC__NUM_DPP__MAX];
-	long dpte_group_bytes[DC__NUM_DPP__MAX];
+	unsigned int dpte_group_bytes[DC__NUM_DPP__MAX];
 	unsigned int dpte_row_height[DC__NUM_DPP__MAX];
 	unsigned int meta_req_height[DC__NUM_DPP__MAX];
 	unsigned int meta_req_width[DC__NUM_DPP__MAX];
@@ -605,14 +606,14 @@ struct vba_vars_st {
 	double UrgentBurstFactorChroma[DC__NUM_DPP__MAX];
 	double UrgentBurstFactorChromaPre[DC__NUM_DPP__MAX];
 
+
 	bool           MPCCombine[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
 	double         SwathWidthCSingleDPP[DC__NUM_DPP__MAX];
 	double         MaximumSwathWidthInLineBufferLuma;
 	double         MaximumSwathWidthInLineBufferChroma;
 	double         MaximumSwathWidthLuma[DC__NUM_DPP__MAX];
 	double         MaximumSwathWidthChroma[DC__NUM_DPP__MAX];
-	bool odm_combine_dummy[DC__NUM_DPP__MAX];
-	enum odm_combine_mode odm_combine_mode_dummy[DC__NUM_DPP__MAX];
+	enum odm_combine_mode odm_combine_dummy[DC__NUM_DPP__MAX];
 	double         dummy1[DC__NUM_DPP__MAX];
 	double         dummy2[DC__NUM_DPP__MAX];
 	double         dummy3[DC__NUM_DPP__MAX];
@@ -622,9 +623,9 @@ struct vba_vars_st {
 	double         dummy7[DC__NUM_DPP__MAX];
 	double         dummy8[DC__NUM_DPP__MAX];
 	unsigned int        dummyinteger1ms[DC__NUM_DPP__MAX];
-	unsigned int        dummyinteger2ms[DC__NUM_DPP__MAX];
+	double        dummyinteger2ms[DC__NUM_DPP__MAX];
 	unsigned int        dummyinteger3[DC__NUM_DPP__MAX];
-	unsigned int        dummyinteger4;
+	unsigned int        dummyinteger4[DC__NUM_DPP__MAX];
 	unsigned int        dummyinteger5;
 	unsigned int        dummyinteger6;
 	unsigned int        dummyinteger7;
@@ -637,7 +638,6 @@ struct vba_vars_st {
 	unsigned int        dummyintegerarr2[DC__NUM_DPP__MAX];
 	unsigned int        dummyintegerarr3[DC__NUM_DPP__MAX];
 	unsigned int        dummyintegerarr4[DC__NUM_DPP__MAX];
-	long                dummylongarr1[DC__NUM_DPP__MAX];
 	bool           dummysinglestring;
 	bool           SingleDPPViewportSizeSupportPerPlane[DC__NUM_DPP__MAX];
 	double         PlaneRequiredDISPCLKWithODMCombine2To1;
@@ -645,20 +645,19 @@ struct vba_vars_st {
 	unsigned int   TotalNumberOfSingleDPPPlanes[DC__VOLTAGE_STATES + 1][2];
 	bool           LinkDSCEnable;
 	bool           ODMCombine4To1SupportCheckOK[DC__VOLTAGE_STATES + 1];
-	bool ODMCombineEnableThisState[DC__NUM_DPP__MAX];
-	enum odm_combine_mode ODMCombineEnableTypeThisState[DC__NUM_DPP__MAX];
-	unsigned int   SwathWidthCThisState[DC__NUM_DPP__MAX];
+	enum odm_combine_mode ODMCombineEnableThisState[DC__NUM_DPP__MAX];
+	double   SwathWidthCThisState[DC__NUM_DPP__MAX];
 	bool           ViewportSizeSupportPerPlane[DC__NUM_DPP__MAX];
 	double         AlignedDCCMetaPitchY[DC__NUM_DPP__MAX];
 	double         AlignedDCCMetaPitchC[DC__NUM_DPP__MAX];
 
 	unsigned int NotEnoughUrgentLatencyHiding;
 	unsigned int NotEnoughUrgentLatencyHidingPre;
-	long PTEBufferSizeInRequestsForLuma;
-	long PTEBufferSizeInRequestsForChroma;
+	int PTEBufferSizeInRequestsForLuma;
+	int PTEBufferSizeInRequestsForChroma;
 
 	// Missing from VBA
-	long dpte_group_bytes_chroma;
+	int dpte_group_bytes_chroma;
 	unsigned int vm_group_bytes_chroma;
 	double dst_x_after_scaler;
 	double dst_y_after_scaler;
@@ -683,8 +682,8 @@ struct vba_vars_st {
 	double MinTTUVBlank[DC__NUM_DPP__MAX];
 	double BytePerPixelDETY[DC__NUM_DPP__MAX];
 	double BytePerPixelDETC[DC__NUM_DPP__MAX];
-	unsigned int SwathWidthY[DC__NUM_DPP__MAX];
-	unsigned int SwathWidthSingleDPPY[DC__NUM_DPP__MAX];
+	double SwathWidthY[DC__NUM_DPP__MAX];
+	double SwathWidthSingleDPPY[DC__NUM_DPP__MAX];
 	double CursorRequestDeliveryTime[DC__NUM_DPP__MAX];
 	double CursorRequestDeliveryTimePrefetch[DC__NUM_DPP__MAX];
 	double ReadBandwidthPlaneLuma[DC__NUM_DPP__MAX];
@@ -760,8 +759,8 @@ struct vba_vars_st {
 	double LinesInDETY[DC__NUM_DPP__MAX];
 	double LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
 
-	unsigned int SwathWidthSingleDPPC[DC__NUM_DPP__MAX];
-	unsigned int SwathWidthC[DC__NUM_DPP__MAX];
+	double SwathWidthSingleDPPC[DC__NUM_DPP__MAX];
+	double SwathWidthC[DC__NUM_DPP__MAX];
 	unsigned int BytePerPixelY[DC__NUM_DPP__MAX];
 	unsigned int BytePerPixelC[DC__NUM_DPP__MAX];
 	long dummyinteger1;
@@ -779,6 +778,7 @@ struct vba_vars_st {
 	unsigned int DCCCMaxCompressedBlock[DC__NUM_DPP__MAX];
 	unsigned int DCCCIndependent64ByteBlock[DC__NUM_DPP__MAX];
 	double VStartupMargin;
+	bool NotEnoughTimeForDynamicMetadata;
 
 	/* Missing from VBA */
 	unsigned int MaximumMaxVStartupLines;
@@ -814,7 +814,7 @@ struct vba_vars_st {
 	unsigned int ViewportHeightChroma[DC__NUM_DPP__MAX];
 	double HRatioChroma[DC__NUM_DPP__MAX];
 	double VRatioChroma[DC__NUM_DPP__MAX];
-	long WritebackSourceWidth[DC__NUM_DPP__MAX];
+	int WritebackSourceWidth[DC__NUM_DPP__MAX];
 
 	bool ModeIsSupported;
 	bool ODMCombine4To1Supported;
@@ -850,6 +850,58 @@ struct vba_vars_st {
 	unsigned int MaxNumHDMIFRLOutputs;
 	int    AudioSampleRate[DC__NUM_DPP__MAX];
 	int    AudioSampleLayout[DC__NUM_DPP__MAX];
+
+	int PercentMarginOverMinimumRequiredDCFCLK;
+	bool DynamicMetadataSupported[DC__VOLTAGE_STATES + 1][2];
+	enum immediate_flip_requirement ImmediateFlipRequirement;
+	double DETBufferSizeYThisState[DC__NUM_DPP__MAX];
+	double DETBufferSizeCThisState[DC__NUM_DPP__MAX];
+	bool NoUrgentLatencyHiding[DC__NUM_DPP__MAX];
+	bool NoUrgentLatencyHidingPre[DC__NUM_DPP__MAX];
+	int swath_width_luma_ub_this_state[DC__NUM_DPP__MAX];
+	int swath_width_chroma_ub_this_state[DC__NUM_DPP__MAX];
+	double UrgLatency[DC__VOLTAGE_STATES + 1];
+	double VActiveCursorBandwidth[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	double VActivePixelBandwidth[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	bool NoTimeForPrefetch[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	bool NoTimeForDynamicMetadata[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	double dpte_row_bandwidth[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	double meta_row_bandwidth[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	double DETBufferSizeYAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	double DETBufferSizeCAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	int swath_width_luma_ub_all_states[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	int swath_width_chroma_ub_all_states[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	bool NotUrgentLatencyHiding[DC__VOLTAGE_STATES + 1][2];
+	unsigned int SwathHeightYAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	unsigned int SwathHeightCAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	unsigned int SwathWidthYAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	unsigned int SwathWidthCAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX];
+	double TotalDPTERowBandwidth[DC__VOLTAGE_STATES + 1][2];
+	double TotalMetaRowBandwidth[DC__VOLTAGE_STATES + 1][2];
+	double TotalVActiveCursorBandwidth[DC__VOLTAGE_STATES + 1][2];
+	double TotalVActivePixelBandwidth[DC__VOLTAGE_STATES + 1][2];
+	bool UseMinimumRequiredDCFCLK;
+	double WritebackDelayTime[DC__NUM_DPP__MAX];
+	unsigned int DCCYIndependentBlock[DC__NUM_DPP__MAX];
+	unsigned int DCCCIndependentBlock[DC__NUM_DPP__MAX];
+	unsigned int dummyinteger15;
+	unsigned int dummyinteger16;
+	unsigned int dummyinteger17;
+	unsigned int dummyinteger18;
+	unsigned int dummyinteger19;
+	unsigned int dummyinteger20;
+	unsigned int dummyinteger21;
+	unsigned int dummyinteger22;
+	unsigned int dummyinteger23;
+	unsigned int dummyinteger24;
+	unsigned int dummyinteger25;
+	unsigned int dummyinteger26;
+	unsigned int dummyinteger27;
+	unsigned int dummyinteger28;
+	unsigned int dummyinteger29;
+	bool dummystring[DC__NUM_DPP__MAX];
+	double BPP;
+	enum odm_combine_policy ODMCombinePolicy;
 };
 
 bool CalculateMinAndMaxPrefetchMode(
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c b/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c
index b953b02a1512..723af0b2dda0 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c
@@ -24,7 +24,7 @@
  */
 
 #include "dml_common_defs.h"
-#include "../calcs/dcn_calc_math.h"
+#include "dcn_calc_math.h"
 
 #include "dml_inline_defs.h"
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
index eca140da13d8..ded71ea82413 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
@@ -27,7 +27,7 @@
 #define __DML_INLINE_DEFS_H__
 
 #include "dml_common_defs.h"
-#include "../calcs/dcn_calc_math.h"
+#include "dcn_calc_math.h"
 #include "dml_logger.h"
 
 static inline double dml_min(double a, double b)
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index 16f6ef22367b..f285b76888fb 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -212,6 +212,7 @@ struct resource_pool {
 
 	struct abm *abm;
 	struct dmcu *dmcu;
+	struct dmub_psr *psr;
 
 	const struct resource_funcs *funcs;
 	const struct resource_caps *res_cap;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
index 6198bccd6199..8b1f0ce6c2a7 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
@@ -76,6 +76,8 @@ void dp_enable_mst_on_sink(struct dc_link *link, bool enable);
 enum dp_panel_mode dp_get_panel_mode(struct dc_link *link);
 void dp_set_panel_mode(struct dc_link *link, enum dp_panel_mode panel_mode);
 
+bool dp_overwrite_extended_receiver_cap(struct dc_link *link);
+
 void dp_set_fec_ready(struct dc_link *link, bool ready);
 void dp_set_fec_enable(struct dc_link *link, bool enable);
 bool dp_set_dsc_enable(struct pipe_ctx *pipe_ctx, bool enable);
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_math.h b/drivers/gpu/drm/amd/display/dc/inc/dcn_calc_math.h
index 45a07eeffbb6..45a07eeffbb6 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_math.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/dcn_calc_math.h
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
index 125e42dbd3c5..45ef390ae052 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
@@ -47,6 +47,26 @@ struct dpp_input_csc_matrix {
 	uint16_t regval[12];
 };
 
+static const struct dpp_input_csc_matrix dpp_input_csc_matrix[] = {
+	{COLOR_SPACE_SRGB,
+		{0x2000, 0, 0, 0, 0, 0x2000, 0, 0, 0, 0, 0x2000, 0} },
+	{COLOR_SPACE_SRGB_LIMITED,
+		{0x2000, 0, 0, 0, 0, 0x2000, 0, 0, 0, 0, 0x2000, 0} },
+	{COLOR_SPACE_YCBCR601,
+		{0x2cdd, 0x2000, 0, 0xe991, 0xe926, 0x2000, 0xf4fd, 0x10ef,
+						0, 0x2000, 0x38b4, 0xe3a6} },
+	{COLOR_SPACE_YCBCR601_LIMITED,
+		{0x3353, 0x2568, 0, 0xe400, 0xe5dc, 0x2568, 0xf367, 0x1108,
+						0, 0x2568, 0x40de, 0xdd3a} },
+	{COLOR_SPACE_YCBCR709,
+		{0x3265, 0x2000, 0, 0xe6ce, 0xf105, 0x2000, 0xfa01, 0xa7d, 0,
+						0x2000, 0x3b61, 0xe24f} },
+
+	{COLOR_SPACE_YCBCR709_LIMITED,
+		{0x39a6, 0x2568, 0, 0xe0d6, 0xeedd, 0x2568, 0xf925, 0x9a8, 0,
+						0x2568, 0x43ee, 0xdbb2} }
+};
+
 struct dpp_grph_csc_adjustment {
 	struct fixed31_32 temperature_matrix[CSC_TEMPERATURE_MATRIX_SIZE];
 	enum graphics_gamut_adjust_type gamut_adjust_type;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
index 686145933335..2cb8466e657b 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
@@ -63,6 +63,26 @@ struct hubp {
 	bool power_gated;
 };
 
+struct surface_flip_registers {
+	uint32_t DCSURF_SURFACE_CONTROL;
+	uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH;
+	uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS;
+	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH;
+	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS;
+	uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C;
+	uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_C;
+	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C;
+	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_C;
+	uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH;
+	uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS;
+	uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH;
+	uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS;
+	bool tmz_surface;
+	bool immediate;
+	uint8_t vmid;
+	bool grph_stereo;
+};
+
 struct hubp_funcs {
 	void (*hubp_setup)(
 			struct hubp *hubp,
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
index 2d3efd71fa51..e5e7d94026fc 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
@@ -208,7 +208,8 @@ struct timing_generator_funcs {
 					bool enable, const struct dc_crtc_timing *timing);
 	void (*set_drr)(struct timing_generator *tg, const struct drr_params *params);
 	void (*set_static_screen_control)(struct timing_generator *tg,
-							uint32_t value);
+						uint32_t event_triggers,
+						uint32_t num_frames);
 	void (*set_test_pattern)(
 		struct timing_generator *tg,
 		enum controller_dp_test_pattern test_pattern,
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
index df3204645c6b..209118f9f193 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
@@ -42,7 +42,7 @@ struct dc_state;
 struct dc_stream_status;
 struct dc_writeback_info;
 struct dchub_init_data;
-struct dc_static_screen_events;
+struct dc_static_screen_params;
 struct resource_pool;
 struct dc_phy_addr_space_config;
 struct dc_virtual_addr_space_config;
@@ -102,7 +102,7 @@ struct hw_sequencer_funcs {
 			unsigned int vmid, unsigned int vmid_frame_number);
 	void (*set_static_screen_control)(struct pipe_ctx **pipe_ctx,
 			int num_pipes,
-			const struct dc_static_screen_events *events);
+			const struct dc_static_screen_params *events);
 
 	/* Stream Related */
 	void (*enable_stream)(struct pipe_ctx *pipe_ctx);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h
index 8ba06f015975..ecf566378ccd 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h
@@ -51,7 +51,7 @@ struct dc_state;
 struct dc_stream_status;
 struct dc_writeback_info;
 struct dchub_init_data;
-struct dc_static_screen_events;
+struct dc_static_screen_params;
 struct resource_pool;
 struct resource_context;
 struct stream_resource;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h b/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h
index 47e307388581..2470405e996b 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h
@@ -458,7 +458,14 @@ uint32_t generic_reg_get8(const struct dc_context *ctx, uint32_t addr,
 #define IX_REG_READ(index_reg_name, data_reg_name, index) \
 		generic_read_indirect_reg(CTX, REG(index_reg_name), REG(data_reg_name), IND_REG(index))
 
+#define IX_REG_GET_N(index_reg_name, data_reg_name, index, n, ...) \
+		generic_indirect_reg_get(CTX, REG(index_reg_name), REG(data_reg_name), \
+				IND_REG(index), \
+				n, __VA_ARGS__)
 
+#define IX_REG_GET(index_reg_name, data_reg_name, index, field, val) \
+		IX_REG_GET_N(index_reg_name, data_reg_name, index, 1, \
+				FN(data_reg_name, field), val)
 
 #define IX_REG_UPDATE_N(index_reg_name, data_reg_name, index, n, ...)	\
 		generic_indirect_reg_update_ex(CTX, \
@@ -479,6 +486,12 @@ uint32_t generic_read_indirect_reg(const struct dc_context *ctx,
 		uint32_t addr_index, uint32_t addr_data,
 		uint32_t index);
 
+uint32_t generic_indirect_reg_get(const struct dc_context *ctx,
+		uint32_t addr_index, uint32_t addr_data,
+		uint32_t index, int n,
+		uint8_t shift1, uint32_t mask1, uint32_t *field_value1,
+		...);
+
 uint32_t generic_indirect_reg_update_ex(const struct dc_context *ctx,
 		uint32_t addr_index, uint32_t addr_data,
 		uint32_t index, uint32_t reg_val, int n,
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index b10728f33f62..cd9532b4f14d 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -36,6 +36,7 @@
 #define DMUB_RB_SIZE (DMUB_RB_CMD_SIZE * DMUB_RB_MAX_ENTRY)
 #define REG_SET_MASK 0xFFFF
 
+
 /*
  * Command IDs should be treated as stable ABI.
  * Do not reuse or modify IDs.
@@ -47,6 +48,7 @@ enum dmub_cmd_type {
 	DMUB_CMD__REG_SEQ_FIELD_UPDATE_SEQ = 2,
 	DMUB_CMD__REG_SEQ_BURST_WRITE = 3,
 	DMUB_CMD__REG_REG_WAIT = 4,
+	DMUB_CMD__PLAT_54186_WA = 5,
 	DMUB_CMD__PSR = 64,
 	DMUB_CMD__VBIOS = 128,
 };
@@ -145,6 +147,32 @@ struct dmub_rb_cmd_reg_wait {
 	struct dmub_cmd_reg_wait_data reg_wait;
 };
 
+#ifndef PHYSICAL_ADDRESS_LOC
+#define PHYSICAL_ADDRESS_LOC union large_integer
+#endif
+
+struct dmub_cmd_PLAT_54186_wa {
+	uint32_t DCSURF_SURFACE_CONTROL;
+	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH;
+	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS;
+	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C;
+	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_C;
+	struct {
+		uint8_t hubp_inst : 4;
+		uint8_t tmz_surface : 1;
+		uint8_t immediate :1;
+		uint8_t vmid : 4;
+		uint8_t grph_stereo : 1;
+		uint32_t reserved : 21;
+	} flip_params;
+	uint32_t reserved[9];
+};
+
+struct dmub_rb_cmd_PLAT_54186_wa {
+	struct dmub_cmd_header header;
+	struct dmub_cmd_PLAT_54186_wa flip;
+};
+
 struct dmub_cmd_digx_encoder_control_data {
 	union dig_encoder_control_parameters_v1_5 dig;
 };
@@ -187,9 +215,28 @@ struct dmub_rb_cmd_dpphy_init {
 };
 
 struct dmub_cmd_psr_copy_settings_data {
-	uint32_t reg1;
-	uint32_t reg2;
-	uint32_t reg3;
+	uint16_t psr_level;
+	uint8_t hubp_inst;
+	uint8_t dpp_inst;
+	uint8_t mpcc_inst;
+	uint8_t opp_inst;
+	uint8_t otg_inst;
+	uint8_t digfe_inst;
+	uint8_t digbe_inst;
+	uint8_t dpphy_inst;
+	uint8_t aux_inst;
+	uint8_t hyst_frames;
+	uint8_t hyst_lines;
+	uint8_t phy_num;
+	uint8_t phy_type;
+	uint8_t aux_repeat;
+	uint8_t smu_optimizations_en;
+	uint8_t skip_wait_for_pll_lock;
+	uint8_t frame_delay;
+	uint8_t smu_phy_id;
+	uint8_t num_of_controllers;
+	uint8_t link_rate;
+	uint8_t frame_cap_ind;
 };
 
 struct dmub_rb_cmd_psr_copy_settings {
@@ -206,31 +253,17 @@ struct dmub_rb_cmd_psr_set_level {
 	struct dmub_cmd_psr_set_level_data psr_set_level_data;
 };
 
-struct dmub_rb_cmd_psr_disable {
-	struct dmub_cmd_header header;
-};
-
 struct dmub_rb_cmd_psr_enable {
 	struct dmub_cmd_header header;
 };
 
-struct dmub_cmd_psr_notify_vblank_data {
-	uint32_t vblank_int; // Which vblank interrupt was triggered
-};
-
-struct dmub_rb_cmd_notify_vblank {
-	struct dmub_cmd_header header;
-	struct dmub_cmd_psr_notify_vblank_data psr_notify_vblank_data;
-};
-
-struct dmub_cmd_psr_notify_static_state_data {
-	uint32_t ss_int;   // Which static screen interrupt was triggered
-	uint32_t ss_enter; // Enter (1) or exit (0) static screen
+struct dmub_cmd_psr_setup_data {
+	enum psr_version version; // PSR version 1 or 2
 };
 
-struct dmub_rb_cmd_psr_notify_static_state {
+struct dmub_rb_cmd_psr_setup {
 	struct dmub_cmd_header header;
-	struct dmub_cmd_psr_notify_static_state_data psr_notify_static_state_data;
+	struct dmub_cmd_psr_setup_data psr_setup_data;
 };
 
 union dmub_rb_cmd {
@@ -245,9 +278,10 @@ union dmub_rb_cmd {
 	struct dmub_rb_cmd_dpphy_init dpphy_init;
 	struct dmub_rb_cmd_dig1_transmitter_control dig1_transmitter_control;
 	struct dmub_rb_cmd_psr_enable psr_enable;
-	struct dmub_rb_cmd_psr_disable psr_disable;
 	struct dmub_rb_cmd_psr_copy_settings psr_copy_settings;
 	struct dmub_rb_cmd_psr_set_level psr_set_level;
+	struct dmub_rb_cmd_PLAT_54186_wa PLAT_54186_wa;
+	struct dmub_rb_cmd_psr_setup psr_setup;
 };
 
 #pragma pack(pop)
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h
index 14f13e8a6f3b..7b69eb37f762 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h
@@ -32,10 +32,17 @@
  */
 
 enum dmub_cmd_psr_type {
-	DMUB_CMD__PSR_ENABLE = 0,
-	DMUB_CMD__PSR_DISABLE = 1,
-	DMUB_CMD__PSR_COPY_SETTINGS = 2,
-	DMUB_CMD__PSR_SET_LEVEL = 3,
+	DMUB_CMD__PSR_SETUP		= 0,
+	DMUB_CMD__PSR_COPY_SETTINGS	= 1,
+	DMUB_CMD__PSR_ENABLE		= 2,
+	DMUB_CMD__PSR_DISABLE		= 3,
+	DMUB_CMD__PSR_SET_LEVEL		= 4,
+};
+
+enum psr_version {
+	PSR_VERSION_1			= 0x10, // PSR Version 1
+	PSR_VERSION_2			= 0x20, // PSR Version 2, includes selective update
+	PSR_VERSION_2_Y_COORD		= 0x21, // PSR Version 2, includes Y-coordinate support for SU
 };
 
 #endif /* _DMUB_CMD_DAL_H_ */
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h
index ac22744eaa94..df875fdd2ab0 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h
@@ -73,12 +73,17 @@ static inline bool dmub_rb_full(struct dmub_rb *rb)
 static inline bool dmub_rb_push_front(struct dmub_rb *rb,
 				      const struct dmub_cmd_header *cmd)
 {
-	uint8_t *wt_ptr = (uint8_t *)(rb->base_address) + rb->wrpt;
+	uint64_t volatile *dst = (uint64_t volatile *)(rb->base_address) + rb->wrpt / sizeof(uint64_t);
+	const uint64_t *src = (const uint64_t *)cmd;
+	int i;
 
 	if (dmub_rb_full(rb))
 		return false;
 
-	dmub_memcpy(wt_ptr, cmd, DMUB_RB_CMD_SIZE);
+	/* Copy the command in 64-bit words through the volatile destination. */
+	for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++)
+		*dst++ = *src++;
+
 	rb->wrpt += DMUB_RB_CMD_SIZE;
 
 	if (rb->wrpt >= rb->capacity)
@@ -113,6 +118,26 @@ static inline bool dmub_rb_pop_front(struct dmub_rb *rb)
 	return true;
 }
 
+/* Read back every queued command between rptr and wrpt; values are discarded. */
+static inline void dmub_rb_flush_pending(const struct dmub_rb *rb)
+{
+	uint32_t rptr = rb->rptr;
+	uint32_t wptr = rb->wrpt;
+
+	while (rptr != wptr) {
+		uint64_t volatile *data = (uint64_t volatile *)rb->base_address + rptr / sizeof(uint64_t);
+		int i;
+
+		/* Volatile loads force each read; no temporary is kept. */
+		for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++)
+			(void)*data++;
+
+		rptr += DMUB_RB_CMD_SIZE;
+		if (rptr >= rb->capacity)
+			rptr %= rb->capacity;
+	}
+}
+
 static inline void dmub_rb_init(struct dmub_rb *rb,
 				struct dmub_rb_init_params *init_params)
 {
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h
index f34a50dd36ea..8e23a7017588 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h
@@ -445,25 +445,6 @@ enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub,
 enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub);
 
 /**
- * dmub_srv_cmd_submit() - submits a command to the DMUB immediately
- * @dmub: the dmub service
- * @cmd: the command to submit
- * @timeout_us: the maximum number of microseconds to wait
- *
- * Submits a command to the DMUB with an optional timeout.
- * If timeout_us is given then the service will attempt to
- * resubmit for the given number of microseconds.
- *
- * Return:
- *   DMUB_STATUS_OK - success
- *   DMUB_STATUS_TIMEOUT - wait for submit timed out
- *   DMUB_STATUS_INVALID - unspecified error
- */
-enum dmub_status dmub_srv_cmd_submit(struct dmub_srv *dmub,
-				     const struct dmub_cmd_header *cmd,
-				     uint32_t timeout_us);
-
-/**
  * dmub_srv_wait_for_auto_load() - Waits for firmware auto load to complete
  * @dmub: the dmub service
  * @timeout_us: the maximum number of microseconds to wait
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c
index f45e14ada685..cd51c6138894 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c
@@ -66,10 +66,12 @@ void dmub_dcn20_reset(struct dmub_srv *dmub)
 {
 	REG_UPDATE(DMCUB_CNTL, DMCUB_SOFT_RESET, 1);
 	REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0);
+	REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 1);
 }
 
 void dmub_dcn20_reset_release(struct dmub_srv *dmub)
 {
+	REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 0);
 	REG_WRITE(DMCUB_SCRATCH15, dmub->psp_version & 0x001100FF);
 	REG_UPDATE_2(DMCUB_CNTL, DMCUB_ENABLE, 1, DMCUB_TRACEPORT_EN, 1);
 	REG_UPDATE(DMCUB_CNTL, DMCUB_SOFT_RESET, 0);
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h
index 68af9b190288..53bfd4da69ad 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h
@@ -91,7 +91,8 @@ struct dmub_srv;
 	DMUB_SR(DMCUB_SCRATCH13) \
 	DMUB_SR(DMCUB_SCRATCH14) \
 	DMUB_SR(DMCUB_SCRATCH15) \
-	DMUB_SR(CC_DC_PIPE_DIS)
+	DMUB_SR(CC_DC_PIPE_DIS) \
+	DMUB_SR(MMHUBBUB_SOFT_RESET)
 
 #define DMUB_COMMON_FIELDS() \
 	DMUB_SF(DMCUB_CNTL, DMCUB_ENABLE) \
@@ -119,7 +120,8 @@ struct dmub_srv;
 	DMUB_SF(DMCUB_REGION3_CW7_TOP_ADDRESS, DMCUB_REGION3_CW7_ENABLE) \
 	DMUB_SF(DMCUB_REGION4_TOP_ADDRESS, DMCUB_REGION4_TOP_ADDRESS) \
 	DMUB_SF(DMCUB_REGION4_TOP_ADDRESS, DMCUB_REGION4_ENABLE) \
-	DMUB_SF(CC_DC_PIPE_DIS, DC_DMCUB_ENABLE)
+	DMUB_SF(CC_DC_PIPE_DIS, DC_DMCUB_ENABLE) \
+	DMUB_SF(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET)
 
 struct dmub_srv_common_reg_offset {
 #define DMUB_SR(reg) uint32_t reg;
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
index 9a959f871f11..dee676335d73 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
@@ -67,6 +67,26 @@ static inline uint32_t dmub_align(uint32_t val, uint32_t factor)
 	return (val + factor - 1) / factor * factor;
 }
 
+static void dmub_flush_buffer_mem(const struct dmub_fb *fb)
+{
+	const uint8_t *base = (const uint8_t *)fb->cpu_addr;
+	uint8_t buf[64];
+	uint32_t pos, end;
+
+	/*
+	 * Read the buffer back in 64-byte chunks to avoid a large temporary.
+	 * NOTE(review): buf is never read; confirm the copies are not elided.
+	 */
+	end = fb->size / sizeof(buf) * sizeof(buf);
+
+	for (pos = 0; pos < end; pos += sizeof(buf))
+		dmub_memcpy(buf, base + pos, sizeof(buf));
+
+	/* Copy the remaining tail (fewer than sizeof(buf) bytes). */
+	if (end < fb->size)
+		dmub_memcpy(buf, base + pos, fb->size - end);
+}
+
 static const struct dmub_fw_meta_info *
 dmub_get_fw_meta_info(const uint8_t *fw_bss_data, uint32_t fw_bss_data_size)
 {
@@ -329,6 +349,13 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub,
 		cw1.region.base = DMUB_CW1_BASE;
 		cw1.region.top = cw1.region.base + stack_fb->size - 1;
 
+		/*
+		 * Read back all the instruction memory so we don't hang the
+		 * DMCUB when backdoor loading if the write from x86 hasn't been
+		 * flushed yet. This only occurs in backdoor loading.
+		 */
+		dmub_flush_buffer_mem(inst_fb);
+
 		if (params->load_inst_const && dmub->hw_funcs.backdoor_load)
 			dmub->hw_funcs.backdoor_load(dmub, &cw0, &cw1);
 	}
@@ -405,33 +432,17 @@ enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub)
 	if (!dmub->hw_init)
 		return DMUB_STATUS_INVALID;
 
+	/*
+	 * Read back all the queued commands to ensure that they've
+	 * been flushed to framebuffer memory. Otherwise DMCUB might
+	 * read back stale, fully invalid or partially invalid data.
+	 */
+	dmub_rb_flush_pending(&dmub->inbox1_rb);
+
 	dmub->hw_funcs.set_inbox1_wptr(dmub, dmub->inbox1_rb.wrpt);
 	return DMUB_STATUS_OK;
 }
 
-enum dmub_status dmub_srv_cmd_submit(struct dmub_srv *dmub,
-				     const struct dmub_cmd_header *cmd,
-				     uint32_t timeout_us)
-{
-	uint32_t i = 0;
-
-	if (!dmub->hw_init)
-		return DMUB_STATUS_INVALID;
-
-	for (i = 0; i <= timeout_us; ++i) {
-		dmub->inbox1_rb.rptr = dmub->hw_funcs.get_inbox1_rptr(dmub);
-		if (dmub_rb_push_front(&dmub->inbox1_rb, cmd)) {
-			dmub->hw_funcs.set_inbox1_wptr(dmub,
-						       dmub->inbox1_rb.wrpt);
-			return DMUB_STATUS_OK;
-		}
-
-		udelay(1);
-	}
-
-	return DMUB_STATUS_TIMEOUT;
-}
-
 enum dmub_status dmub_srv_wait_for_auto_load(struct dmub_srv *dmub,
 					     uint32_t timeout_us)
 {
diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h
index 11d7daf6f076..a2903985b9e8 100644
--- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h
+++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h
@@ -134,8 +134,13 @@
 #define PICASSO_A0 0x41
 /* DCN1_01 */
 #define RAVEN2_A0 0x81
+#define RAVEN2_15D8_REV_94 0x94
+#define RAVEN2_15D8_REV_95 0x95
 #define RAVEN2_15D8_REV_E3 0xE3
 #define RAVEN2_15D8_REV_E4 0xE4
+#define RAVEN2_15D8_REV_E9 0xE9
+#define RAVEN2_15D8_REV_EA 0xEA
+#define RAVEN2_15D8_REV_EB 0xEB
 #define RAVEN1_F0 0xF0
 #define RAVEN_UNKNOWN 0xFF
 #ifndef ASICREV_IS_RAVEN
@@ -149,6 +154,11 @@
 #define ASICREV_IS_RV1_F0(eChipRev) ((eChipRev >= RAVEN1_F0) && (eChipRev < RAVEN_UNKNOWN))
 #define ASICREV_IS_DALI(eChipRev) ((eChipRev == RAVEN2_15D8_REV_E3) \
 		|| (eChipRev == RAVEN2_15D8_REV_E4))
+#define ASICREV_IS_POLLOCK(eChipRev) (((eChipRev) == RAVEN2_15D8_REV_94) \
+		|| ((eChipRev) == RAVEN2_15D8_REV_95) \
+		|| ((eChipRev) == RAVEN2_15D8_REV_E9) \
+		|| ((eChipRev) == RAVEN2_15D8_REV_EA) \
+		|| ((eChipRev) == RAVEN2_15D8_REV_EB))
 
 #define FAMILY_RV 142 /* DCN 1*/
 
diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
index fa57885503d4..6e5ecefe7d9d 100644
--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
@@ -816,6 +816,8 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync,
 	in_out_vrr->btr.inserted_duration_in_us = 0;
 	in_out_vrr->btr.frames_to_insert = 0;
 	in_out_vrr->btr.frame_counter = 0;
+	in_out_vrr->fixed.fixed_active = false;
+	in_out_vrr->fixed.target_refresh_in_uhz = 0;
 
 	in_out_vrr->btr.mid_point_in_us =
 				(in_out_vrr->min_duration_in_us +
@@ -832,6 +834,7 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync,
 		in_out_vrr->adjust.v_total_max = stream->timing.v_total;
 	} else if (in_out_vrr->state == VRR_STATE_ACTIVE_VARIABLE &&
 			refresh_range >= MIN_REFRESH_RANGE_IN_US) {
+
 		in_out_vrr->adjust.v_total_min =
 			calc_v_total_from_refresh(stream,
 				in_out_vrr->max_refresh_in_uhz);
diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
index 4e2f615c3566..e75a4bb94488 100644
--- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
+++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
@@ -662,7 +662,11 @@ bool dmcu_load_iram(struct dmcu *dmcu,
 
 	memset(&ram_table, 0, sizeof(ram_table));
 
-	if (dmcu->dmcu_version.abm_version == 0x23) {
+	if (dmcu->dmcu_version.abm_version == 0x24) {
+		fill_iram_v_2_3((struct iram_table_v_2_2 *)ram_table, params);
+		result = dmcu->funcs->load_iram(
+				dmcu, 0, (char *)(&ram_table), IRAM_RESERVE_AREA_START_V2_2);
+	} else if (dmcu->dmcu_version.abm_version == 0x23) {
 		fill_iram_v_2_3((struct iram_table_v_2_2 *)ram_table, params);
 
 		result = dmcu->funcs->load_iram(
@@ -687,3 +691,4 @@ bool dmcu_load_iram(struct dmcu *dmcu,
 
 	return result;
 }
+
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_offset.h
index cff8f91555d3..e9b2bd84cfed 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_offset.h
@@ -8134,6 +8134,10 @@
 #define mmMPC_OUT5_CSC_C33_C34_B                                                                       0x1604
 #define mmMPC_OUT5_CSC_C33_C34_B_BASE_IDX                                                              2
 
+#define mmMPC_OCSC_TEST_DEBUG_INDEX                                                                    0x163b
+#define mmMPC_OCSC_TEST_DEBUG_INDEX_BASE_IDX                                                           2
+#define mmMPC_OCSC_TEST_DEBUG_DATA_BASE_IDX                                                            2
+#define mmMPC_OCSC_TEST_DEBUG_DATA                                                                     0x163c
 
 // addressBlock: dce_dc_mpc_mpc_dcperfmon_dc_perfmon_dispdec
 // base address: 0x5964
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h
index 10c83fecd147..dc8ce7aaa0cf 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h
@@ -28263,7 +28263,14 @@
 #define MPC_OUT5_CSC_C33_C34_B__MPC_OCSC_C34_B__SHIFT                                                         0x10
 #define MPC_OUT5_CSC_C33_C34_B__MPC_OCSC_C33_B_MASK                                                           0x0000FFFFL
 #define MPC_OUT5_CSC_C33_C34_B__MPC_OCSC_C34_B_MASK                                                           0xFFFF0000L
-
+//MPC_OCSC_TEST_DEBUG_INDEX
+#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_INDEX__SHIFT                                           0x0
+#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_WRITE_EN__SHIFT                                        0x8
+#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_INDEX_MASK                                             0x000000FFL
+#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_WRITE_EN_MASK                                          0x00000100L
+//MPC_OCSC_TEST_DEBUG_DATA
+#define MPC_OCSC_TEST_DEBUG_DATA__MPC_OCSC_TEST_DEBUG_DATA__SHIFT                                             0x0
+#define MPC_OCSC_TEST_DEBUG_DATA__MPC_OCSC_TEST_DEBUG_DATA_MASK                                               0xFFFFFFFFL
 
 // addressBlock: dce_dc_mpc_mpc_dcperfmon_dc_perfmon_dispdec
 //DC_PERFMON17_PERFCOUNTER_CNTL
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_offset.h
index eddf83ec1c39..7cd0ee61c030 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_offset.h
@@ -7103,7 +7103,10 @@
 #define mmMPC_OUT3_CSC_C31_C32_B_BASE_IDX                                                              2
 #define mmMPC_OUT3_CSC_C33_C34_B                                                                       0x15ea
 #define mmMPC_OUT3_CSC_C33_C34_B_BASE_IDX                                                              2
-
+#define mmMPC_OCSC_TEST_DEBUG_INDEX                                                                    0x163b
+#define mmMPC_OCSC_TEST_DEBUG_INDEX_BASE_IDX                                                           2
+#define mmMPC_OCSC_TEST_DEBUG_DATA_BASE_IDX                                                            2
+#define mmMPC_OCSC_TEST_DEBUG_DATA                                                                     0x163c
 
 // addressBlock: dce_dc_mpc_mpc_dcperfmon_dc_perfmon_dispdec
 // base address: 0x5964
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h
index faa0e76e32b4..2f780aefc722 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h
@@ -56634,5 +56634,13 @@
 #define AZF0INPUTENDPOINT7_AZALIA_F0_CODEC_INPUT_PIN_CONTROL_INFOFRAME__INFOFRAME_BYTE_5_MASK                 0x00FF0000L
 #define AZF0INPUTENDPOINT7_AZALIA_F0_CODEC_INPUT_PIN_CONTROL_INFOFRAME__INFOFRAME_VALID_MASK                  0x80000000L
 
+//MPC_OCSC_TEST_DEBUG_INDEX
+#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_INDEX__SHIFT                                           0x0
+#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_WRITE_EN__SHIFT                                        0x8
+#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_INDEX_MASK                                             0x000000FFL
+#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_WRITE_EN_MASK                                          0x00000100L
+//MPC_OCSC_TEST_DEBUG_DATA
+#define MPC_OCSC_TEST_DEBUG_DATA__MPC_OCSC_TEST_DEBUG_DATA__SHIFT                                             0x0
+#define MPC_OCSC_TEST_DEBUG_DATA__MPC_OCSC_TEST_DEBUG_DATA_MASK                                               0xFFFFFFFFL
 
 #endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h
index f301e5fe2109..87c84691b5be 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h
@@ -27,6 +27,9 @@
 #define mmDF_PIE_AON0_DfGlobalClkGater									0x00fc
 #define mmDF_PIE_AON0_DfGlobalClkGater_BASE_IDX								0
 
+#define mmDF_CS_UMC_AON0_DfGlobalCtrl									0x00fe
+#define mmDF_CS_UMC_AON0_DfGlobalCtrl_BASE_IDX								0
+
 #define mmDF_CS_UMC_AON0_DramBaseAddress0								0x0044
 #define mmDF_CS_UMC_AON0_DramBaseAddress0_BASE_IDX							0
 
diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h
index 06fac509e987..65e9f756e86e 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h
@@ -33,6 +33,14 @@
 #define DF_PIE_AON0_DfGlobalClkGater__MGCGMode__SHIFT							0x0
 #define DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK							0x0000000FL
 
+/* DF_CS_UMC_AON0_DfGlobalCtrl */
+#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl64K__SHIFT						0x14
+#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl2M__SHIFT						0x15
+#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl1G__SHIFT						0x16
+#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl64K_MASK						0x00100000L
+#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl2M_MASK						0x00200000L
+#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl1G_MASK						0x00400000L
+
 /* DF_CS_AON0_DramBaseAddress0 */
 #define DF_CS_UMC_AON0_DramBaseAddress0__AddrRngVal__SHIFT						0x0
 #define DF_CS_UMC_AON0_DramBaseAddress0__LgcyMmioHoleEn__SHIFT						0x1
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h
index d4c613a85352..c9e3f6d849a8 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h
@@ -8739,10 +8739,16 @@
 #define TCP_ADDR_CONFIG__NUM_BANKS__SHIFT                                                                     0x4
 #define TCP_ADDR_CONFIG__COLHI_WIDTH__SHIFT                                                                   0x6
 #define TCP_ADDR_CONFIG__RB_SPLIT_COLHI__SHIFT                                                                0x9
+#define TCP_ADDR_CONFIG__ENABLE64KHASH__SHIFT                                                                 0xb
+#define TCP_ADDR_CONFIG__ENABLE2MHASH__SHIFT                                                                  0xc
+#define TCP_ADDR_CONFIG__ENABLE1GHASH__SHIFT                                                                  0xd
 #define TCP_ADDR_CONFIG__NUM_TCC_BANKS_MASK                                                                   0x0000000FL
 #define TCP_ADDR_CONFIG__NUM_BANKS_MASK                                                                       0x00000030L
 #define TCP_ADDR_CONFIG__COLHI_WIDTH_MASK                                                                     0x000001C0L
 #define TCP_ADDR_CONFIG__RB_SPLIT_COLHI_MASK                                                                  0x00000200L
+#define TCP_ADDR_CONFIG__ENABLE64KHASH_MASK                                                                   0x00000800L
+#define TCP_ADDR_CONFIG__ENABLE2MHASH_MASK                                                                    0x00001000L
+#define TCP_ADDR_CONFIG__ENABLE1GHASH_MASK                                                                    0x00002000L
 //TCP_CREDIT
 #define TCP_CREDIT__LFIFO_CREDIT__SHIFT                                                                       0x0
 #define TCP_CREDIT__REQ_FIFO_CREDIT__SHIFT                                                                    0x10
diff --git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_offset.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_offset.h
index 043aa695d63f..0d6b594be775 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_offset.h
@@ -27,5 +27,7 @@
 #define mmUMCCH0_0_EccErrCnt_BASE_IDX                                                                  0
 #define mmMCA_UMC_UMC0_MCUMC_STATUST0                                                                  0x03c2
 #define mmMCA_UMC_UMC0_MCUMC_STATUST0_BASE_IDX                                                         0
+#define mmMCA_UMC_UMC0_MCUMC_ADDRT0                                                                    0x03c4
+#define mmMCA_UMC_UMC0_MCUMC_ADDRT0_BASE_IDX                                                           0
 
 #endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_offset.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_offset.h
index 03be415e9555..ce005c674a18 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_offset.h
@@ -27,5 +27,7 @@
 #define mmUMCCH0_0_EccErrCnt_ARCT_BASE_IDX                               1
 #define mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT                               0x03c2
 #define mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT_BASE_IDX                      1
+#define mmMCA_UMC_UMC0_MCUMC_ADDRT0_ARCT                                 0x03c4
+#define mmMCA_UMC_UMC0_MCUMC_ADDRT0_ARCT_BASE_IDX                        1
 
 #endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_sh_mask.h
new file mode 100644
index 000000000000..a5a8c993ec3a
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_sh_mask.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2020  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _umc_6_1_2_SH_MASK_HEADER
+#define _umc_6_1_2_SH_MASK_HEADER
+
+//UMCCH0_0_EccErrCntSel_ARCT
+#define UMCCH0_0_EccErrCntSel_ARCT__EccErrCntCsSel__SHIFT                                                          0x0
+#define UMCCH0_0_EccErrCntSel_ARCT__EccErrInt__SHIFT                                                               0xc
+#define UMCCH0_0_EccErrCntSel_ARCT__EccErrCntEn__SHIFT                                                             0xf
+#define UMCCH0_0_EccErrCntSel_ARCT__EccErrCntCsSel_MASK                                                            0x0000000FL
+#define UMCCH0_0_EccErrCntSel_ARCT__EccErrInt_MASK                                                                 0x00003000L
+#define UMCCH0_0_EccErrCntSel_ARCT__EccErrCntEn_MASK                                                               0x00008000L
+//UMCCH0_0_EccErrCnt_ARCT
+#define UMCCH0_0_EccErrCnt_ARCT__EccErrCnt__SHIFT                                                                  0x0
+#define UMCCH0_0_EccErrCnt_ARCT__EccErrCnt_MASK                                                                    0x0000FFFFL
+//MCA_UMC_UMC0_MCUMC_STATUST0_ARCT
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrorCode__SHIFT                                                         0x0
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrorCodeExt__SHIFT                                                      0x10
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV0__SHIFT                                                           0x16
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrCoreId__SHIFT                                                         0x20
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV1__SHIFT                                                           0x26
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Scrub__SHIFT                                                             0x28
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV2__SHIFT                                                           0x29
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Poison__SHIFT                                                            0x2b
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Deferred__SHIFT                                                          0x2c
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__UECC__SHIFT                                                              0x2d
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__CECC__SHIFT                                                              0x2e
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV3__SHIFT                                                           0x2f
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Transparent__SHIFT                                                       0x34
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__SyndV__SHIFT                                                             0x35
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV4__SHIFT                                                           0x36
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__TCC__SHIFT                                                               0x37
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrCoreIdVal__SHIFT                                                      0x38
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__PCC__SHIFT                                                               0x39
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__AddrV__SHIFT                                                             0x3a
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__MiscV__SHIFT                                                             0x3b
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__En__SHIFT                                                                0x3c
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__UC__SHIFT                                                                0x3d
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Overflow__SHIFT                                                          0x3e
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Val__SHIFT                                                               0x3f
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrorCode_MASK                                                           0x000000000000FFFFL
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrorCodeExt_MASK                                                        0x00000000003F0000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV0_MASK                                                             0x00000000FFC00000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrCoreId_MASK                                                           0x0000003F00000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV1_MASK                                                             0x000000C000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Scrub_MASK                                                               0x0000010000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV2_MASK                                                             0x0000060000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Poison_MASK                                                              0x0000080000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Deferred_MASK                                                            0x0000100000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__UECC_MASK                                                                0x0000200000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__CECC_MASK                                                                0x0000400000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV3_MASK                                                             0x000F800000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Transparent_MASK                                                         0x0010000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__SyndV_MASK                                                               0x0020000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV4_MASK                                                             0x0040000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__TCC_MASK                                                                 0x0080000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrCoreIdVal_MASK                                                        0x0100000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__PCC_MASK                                                                 0x0200000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__AddrV_MASK                                                               0x0400000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__MiscV_MASK                                                               0x0800000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__En_MASK                                                                  0x1000000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__UC_MASK                                                                  0x2000000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Overflow_MASK                                                            0x4000000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Val_MASK                                                                 0x8000000000000000L
+//MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT
+#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__ErrorAddr__SHIFT                                                           0x0
+#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__LSB__SHIFT                                                                 0x38
+#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__Reserved__SHIFT                                                            0x3e
+#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__ErrorAddr_MASK                                                             0x00FFFFFFFFFFFFFFL
+#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__LSB_MASK                                                                   0x3F00000000000000L
+#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__Reserved_MASK                                                              0xC000000000000000L
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 2cd217e60125..a607b1034962 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -256,6 +256,10 @@ struct kfd2kgd_calls {
 			uint32_t wptr_shift, uint32_t wptr_mask,
 			struct mm_struct *mm);
 
+	int (*hiq_mqd_load)(struct kgd_dev *kgd, void *mqd,
+			    uint32_t pipe_id, uint32_t queue_id,
+			    uint32_t doorbell_off);
+
 	int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd,
 			     uint32_t __user *wptr, struct mm_struct *mm);
 
@@ -307,8 +311,6 @@ struct kfd2kgd_calls {
 
 	void (*set_vm_context_page_table_base)(struct kgd_dev *kgd,
 			uint32_t vmid, uint64_t page_table_base);
-	int (*invalidate_tlbs)(struct kgd_dev *kgd, uint16_t pasid);
-	int (*invalidate_tlbs_vmid)(struct kgd_dev *kgd, uint16_t vmid);
 	uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd);
 	uint64_t (*get_hive_id)(struct kgd_dev *kgd);
 
diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
index 1c15c6fbe3b9..14ba6aa876e2 100644
--- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
@@ -1318,6 +1318,7 @@ static int arcturus_get_power_limit(struct smu_context *smu,
 static int arcturus_get_power_profile_mode(struct smu_context *smu,
 					   char *buf)
 {
+	struct amdgpu_device *adev = smu->adev;
 	DpmActivityMonitorCoeffInt_t activity_monitor;
 	static const char *profile_name[] = {
 					"BOOTUP_DEFAULT",
@@ -1351,7 +1352,7 @@ static int arcturus_get_power_profile_mode(struct smu_context *smu,
 	if (result)
 		return result;
 
-	if (smu_version >= 0x360d00)
+	if (smu_version >= 0x360d00 && !amdgpu_sriov_vf(adev))
 		size += sprintf(buf + size, "%16s %s %s %s %s %s %s %s %s %s %s\n",
 			title[0], title[1], title[2], title[3], title[4], title[5],
 			title[6], title[7], title[8], title[9], title[10]);
@@ -1368,7 +1369,7 @@ static int arcturus_get_power_profile_mode(struct smu_context *smu,
 		if (workload_type < 0)
 			continue;
 
-		if (smu_version >= 0x360d00) {
+		if (smu_version >= 0x360d00 && !amdgpu_sriov_vf(adev)) {
 			result = smu_update_table(smu,
 						  SMU_TABLE_ACTIVITY_MONITOR_COEFF,
 						  workload_type,
@@ -1383,7 +1384,7 @@ static int arcturus_get_power_profile_mode(struct smu_context *smu,
 		size += sprintf(buf + size, "%2d %14s%s\n",
 			i, profile_name[i], (i == smu->power_profile_mode) ? "*" : " ");
 
-		if (smu_version >= 0x360d00) {
+		if (smu_version >= 0x360d00 && !amdgpu_sriov_vf(adev)) {
 			size += sprintf(buf + size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n",
 				" ",
 				0,
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu12_driver_if.h b/drivers/gpu/drm/amd/powerplay/inc/smu12_driver_if.h
index c27c82851468..2f85a34c0591 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu12_driver_if.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu12_driver_if.h
@@ -27,7 +27,7 @@
 // *** IMPORTANT ***
 // SMU TEAM: Always increment the interface version if 
 // any structure is changed in this file
-#define SMU12_DRIVER_IF_VERSION 10
+#define SMU12_DRIVER_IF_VERSION 11
 
 typedef struct {
   int32_t value;
@@ -192,6 +192,11 @@ typedef struct {
   uint16_t SocTemperature;              //[centi-Celsius]
   uint16_t ThrottlerStatus;
   uint16_t spare;
+
+  uint16_t StapmOriginalLimit;          //[mW]
+  uint16_t StapmCurrentLimit;           //[mW]
+  uint16_t ApuPower;              //[mW]
+  uint16_t dGpuPower;               //[mW]
 } SmuMetrics_t;
 
 
diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
index e804f9854027..02f8c9cb89d9 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
@@ -1154,11 +1154,12 @@ static int smu_v11_0_set_thermal_range(struct smu_context *smu,
 	int low = SMU_THERMAL_MINIMUM_ALERT_TEMP;
 	int high = SMU_THERMAL_MAXIMUM_ALERT_TEMP;
 	uint32_t val;
+	struct smu_table_context *table_context = &smu->smu_table;
+	struct smu_11_0_powerplay_table *powerplay_table = table_context->power_play_table;
 
 	low = max(SMU_THERMAL_MINIMUM_ALERT_TEMP,
 			range.min / SMU_TEMPERATURE_UNITS_PER_CENTIGRADES);
-	high = min(SMU_THERMAL_MAXIMUM_ALERT_TEMP,
-			range.max / SMU_TEMPERATURE_UNITS_PER_CENTIGRADES);
+	high = min((uint16_t)SMU_THERMAL_MAXIMUM_ALERT_TEMP, powerplay_table->software_shutdown_temp);
 
 	if (low > high)
 		return -EINVAL;
diff --git a/drivers/gpu/drm/arm/malidp_mw.c b/drivers/gpu/drm/arm/malidp_mw.c
index 875a3a9eabfa..7d0e7b031e44 100644
--- a/drivers/gpu/drm/arm/malidp_mw.c
+++ b/drivers/gpu/drm/arm/malidp_mw.c
@@ -56,7 +56,7 @@ malidp_mw_connector_mode_valid(struct drm_connector *connector,
 	return MODE_OK;
 }
 
-const struct drm_connector_helper_funcs malidp_mw_connector_helper_funcs = {
+static const struct drm_connector_helper_funcs malidp_mw_connector_helper_funcs = {
 	.get_modes = malidp_mw_connector_get_modes,
 	.mode_valid = malidp_mw_connector_mode_valid,
 };
diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c
index 895b73f23079..6d4a29e99ae2 100644
--- a/drivers/gpu/drm/drm_client_modeset.c
+++ b/drivers/gpu/drm/drm_client_modeset.c
@@ -115,6 +115,33 @@ drm_client_find_modeset(struct drm_client_dev *client, struct drm_crtc *crtc)
 }
 
 static struct drm_display_mode *
+drm_connector_get_tiled_mode(struct drm_connector *connector)
+{
+	struct drm_display_mode *mode;
+
+	list_for_each_entry(mode, &connector->modes, head) {
+		if (mode->hdisplay == connector->tile_h_size &&
+		    mode->vdisplay == connector->tile_v_size)
+			return mode;
+	}
+	return NULL;
+}
+
+static struct drm_display_mode *
+drm_connector_fallback_non_tiled_mode(struct drm_connector *connector)
+{
+	struct drm_display_mode *mode;
+
+	list_for_each_entry(mode, &connector->modes, head) {
+		if (mode->hdisplay == connector->tile_h_size &&
+		    mode->vdisplay == connector->tile_v_size)
+			continue;
+		return mode;
+	}
+	return NULL;
+}
+
+static struct drm_display_mode *
 drm_connector_has_preferred_mode(struct drm_connector *connector, int width, int height)
 {
 	struct drm_display_mode *mode;
@@ -348,8 +375,15 @@ static bool drm_client_target_preferred(struct drm_connector **connectors,
 	struct drm_connector *connector;
 	u64 conn_configured = 0;
 	int tile_pass = 0;
+	int num_tiled_conns = 0;
 	int i;
 
+	for (i = 0; i < connector_count; i++) {
+		if (connectors[i]->has_tile &&
+		    connectors[i]->status == connector_status_connected)
+			num_tiled_conns++;
+	}
+
 retry:
 	for (i = 0; i < connector_count; i++) {
 		connector = connectors[i];
@@ -399,6 +433,28 @@ retry:
 			list_for_each_entry(modes[i], &connector->modes, head)
 				break;
 		}
+		/*
+		 * For a tiled display, if not all tiles are present, fall
+		 * back to the first available non-tiled mode.
+		 * Once all tiles are present, try to find the tiled mode
+		 * for each of them; if the tiled mode is not available due
+		 * to fbcon size limitations, use the first non-tiled mode
+		 * only for tile 0,0 and set no mode for all other tiles.
+		 */
+		if (connector->has_tile) {
+			if (num_tiled_conns <
+			    connector->num_h_tile * connector->num_v_tile ||
+			    (connector->tile_h_loc == 0 &&
+			     connector->tile_v_loc == 0 &&
+			     !drm_connector_get_tiled_mode(connector))) {
+				DRM_DEBUG_KMS("Falling back to non tiled mode on Connector %d\n",
+					      connector->base.id);
+				modes[i] = drm_connector_fallback_non_tiled_mode(connector);
+			} else {
+				modes[i] = drm_connector_get_tiled_mode(connector);
+			}
+		}
+
 		DRM_DEBUG_KMS("found mode %s\n", modes[i] ? modes[i]->name :
 			  "none");
 		conn_configured |= BIT_ULL(i);
@@ -515,6 +571,7 @@ static bool drm_client_firmware_config(struct drm_client_dev *client,
 	bool fallback = true, ret = true;
 	int num_connectors_enabled = 0;
 	int num_connectors_detected = 0;
+	int num_tiled_conns = 0;
 	struct drm_modeset_acquire_ctx ctx;
 
 	if (!drm_drv_uses_atomic_modeset(dev))
@@ -532,6 +589,11 @@ static bool drm_client_firmware_config(struct drm_client_dev *client,
 	memcpy(save_enabled, enabled, count);
 	mask = GENMASK(count - 1, 0);
 	conn_configured = 0;
+	for (i = 0; i < count; i++) {
+		if (connectors[i]->has_tile &&
+		    connectors[i]->status == connector_status_connected)
+			num_tiled_conns++;
+	}
 retry:
 	conn_seq = conn_configured;
 	for (i = 0; i < count; i++) {
@@ -631,6 +693,16 @@ retry:
 				      connector->name);
 			modes[i] = &connector->state->crtc->mode;
 		}
+		/*
+		 * For a tiled display, if not all tiles are present,
+		 * fall back to a non-tiled mode.
+		 */
+		if (connector->has_tile &&
+		    num_tiled_conns < connector->num_h_tile * connector->num_v_tile) {
+			DRM_DEBUG_KMS("Falling back to non tiled mode on Connector %d\n",
+				      connector->base.id);
+			modes[i] = drm_connector_fallback_non_tiled_mode(connector);
+		}
 		crtcs[i] = new_crtc;
 
 		DRM_DEBUG_KMS("connector %s on [CRTC:%d:%s]: %dx%d%s\n",
diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index 5d3c1d379277..38bf111e5f9b 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -398,7 +398,7 @@ drm_dp_encode_sideband_req(const struct drm_dp_sideband_msg_req_body *req,
 			memcpy(&buf[idx], req->u.i2c_read.transactions[i].bytes, req->u.i2c_read.transactions[i].num_bytes);
 			idx += req->u.i2c_read.transactions[i].num_bytes;
 
-			buf[idx] = (req->u.i2c_read.transactions[i].no_stop_bit & 0x1) << 5;
+			buf[idx] = (req->u.i2c_read.transactions[i].no_stop_bit & 0x1) << 4;
 			buf[idx] |= (req->u.i2c_read.transactions[i].i2c_transaction_delay & 0xf);
 			idx++;
 		}
@@ -1209,6 +1209,8 @@ static int drm_dp_mst_wait_tx_reply(struct drm_dp_mst_branch *mstb,
 		    txmsg->state == DRM_DP_SIDEBAND_TX_SENT) {
 			mstb->tx_slots[txmsg->seqno] = NULL;
 		}
+		mgr->is_waiting_for_dwn_reply = false;
+
 	}
 out:
 	if (unlikely(ret == -EIO) && drm_debug_enabled(DRM_UT_DP)) {
@@ -1218,6 +1220,7 @@ out:
 	}
 	mutex_unlock(&mgr->qlock);
 
+	drm_dp_mst_kick_tx(mgr);
 	return ret;
 }
 
@@ -2342,7 +2345,7 @@ drm_dp_mst_handle_conn_stat(struct drm_dp_mst_branch *mstb,
 {
 	struct drm_dp_mst_topology_mgr *mgr = mstb->mgr;
 	struct drm_dp_mst_port *port;
-	int old_ddps, ret;
+	int old_ddps, old_input, ret, i;
 	u8 new_pdt;
 	bool dowork = false, create_connector = false;
 
@@ -2373,6 +2376,7 @@ drm_dp_mst_handle_conn_stat(struct drm_dp_mst_branch *mstb,
 	}
 
 	old_ddps = port->ddps;
+	old_input = port->input;
 	port->input = conn_stat->input_port;
 	port->mcs = conn_stat->message_capability_status;
 	port->ldps = conn_stat->legacy_device_plug_status;
@@ -2397,6 +2401,28 @@ drm_dp_mst_handle_conn_stat(struct drm_dp_mst_branch *mstb,
 		dowork = false;
 	}
 
+	if (!old_input && old_ddps != port->ddps && !port->ddps) {
+		for (i = 0; i < mgr->max_payloads; i++) {
+			struct drm_dp_vcpi *vcpi = mgr->proposed_vcpis[i];
+			struct drm_dp_mst_port *port_validated;
+
+			if (!vcpi)
+				continue;
+
+			port_validated =
+				container_of(vcpi, struct drm_dp_mst_port, vcpi);
+			port_validated =
+				drm_dp_mst_topology_get_port_validated(mgr, port_validated);
+			if (!port_validated) {
+				mutex_lock(&mgr->payload_lock);
+				vcpi->num_slots = 0;
+				mutex_unlock(&mgr->payload_lock);
+			} else {
+				drm_dp_mst_topology_put_port(port_validated);
+			}
+		}
+	}
+
 	if (port->connector)
 		drm_modeset_unlock(&mgr->base.lock);
 	else if (create_connector)
@@ -2759,9 +2785,11 @@ static void process_single_down_tx_qlock(struct drm_dp_mst_topology_mgr *mgr)
 	ret = process_single_tx_qlock(mgr, txmsg, false);
 	if (ret == 1) {
 		/* txmsg is sent it should be in the slots now */
+		mgr->is_waiting_for_dwn_reply = true;
 		list_del(&txmsg->next);
 	} else if (ret) {
 		DRM_DEBUG_KMS("failed to send msg in q %d\n", ret);
+		mgr->is_waiting_for_dwn_reply = false;
 		list_del(&txmsg->next);
 		if (txmsg->seqno != -1)
 			txmsg->dst->tx_slots[txmsg->seqno] = NULL;
@@ -2801,7 +2829,8 @@ static void drm_dp_queue_down_tx(struct drm_dp_mst_topology_mgr *mgr,
 		drm_dp_mst_dump_sideband_msg_tx(&p, txmsg);
 	}
 
-	if (list_is_singular(&mgr->tx_msg_downq))
+	if (list_is_singular(&mgr->tx_msg_downq) &&
+	    !mgr->is_waiting_for_dwn_reply)
 		process_single_down_tx_qlock(mgr);
 	mutex_unlock(&mgr->qlock);
 }
@@ -3756,6 +3785,7 @@ static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr)
 	mutex_lock(&mgr->qlock);
 	txmsg->state = DRM_DP_SIDEBAND_TX_RX;
 	mstb->tx_slots[slot] = NULL;
+	mgr->is_waiting_for_dwn_reply = false;
 	mutex_unlock(&mgr->qlock);
 
 	wake_up_all(&mgr->tx_waitq);
@@ -3765,6 +3795,9 @@ static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr)
 no_msg:
 	drm_dp_mst_topology_put_mstb(mstb);
 clear_down_rep_recv:
+	mutex_lock(&mgr->qlock);
+	mgr->is_waiting_for_dwn_reply = false;
+	mutex_unlock(&mgr->qlock);
 	memset(&mgr->down_rep_recv, 0, sizeof(struct drm_dp_sideband_msg_rx));
 
 	return 0;
@@ -4597,7 +4630,7 @@ static void drm_dp_tx_work(struct work_struct *work)
 	struct drm_dp_mst_topology_mgr *mgr = container_of(work, struct drm_dp_mst_topology_mgr, tx_work);
 
 	mutex_lock(&mgr->qlock);
-	if (!list_empty(&mgr->tx_msg_downq))
+	if (!list_empty(&mgr->tx_msg_downq) && !mgr->is_waiting_for_dwn_reply)
 		process_single_down_tx_qlock(mgr);
 	mutex_unlock(&mgr->qlock);
 }
@@ -5001,6 +5034,9 @@ int drm_dp_mst_atomic_check(struct drm_atomic_state *state)
 	int i, ret = 0;
 
 	for_each_new_mst_mgr_in_state(state, mgr, mst_state, i) {
+		if (!mgr->mst_state)
+			continue;
+
 		ret = drm_dp_mst_atomic_check_vcpi_alloc_limit(mgr, mst_state);
 		if (ret)
 			break;
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index f8e905192608..4c7cbce7bae7 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -1267,7 +1267,7 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var,
 	 * Changes struct fb_var_screeninfo are currently not pushed back
 	 * to KMS, hence fail if different settings are requested.
 	 */
-	if (var->bits_per_pixel != fb->format->cpp[0] * 8 ||
+	if (var->bits_per_pixel > fb->format->cpp[0] * 8 ||
 	    var->xres > fb->width || var->yres > fb->height ||
 	    var->xres_virtual > fb->width || var->yres_virtual > fb->height) {
 		drm_dbg_kms(dev, "fb requested width/height/bpp can't fit in current fb "
@@ -1293,6 +1293,11 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var,
 	}
 
 	/*
+	 * Likewise, bits_per_pixel should be rounded up to a supported value.
+	 */
+	var->bits_per_pixel = fb->format->cpp[0] * 8;
+
+	/*
 	 * drm fbdev emulation doesn't support changing the pixel format at all,
 	 * so reject all pixel format changing requests.
 	 */
@@ -1561,7 +1566,9 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
 		for (j = 0; j < mode_set->num_connectors; j++) {
 			struct drm_connector *connector = mode_set->connectors[j];
 
-			if (connector->has_tile) {
+			if (connector->has_tile &&
+			    desired_mode->hdisplay == connector->tile_h_size &&
+			    desired_mode->vdisplay == connector->tile_v_size) {
 				lasth = (connector->tile_h_loc == (connector->num_h_tile - 1));
 				lastv = (connector->tile_v_loc == (connector->num_v_tile - 1));
 				/* cloning to multiple tiles is just crazy-talk, so: */
diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig
index 6f7d3b3b3628..6417f374b923 100644
--- a/drivers/gpu/drm/exynos/Kconfig
+++ b/drivers/gpu/drm/exynos/Kconfig
@@ -1,13 +1,13 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config DRM_EXYNOS
-	tristate "DRM Support for Samsung SoC EXYNOS Series"
+	tristate "DRM Support for Samsung SoC Exynos Series"
 	depends on OF && DRM && (ARCH_S3C64XX || ARCH_S5PV210 || ARCH_EXYNOS || ARCH_MULTIPLATFORM || COMPILE_TEST)
 	depends on MMU
 	select DRM_KMS_HELPER
 	select VIDEOMODE_HELPERS
 	select SND_SOC_HDMI_CODEC if SND_SOC
 	help
-	  Choose this option if you have a Samsung SoC EXYNOS chipset.
+	  Choose this option if you have a Samsung SoC Exynos chipset.
 	  If M is selected the module will be called exynosdrm.
 
 if DRM_EXYNOS
@@ -62,7 +62,7 @@ config DRM_EXYNOS_DSI
 	  This enables support for Exynos MIPI-DSI device.
 
 config DRM_EXYNOS_DP
-	bool "EXYNOS specific extensions for Analogix DP driver"
+	bool "Exynos specific extensions for Analogix DP driver"
 	depends on DRM_EXYNOS_FIMD || DRM_EXYNOS7_DECON
 	select DRM_ANALOGIX_DP
 	default DRM_EXYNOS
diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index 2d5cbfda3ca7..8428ae12dfa5 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -510,7 +510,7 @@ static void decon_swreset(struct decon_context *ctx)
 	       ctx->addr + DECON_CRCCTRL);
 }
 
-static void decon_enable(struct exynos_drm_crtc *crtc)
+static void decon_atomic_enable(struct exynos_drm_crtc *crtc)
 {
 	struct decon_context *ctx = crtc->ctx;
 
@@ -523,7 +523,7 @@ static void decon_enable(struct exynos_drm_crtc *crtc)
 	decon_commit(ctx->crtc);
 }
 
-static void decon_disable(struct exynos_drm_crtc *crtc)
+static void decon_atomic_disable(struct exynos_drm_crtc *crtc)
 {
 	struct decon_context *ctx = crtc->ctx;
 	int i;
@@ -599,8 +599,8 @@ static enum drm_mode_status decon_mode_valid(struct exynos_drm_crtc *crtc,
 }
 
 static const struct exynos_drm_crtc_ops decon_crtc_ops = {
-	.enable			= decon_enable,
-	.disable		= decon_disable,
+	.atomic_enable		= decon_atomic_enable,
+	.atomic_disable		= decon_atomic_disable,
 	.enable_vblank		= decon_enable_vblank,
 	.disable_vblank		= decon_disable_vblank,
 	.atomic_begin		= decon_atomic_begin,
@@ -651,7 +651,7 @@ static void decon_unbind(struct device *dev, struct device *master, void *data)
 {
 	struct decon_context *ctx = dev_get_drvdata(dev);
 
-	decon_disable(ctx->crtc);
+	decon_atomic_disable(ctx->crtc);
 
 	/* detach this sub driver from iommu mapping if supported. */
 	exynos_drm_unregister_dma(ctx->drm_dev, ctx->dev);
diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
index f0640950bd46..ff59c641fa80 100644
--- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
@@ -526,7 +526,7 @@ static void decon_init(struct decon_context *ctx)
 		writel(VIDCON1_VCLK_HOLD, ctx->regs + VIDCON1(0));
 }
 
-static void decon_enable(struct exynos_drm_crtc *crtc)
+static void decon_atomic_enable(struct exynos_drm_crtc *crtc)
 {
 	struct decon_context *ctx = crtc->ctx;
 
@@ -546,7 +546,7 @@ static void decon_enable(struct exynos_drm_crtc *crtc)
 	ctx->suspended = false;
 }
 
-static void decon_disable(struct exynos_drm_crtc *crtc)
+static void decon_atomic_disable(struct exynos_drm_crtc *crtc)
 {
 	struct decon_context *ctx = crtc->ctx;
 	int i;
@@ -568,8 +568,8 @@ static void decon_disable(struct exynos_drm_crtc *crtc)
 }
 
 static const struct exynos_drm_crtc_ops decon_crtc_ops = {
-	.enable = decon_enable,
-	.disable = decon_disable,
+	.atomic_enable = decon_atomic_enable,
+	.atomic_disable = decon_atomic_disable,
 	.enable_vblank = decon_enable_vblank,
 	.disable_vblank = decon_disable_vblank,
 	.atomic_begin = decon_atomic_begin,
@@ -653,7 +653,7 @@ static void decon_unbind(struct device *dev, struct device *master,
 {
 	struct decon_context *ctx = dev_get_drvdata(dev);
 
-	decon_disable(ctx->crtc);
+	decon_atomic_disable(ctx->crtc);
 
 	if (ctx->encoder)
 		exynos_dpi_remove(ctx->encoder);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
index 77ce78986408..1c03485676ef 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_crtc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
@@ -23,8 +23,8 @@ static void exynos_drm_crtc_atomic_enable(struct drm_crtc *crtc,
 {
 	struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc);
 
-	if (exynos_crtc->ops->enable)
-		exynos_crtc->ops->enable(exynos_crtc);
+	if (exynos_crtc->ops->atomic_enable)
+		exynos_crtc->ops->atomic_enable(exynos_crtc);
 
 	drm_crtc_vblank_on(crtc);
 }
@@ -36,8 +36,8 @@ static void exynos_drm_crtc_atomic_disable(struct drm_crtc *crtc,
 
 	drm_crtc_vblank_off(crtc);
 
-	if (exynos_crtc->ops->disable)
-		exynos_crtc->ops->disable(exynos_crtc);
+	if (exynos_crtc->ops->atomic_disable)
+		exynos_crtc->ops->atomic_disable(exynos_crtc);
 
 	if (crtc->state->event && !crtc->state->active) {
 		spin_lock_irq(&crtc->dev->event_lock);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h
index d4014ba592fd..d4d21d8cfb90 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h
@@ -118,8 +118,8 @@ struct exynos_drm_plane_config {
 /*
  * Exynos drm crtc ops
  *
- * @enable: enable the device
- * @disable: disable the device
+ * @atomic_enable: enable the device
+ * @atomic_disable: disable the device
  * @enable_vblank: specific driver callback for enabling vblank interrupt.
  * @disable_vblank: specific driver callback for disabling vblank interrupt.
  * @mode_valid: specific driver callback for mode validation
@@ -133,8 +133,8 @@ struct exynos_drm_plane_config {
  */
 struct exynos_drm_crtc;
 struct exynos_drm_crtc_ops {
-	void (*enable)(struct exynos_drm_crtc *crtc);
-	void (*disable)(struct exynos_drm_crtc *crtc);
+	void (*atomic_enable)(struct exynos_drm_crtc *crtc);
+	void (*atomic_disable)(struct exynos_drm_crtc *crtc);
 	int (*enable_vblank)(struct exynos_drm_crtc *crtc);
 	void (*disable_vblank)(struct exynos_drm_crtc *crtc);
 	enum drm_mode_status (*mode_valid)(struct exynos_drm_crtc *crtc,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index 8d0a929104e5..21aec38702fc 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -894,7 +894,7 @@ static void fimd_disable_plane(struct exynos_drm_crtc *crtc,
 		fimd_enable_shadow_channel_path(ctx, win, false);
 }
 
-static void fimd_enable(struct exynos_drm_crtc *crtc)
+static void fimd_atomic_enable(struct exynos_drm_crtc *crtc)
 {
 	struct fimd_context *ctx = crtc->ctx;
 
@@ -912,7 +912,7 @@ static void fimd_enable(struct exynos_drm_crtc *crtc)
 	fimd_commit(ctx->crtc);
 }
 
-static void fimd_disable(struct exynos_drm_crtc *crtc)
+static void fimd_atomic_disable(struct exynos_drm_crtc *crtc)
 {
 	struct fimd_context *ctx = crtc->ctx;
 	int i;
@@ -1006,8 +1006,8 @@ static void fimd_dp_clock_enable(struct exynos_drm_clk *clk, bool enable)
 }
 
 static const struct exynos_drm_crtc_ops fimd_crtc_ops = {
-	.enable = fimd_enable,
-	.disable = fimd_disable,
+	.atomic_enable = fimd_atomic_enable,
+	.atomic_disable = fimd_atomic_disable,
 	.enable_vblank = fimd_enable_vblank,
 	.disable_vblank = fimd_disable_vblank,
 	.atomic_begin = fimd_atomic_begin,
@@ -1098,7 +1098,7 @@ static void fimd_unbind(struct device *dev, struct device *master,
 {
 	struct fimd_context *ctx = dev_get_drvdata(dev);
 
-	fimd_disable(ctx->crtc);
+	fimd_atomic_disable(ctx->crtc);
 
 	exynos_drm_unregister_dma(ctx->drm_dev, ctx->dev);
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
index 7ae087b0504d..88b6fcaa20be 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
@@ -1313,6 +1313,7 @@ static int gsc_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 
+	component_del(dev, &gsc_component_ops);
 	pm_runtime_dont_use_autosuspend(dev);
 	pm_runtime_disable(dev);
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
index 65b891cb9c50..b320b3a21ad4 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
@@ -120,7 +120,7 @@ static void vidi_update_plane(struct exynos_drm_crtc *crtc,
 	DRM_DEV_DEBUG_KMS(ctx->dev, "dma_addr = %pad\n", &addr);
 }
 
-static void vidi_enable(struct exynos_drm_crtc *crtc)
+static void vidi_atomic_enable(struct exynos_drm_crtc *crtc)
 {
 	struct vidi_context *ctx = crtc->ctx;
 
@@ -133,7 +133,7 @@ static void vidi_enable(struct exynos_drm_crtc *crtc)
 	drm_crtc_vblank_on(&crtc->base);
 }
 
-static void vidi_disable(struct exynos_drm_crtc *crtc)
+static void vidi_atomic_disable(struct exynos_drm_crtc *crtc)
 {
 	struct vidi_context *ctx = crtc->ctx;
 
@@ -147,8 +147,8 @@ static void vidi_disable(struct exynos_drm_crtc *crtc)
 }
 
 static const struct exynos_drm_crtc_ops vidi_crtc_ops = {
-	.enable = vidi_enable,
-	.disable = vidi_disable,
+	.atomic_enable = vidi_atomic_enable,
+	.atomic_disable = vidi_atomic_disable,
 	.enable_vblank = vidi_enable_vblank,
 	.disable_vblank = vidi_disable_vblank,
 	.update_plane = vidi_update_plane,
diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c
index 6cfdb95fef2f..38ae9c32feef 100644
--- a/drivers/gpu/drm/exynos/exynos_mixer.c
+++ b/drivers/gpu/drm/exynos/exynos_mixer.c
@@ -986,7 +986,7 @@ static void mixer_atomic_flush(struct exynos_drm_crtc *crtc)
 	exynos_crtc_handle_event(crtc);
 }
 
-static void mixer_enable(struct exynos_drm_crtc *crtc)
+static void mixer_atomic_enable(struct exynos_drm_crtc *crtc)
 {
 	struct mixer_context *ctx = crtc->ctx;
 
@@ -1015,7 +1015,7 @@ static void mixer_enable(struct exynos_drm_crtc *crtc)
 	set_bit(MXR_BIT_POWERED, &ctx->flags);
 }
 
-static void mixer_disable(struct exynos_drm_crtc *crtc)
+static void mixer_atomic_disable(struct exynos_drm_crtc *crtc)
 {
 	struct mixer_context *ctx = crtc->ctx;
 	int i;
@@ -1109,8 +1109,8 @@ static bool mixer_mode_fixup(struct exynos_drm_crtc *crtc,
 }
 
 static const struct exynos_drm_crtc_ops mixer_crtc_ops = {
-	.enable			= mixer_enable,
-	.disable		= mixer_disable,
+	.atomic_enable		= mixer_atomic_enable,
+	.atomic_disable		= mixer_atomic_disable,
 	.enable_vblank		= mixer_enable_vblank,
 	.disable_vblank		= mixer_disable_vblank,
 	.atomic_begin		= mixer_atomic_begin,
diff --git a/drivers/gpu/drm/i915/.gitignore b/drivers/gpu/drm/i915/.gitignore
new file mode 100644
index 000000000000..d9a77f3b59b2
--- /dev/null
+++ b/drivers/gpu/drm/i915/.gitignore
@@ -0,0 +1 @@
+*.hdrtest
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index b0c53661f62b..b8c5f8934dbd 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -31,9 +31,6 @@ CFLAGS_display/intel_fbdev.o = $(call cc-disable-warning, override-init)
 subdir-ccflags-y += \
 	$(call as-instr,movntdqa (%eax)$(comma)%xmm0,-DCONFIG_AS_MOVNTDQA)
 
-# Extra header tests
-header-test-pattern-$(CONFIG_DRM_I915_WERROR) := *.h
-
 subdir-ccflags-y += -I$(srctree)/$(src)
 
 # Please keep these build lists sorted!
@@ -73,11 +70,12 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o display/intel_pipe_crc.o
 i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
 
 # "Graphics Technology" (aka we talk to the gpu)
-obj-y += gt/
 gt-y += \
 	gt/debugfs_engines.o \
 	gt/debugfs_gt.o \
 	gt/debugfs_gt_pm.o \
+	gt/gen6_ppgtt.o \
+	gt/gen8_ppgtt.o \
 	gt/intel_breadcrumbs.o \
 	gt/intel_context.o \
 	gt/intel_engine_cs.o \
@@ -85,14 +83,17 @@ gt-y += \
 	gt/intel_engine_pm.o \
 	gt/intel_engine_pool.o \
 	gt/intel_engine_user.o \
+	gt/intel_ggtt.o \
 	gt/intel_gt.o \
 	gt/intel_gt_irq.o \
 	gt/intel_gt_pm.o \
 	gt/intel_gt_pm_irq.o \
 	gt/intel_gt_requests.o \
+	gt/intel_gtt.o \
 	gt/intel_llc.o \
 	gt/intel_lrc.o \
 	gt/intel_mocs.o \
+	gt/intel_ppgtt.o \
 	gt/intel_rc6.o \
 	gt/intel_renderstate.o \
 	gt/intel_reset.o \
@@ -111,7 +112,6 @@ gt-y += \
 i915-y += $(gt-y)
 
 # GEM (Graphics Execution Management) code
-obj-y += gem/
 gem-y += \
 	gem/i915_gem_busy.o \
 	gem/i915_gem_clflush.o \
@@ -157,7 +157,6 @@ i915-y += \
 	  intel_wopcm.o
 
 # general-purpose microcontroller (GuC) support
-obj-y += gt/uc/
 i915-y += gt/uc/intel_uc.o \
 	  gt/uc/intel_uc_fw.o \
 	  gt/uc/intel_guc.o \
@@ -170,7 +169,6 @@ i915-y += gt/uc/intel_uc.o \
 	  gt/uc/intel_huc_fw.o
 
 # modesetting core code
-obj-y += display/
 i915-y += \
 	display/intel_atomic.o \
 	display/intel_atomic_plane.o \
@@ -235,7 +233,6 @@ i915-y += \
 	display/vlv_dsi_pll.o
 
 # perf code
-obj-y += oa/
 i915-y += \
 	oa/i915_oa_hsw.o \
 	oa/i915_oa_bdw.o \
@@ -260,6 +257,7 @@ i915-$(CONFIG_DRM_I915_SELFTEST) += \
 	gem/selftests/igt_gem_utils.o \
 	selftests/i915_random.o \
 	selftests/i915_selftest.o \
+	selftests/igt_atomic.o \
 	selftests/igt_flush_test.o \
 	selftests/igt_live_test.o \
 	selftests/igt_mmap.o \
@@ -276,3 +274,27 @@ endif
 
 obj-$(CONFIG_DRM_I915) += i915.o
 obj-$(CONFIG_DRM_I915_GVT_KVMGT) += gvt/kvmgt.o
+
+# header test
+
+# exclude some broken headers from the test coverage
+no-header-test := \
+	display/intel_vbt_defs.h \
+	gvt/execlist.h \
+	gvt/fb_decoder.h \
+	gvt/gtt.h \
+	gvt/gvt.h \
+	gvt/interrupt.h \
+	gvt/mmio_context.h \
+	gvt/mpt.h \
+	gvt/scheduler.h
+
+extra-$(CONFIG_DRM_I915_WERROR) += \
+	$(patsubst %.h,%.hdrtest, $(filter-out $(no-header-test), \
+		$(shell cd $(srctree)/$(src) && find * -name '*.h')))
+
+quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@)
+      cmd_hdrtest = $(CC) $(c_flags) -S -o /dev/null -x c /dev/null -include $<; touch $@
+
+$(obj)/%.hdrtest: $(src)/%.h FORCE
+	$(call if_changed_dep,hdrtest)
diff --git a/drivers/gpu/drm/i915/display/Makefile b/drivers/gpu/drm/i915/display/Makefile
deleted file mode 100644
index 173c305d7866..000000000000
--- a/drivers/gpu/drm/i915/display/Makefile
+++ /dev/null
@@ -1,6 +0,0 @@
-# For building individual subdir files on the command line
-subdir-ccflags-y += -I$(srctree)/$(src)/..
-
-# Extra header tests
-header-test-pattern-$(CONFIG_DRM_I915_WERROR) := *.h
-header-test- := intel_vbt_defs.h
diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c
index 006b1a297e6f..f8e882101396 100644
--- a/drivers/gpu/drm/i915/display/icl_dsi.c
+++ b/drivers/gpu/drm/i915/display/icl_dsi.c
@@ -77,7 +77,7 @@ static enum transcoder dsi_port_to_transcoder(enum port port)
 static void wait_for_cmds_dispatched_to_panel(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	struct mipi_dsi_device *dsi;
 	enum port port;
 	enum transcoder dsi_trans;
@@ -202,7 +202,7 @@ static int dsi_send_pkt_payld(struct intel_dsi_host *host,
 static void dsi_program_swing_and_deemphasis(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum phy phy;
 	u32 tmp;
 	int lane;
@@ -267,7 +267,7 @@ static void configure_dual_link_mode(struct intel_encoder *encoder,
 				     const struct intel_crtc_state *pipe_config)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	u32 dss_ctl1;
 
 	dss_ctl1 = I915_READ(DSS_CTL1);
@@ -306,7 +306,7 @@ static void configure_dual_link_mode(struct intel_encoder *encoder,
 static int afe_clk(struct intel_encoder *encoder,
 		   const struct intel_crtc_state *crtc_state)
 {
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	int bpp;
 
 	if (crtc_state->dsc.compression_enable)
@@ -321,7 +321,7 @@ static void gen11_dsi_program_esc_clk_div(struct intel_encoder *encoder,
 					  const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	int afe_clk_khz;
 	u32 esc_clk_div_m;
@@ -360,7 +360,7 @@ static void get_dsi_io_power_domains(struct drm_i915_private *dev_priv,
 static void gen11_dsi_enable_io_power(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	u32 tmp;
 
@@ -376,7 +376,7 @@ static void gen11_dsi_enable_io_power(struct intel_encoder *encoder)
 static void gen11_dsi_power_up_lanes(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum phy phy;
 
 	for_each_dsi_phy(phy, intel_dsi->phys)
@@ -387,7 +387,7 @@ static void gen11_dsi_power_up_lanes(struct intel_encoder *encoder)
 static void gen11_dsi_config_phy_lanes_sequence(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum phy phy;
 	u32 tmp;
 	int lane;
@@ -436,7 +436,7 @@ static void gen11_dsi_config_phy_lanes_sequence(struct intel_encoder *encoder)
 static void gen11_dsi_voltage_swing_program_seq(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	u32 tmp;
 	enum phy phy;
 
@@ -488,7 +488,7 @@ static void gen11_dsi_voltage_swing_program_seq(struct intel_encoder *encoder)
 static void gen11_dsi_enable_ddi_buffer(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	u32 tmp;
 	enum port port;
 
@@ -509,7 +509,7 @@ gen11_dsi_setup_dphy_timings(struct intel_encoder *encoder,
 			     const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	u32 tmp;
 	enum port port;
 	enum phy phy;
@@ -575,7 +575,7 @@ gen11_dsi_setup_dphy_timings(struct intel_encoder *encoder,
 static void gen11_dsi_gate_clocks(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	u32 tmp;
 	enum phy phy;
 
@@ -591,7 +591,7 @@ static void gen11_dsi_gate_clocks(struct intel_encoder *encoder)
 static void gen11_dsi_ungate_clocks(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	u32 tmp;
 	enum phy phy;
 
@@ -608,7 +608,7 @@ static void gen11_dsi_map_pll(struct intel_encoder *encoder,
 			      const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	struct intel_shared_dpll *pll = crtc_state->shared_dpll;
 	enum phy phy;
 	u32 val;
@@ -640,7 +640,7 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder,
 			       const struct intel_crtc_state *pipe_config)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc);
 	enum pipe pipe = intel_crtc->pipe;
 	u32 tmp;
@@ -789,7 +789,7 @@ gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder,
 				 const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	const struct drm_display_mode *adjusted_mode =
 		&crtc_state->hw.adjusted_mode;
 	enum port port;
@@ -923,7 +923,7 @@ gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder,
 static void gen11_dsi_enable_transcoder(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	enum transcoder dsi_trans;
 	u32 tmp;
@@ -945,7 +945,7 @@ static void gen11_dsi_setup_timeouts(struct intel_encoder *encoder,
 				     const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	enum transcoder dsi_trans;
 	u32 tmp, hs_tx_timeout, lp_rx_timeout, ta_timeout, divisor, mul;
@@ -1026,7 +1026,7 @@ gen11_dsi_enable_port_and_phy(struct intel_encoder *encoder,
 static void gen11_dsi_powerup_panel(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	struct mipi_dsi_device *dsi;
 	enum port port;
 	enum transcoder dsi_trans;
@@ -1077,7 +1077,7 @@ static void gen11_dsi_pre_enable(struct intel_encoder *encoder,
 				 const struct intel_crtc_state *pipe_config,
 				 const struct drm_connector_state *conn_state)
 {
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 
 	/* step3b */
 	gen11_dsi_map_pll(encoder, pipe_config);
@@ -1104,7 +1104,7 @@ static void gen11_dsi_pre_enable(struct intel_encoder *encoder,
 static void gen11_dsi_disable_transcoder(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	enum transcoder dsi_trans;
 	u32 tmp;
@@ -1126,7 +1126,7 @@ static void gen11_dsi_disable_transcoder(struct intel_encoder *encoder)
 
 static void gen11_dsi_powerdown_panel(struct intel_encoder *encoder)
 {
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 
 	intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_OFF);
 	intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_ASSERT_RESET);
@@ -1139,7 +1139,7 @@ static void gen11_dsi_powerdown_panel(struct intel_encoder *encoder)
 static void gen11_dsi_deconfigure_trancoder(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	enum transcoder dsi_trans;
 	u32 tmp;
@@ -1180,7 +1180,7 @@ static void gen11_dsi_deconfigure_trancoder(struct intel_encoder *encoder)
 static void gen11_dsi_disable_port(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	u32 tmp;
 	enum port port;
 
@@ -1202,7 +1202,7 @@ static void gen11_dsi_disable_port(struct intel_encoder *encoder)
 static void gen11_dsi_disable_io_power(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	u32 tmp;
 
@@ -1229,7 +1229,7 @@ static void gen11_dsi_disable(struct intel_encoder *encoder,
 			      const struct intel_crtc_state *old_crtc_state,
 			      const struct drm_connector_state *old_conn_state)
 {
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 
 	/* step1: turn off backlight */
 	intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_OFF);
@@ -1259,7 +1259,7 @@ static void gen11_dsi_post_disable(struct intel_encoder *encoder,
 
 	intel_dsc_disable(old_crtc_state);
 
-	skylake_scaler_disable(old_crtc_state);
+	skl_scaler_disable(old_crtc_state);
 }
 
 static enum drm_mode_status gen11_dsi_mode_valid(struct drm_connector *connector,
@@ -1272,7 +1272,7 @@ static enum drm_mode_status gen11_dsi_mode_valid(struct drm_connector *connector
 static void gen11_dsi_get_timings(struct intel_encoder *encoder,
 				  struct intel_crtc_state *pipe_config)
 {
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	struct drm_display_mode *adjusted_mode =
 					&pipe_config->hw.adjusted_mode;
 
@@ -1313,7 +1313,7 @@ static void gen11_dsi_get_config(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 
 	intel_dsc_get_config(encoder, pipe_config);
 
@@ -1417,7 +1417,8 @@ static void gen11_dsi_get_power_domains(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
 
-	get_dsi_io_power_domains(i915, enc_to_intel_dsi(&encoder->base));
+	get_dsi_io_power_domains(i915,
+				 enc_to_intel_dsi(encoder));
 
 	if (crtc_state->dsc.compression_enable)
 		intel_display_power_get(i915,
@@ -1428,7 +1429,7 @@ static bool gen11_dsi_get_hw_state(struct intel_encoder *encoder,
 				   enum pipe *pipe)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum transcoder dsi_trans;
 	intel_wakeref_t wakeref;
 	enum port port;
diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c
index fd0026fc3618..c362eecdd414 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic.c
+++ b/drivers/gpu/drm/i915/display/intel_atomic.c
@@ -37,6 +37,7 @@
 #include "intel_atomic.h"
 #include "intel_display_types.h"
 #include "intel_hdcp.h"
+#include "intel_psr.h"
 #include "intel_sprite.h"
 
 /**
@@ -129,6 +130,7 @@ int intel_digital_connector_atomic_check(struct drm_connector *conn,
 	struct drm_crtc_state *crtc_state;
 
 	intel_hdcp_atomic_check(conn, old_state, new_state);
+	intel_psr_atomic_check(conn, old_state, new_state);
 
 	if (!new_state->crtc)
 		return 0;
@@ -175,6 +177,38 @@ intel_digital_connector_duplicate_state(struct drm_connector *connector)
 }
 
 /**
+ * intel_connector_needs_modeset - check if connector needs a modeset
+ */
+bool
+intel_connector_needs_modeset(struct intel_atomic_state *state,
+			      struct drm_connector *connector)
+{
+	const struct drm_connector_state *old_conn_state, *new_conn_state;
+
+	old_conn_state = drm_atomic_get_old_connector_state(&state->base, connector);
+	new_conn_state = drm_atomic_get_new_connector_state(&state->base, connector);
+
+	return old_conn_state->crtc != new_conn_state->crtc ||
+	       (new_conn_state->crtc &&
+		drm_atomic_crtc_needs_modeset(drm_atomic_get_new_crtc_state(&state->base,
+									    new_conn_state->crtc)));
+}
+
+struct intel_digital_connector_state *
+intel_atomic_get_digital_connector_state(struct intel_atomic_state *state,
+					 struct intel_connector *connector)
+{
+	struct drm_connector_state *conn_state;
+
+	conn_state = drm_atomic_get_connector_state(&state->base,
+						    &connector->base);
+	if (IS_ERR(conn_state))
+		return ERR_CAST(conn_state);
+
+	return to_intel_digital_connector_state(conn_state);
+}
+
+/**
  * intel_crtc_duplicate_state - duplicate crtc state
  * @crtc: drm crtc
  *
diff --git a/drivers/gpu/drm/i915/display/intel_atomic.h b/drivers/gpu/drm/i915/display/intel_atomic.h
index 7b49623419ba..74c749dbfb4f 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic.h
+++ b/drivers/gpu/drm/i915/display/intel_atomic.h
@@ -17,6 +17,7 @@ struct drm_device;
 struct drm_i915_private;
 struct drm_property;
 struct intel_atomic_state;
+struct intel_connector;
 struct intel_crtc;
 struct intel_crtc_state;
 
@@ -32,6 +33,11 @@ int intel_digital_connector_atomic_check(struct drm_connector *conn,
 					 struct drm_atomic_state *state);
 struct drm_connector_state *
 intel_digital_connector_duplicate_state(struct drm_connector *connector);
+bool intel_connector_needs_modeset(struct intel_atomic_state *state,
+				   struct drm_connector *connector);
+struct intel_digital_connector_state *
+intel_atomic_get_digital_connector_state(struct intel_atomic_state *state,
+					 struct intel_connector *connector);
 
 struct drm_crtc_state *intel_crtc_duplicate_state(struct drm_crtc *crtc);
 void intel_crtc_destroy_state(struct drm_crtc *crtc,
diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c
index 27710098d056..b18040793d9e 100644
--- a/drivers/gpu/drm/i915/display/intel_audio.c
+++ b/drivers/gpu/drm/i915/display/intel_audio.c
@@ -707,8 +707,8 @@ void intel_audio_codec_enable(struct intel_encoder *encoder,
 	DRM_DEBUG_DRIVER("ELD on [CONNECTOR:%d:%s], [ENCODER:%d:%s]\n",
 			 connector->base.id,
 			 connector->name,
-			 connector->encoder->base.id,
-			 connector->encoder->name);
+			 encoder->base.base.id,
+			 encoder->base.name);
 
 	connector->eld[6] = drm_av_sync_delay(connector, adjusted_mode) / 2;
 
@@ -856,7 +856,7 @@ static unsigned long i915_audio_component_get_power(struct device *kdev)
 		}
 
 		/* Force CDCLK to 2*BCLK as long as we need audio powered. */
-		if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
+		if (IS_GEMINILAKE(dev_priv))
 			glk_force_audio_cdclk(dev_priv, true);
 
 		if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
@@ -875,7 +875,7 @@ static void i915_audio_component_put_power(struct device *kdev,
 
 	/* Stop forcing CDCLK to 2*BCLK if no need for audio to be powered. */
 	if (--dev_priv->audio_power_refcount == 0)
-		if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
+		if (IS_GEMINILAKE(dev_priv))
 			glk_force_audio_cdclk(dev_priv, false);
 
 	intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO, cookie);
diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c
index dcb66a33be9b..b228671d5a5d 100644
--- a/drivers/gpu/drm/i915/display/intel_bw.c
+++ b/drivers/gpu/drm/i915/display/intel_bw.c
@@ -486,3 +486,8 @@ int intel_bw_init(struct drm_i915_private *dev_priv)
 
 	return 0;
 }
+
+void intel_bw_cleanup(struct drm_i915_private *dev_priv)
+{
+	drm_atomic_private_obj_fini(&dev_priv->bw_obj);
+}
diff --git a/drivers/gpu/drm/i915/display/intel_bw.h b/drivers/gpu/drm/i915/display/intel_bw.h
index 9db10af012f4..20b9ad241802 100644
--- a/drivers/gpu/drm/i915/display/intel_bw.h
+++ b/drivers/gpu/drm/i915/display/intel_bw.h
@@ -25,6 +25,7 @@ struct intel_bw_state {
 
 void intel_bw_init_hw(struct drm_i915_private *dev_priv);
 int intel_bw_init(struct drm_i915_private *dev_priv);
+void intel_bw_cleanup(struct drm_i915_private *dev_priv);
 int intel_bw_atomic_check(struct intel_atomic_state *state);
 void intel_bw_crtc_update(struct intel_bw_state *bw_state,
 			  const struct intel_crtc_state *crtc_state);
diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index 7d1ab1e5b7c3..0ce5926006ca 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -2004,6 +2004,18 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state)
 	/* Account for additional needs from the planes */
 	min_cdclk = max(intel_planes_min_cdclk(crtc_state), min_cdclk);
 
+	/*
+	 * HACK. Currently for TGL platforms we calculate
+	 * min_cdclk initially based on pixel_rate divided
+	 * by 2, also accounting for plane requirements;
+	 * however, in some cases the lowest possible CDCLK
+	 * doesn't work and causes underruns.
+	 * Explicitly stating here that this currently seems to be
+	 * a hack rather than a final solution.
+	 */
+	if (IS_TIGERLAKE(dev_priv))
+		min_cdclk = max(min_cdclk, (int)crtc_state->pixel_rate);
+
 	if (min_cdclk > dev_priv->max_cdclk_freq) {
 		DRM_DEBUG_KMS("required cdclk (%d kHz) exceeds max (%d kHz)\n",
 			      min_cdclk, dev_priv->max_cdclk_freq);
diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c
index b2b1336ecdb6..f976b800b245 100644
--- a/drivers/gpu/drm/i915/display/intel_crt.c
+++ b/drivers/gpu/drm/i915/display/intel_crt.c
@@ -65,7 +65,7 @@ static struct intel_crt *intel_encoder_to_crt(struct intel_encoder *encoder)
 	return container_of(encoder, struct intel_crt, base);
 }
 
-static struct intel_crt *intel_attached_crt(struct drm_connector *connector)
+static struct intel_crt *intel_attached_crt(struct intel_connector *connector)
 {
 	return intel_encoder_to_crt(intel_attached_encoder(connector));
 }
@@ -247,7 +247,7 @@ static void hsw_post_disable_crt(struct intel_encoder *encoder,
 
 	intel_ddi_disable_transcoder_func(old_crtc_state);
 
-	ironlake_pfit_disable(old_crtc_state);
+	ilk_pfit_disable(old_crtc_state);
 
 	intel_ddi_disable_pipe_clock(old_crtc_state);
 
@@ -351,7 +351,7 @@ intel_crt_mode_valid(struct drm_connector *connector,
 
 	/* The FDI receiver on LPT only supports 8bpc and only has 2 lanes. */
 	if (HAS_PCH_LPT(dev_priv) &&
-	    (ironlake_get_lanes_required(mode->clock, 270000, 24) > 2))
+	    ilk_get_lanes_required(mode->clock, 270000, 24) > 2)
 		return MODE_CLOCK_HIGH;
 
 	/* HSW/BDW FDI limited to 4k */
@@ -427,10 +427,10 @@ static int hsw_crt_compute_config(struct intel_encoder *encoder,
 	return 0;
 }
 
-static bool intel_ironlake_crt_detect_hotplug(struct drm_connector *connector)
+static bool ilk_crt_detect_hotplug(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
-	struct intel_crt *crt = intel_attached_crt(connector);
+	struct intel_crt *crt = intel_attached_crt(to_intel_connector(connector));
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	u32 adpa;
 	bool ret;
@@ -440,7 +440,7 @@ static bool intel_ironlake_crt_detect_hotplug(struct drm_connector *connector)
 		bool turn_off_dac = HAS_PCH_SPLIT(dev_priv);
 		u32 save_adpa;
 
-		crt->force_hotplug_required = 0;
+		crt->force_hotplug_required = false;
 
 		save_adpa = adpa = I915_READ(crt->adpa_reg);
 		DRM_DEBUG_KMS("trigger hotplug detect cycle: adpa=0x%x\n", adpa);
@@ -477,7 +477,7 @@ static bool intel_ironlake_crt_detect_hotplug(struct drm_connector *connector)
 static bool valleyview_crt_detect_hotplug(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
-	struct intel_crt *crt = intel_attached_crt(connector);
+	struct intel_crt *crt = intel_attached_crt(to_intel_connector(connector));
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	bool reenable_hpd;
 	u32 adpa;
@@ -535,7 +535,7 @@ static bool intel_crt_detect_hotplug(struct drm_connector *connector)
 	int i, tries = 0;
 
 	if (HAS_PCH_SPLIT(dev_priv))
-		return intel_ironlake_crt_detect_hotplug(connector);
+		return ilk_crt_detect_hotplug(connector);
 
 	if (IS_VALLEYVIEW(dev_priv))
 		return valleyview_crt_detect_hotplug(connector);
@@ -609,7 +609,7 @@ static int intel_crt_ddc_get_modes(struct drm_connector *connector,
 
 static bool intel_crt_detect_ddc(struct drm_connector *connector)
 {
-	struct intel_crt *crt = intel_attached_crt(connector);
+	struct intel_crt *crt = intel_attached_crt(to_intel_connector(connector));
 	struct drm_i915_private *dev_priv = to_i915(crt->base.base.dev);
 	struct edid *edid;
 	struct i2c_adapter *i2c;
@@ -795,7 +795,7 @@ intel_crt_detect(struct drm_connector *connector,
 		 bool force)
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->dev);
-	struct intel_crt *crt = intel_attached_crt(connector);
+	struct intel_crt *crt = intel_attached_crt(to_intel_connector(connector));
 	struct intel_encoder *intel_encoder = &crt->base;
 	intel_wakeref_t wakeref;
 	int status, ret;
@@ -886,7 +886,7 @@ static int intel_crt_get_modes(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_crt *crt = intel_attached_crt(connector);
+	struct intel_crt *crt = intel_attached_crt(to_intel_connector(connector));
 	struct intel_encoder *intel_encoder = &crt->base;
 	intel_wakeref_t wakeref;
 	struct i2c_adapter *i2c;
@@ -925,7 +925,7 @@ void intel_crt_reset(struct drm_encoder *encoder)
 		POSTING_READ(crt->adpa_reg);
 
 		DRM_DEBUG_KMS("crt adpa set to 0x%x\n", adpa);
-		crt->force_hotplug_required = 1;
+		crt->force_hotplug_required = true;
 	}
 
 }
@@ -1063,7 +1063,7 @@ void intel_crt_init(struct drm_i915_private *dev_priv)
 	/*
 	 * Configure the automatic hotplug detection stuff
 	 */
-	crt->force_hotplug_required = 0;
+	crt->force_hotplug_required = false;
 
 	/*
 	 * TODO: find a proper way to discover whether we need to set the the
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index c9ba7d7f3787..33f1dc3d7c1a 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -34,6 +34,7 @@
 #include "intel_ddi.h"
 #include "intel_display_types.h"
 #include "intel_dp.h"
+#include "intel_dp_mst.h"
 #include "intel_dp_link_training.h"
 #include "intel_dpio_phy.h"
 #include "intel_dsi.h"
@@ -1237,9 +1238,9 @@ void hsw_fdi_link_train(struct intel_encoder *encoder,
 
 static void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	struct intel_digital_port *intel_dig_port =
-		enc_to_dig_port(&encoder->base);
+		enc_to_dig_port(encoder);
 
 	intel_dp->DP = intel_dig_port->saved_port_bits |
 		DDI_BUF_CTL_ENABLE | DDI_BUF_TRANS_SELECT(0);
@@ -1899,8 +1900,13 @@ intel_ddi_transcoder_func_reg_val_get(const struct intel_crtc_state *crtc_state)
 		temp |= TRANS_DDI_MODE_SELECT_DP_MST;
 		temp |= DDI_PORT_WIDTH(crtc_state->lane_count);
 
-		if (INTEL_GEN(dev_priv) >= 12)
-			temp |= TRANS_DDI_MST_TRANSPORT_SELECT(crtc_state->cpu_transcoder);
+		if (INTEL_GEN(dev_priv) >= 12) {
+			enum transcoder master;
+
+			master = crtc_state->mst_master_transcoder;
+			WARN_ON(master == INVALID_TRANSCODER);
+			temp |= TRANS_DDI_MST_TRANSPORT_SELECT(master);
+		}
 	} else {
 		temp |= TRANS_DDI_MODE_SELECT_DP_SST;
 		temp |= DDI_PORT_WIDTH(crtc_state->lane_count);
@@ -1944,17 +1950,18 @@ void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
-	i915_reg_t reg = TRANS_DDI_FUNC_CTL(cpu_transcoder);
-	u32 val = I915_READ(reg);
+	u32 val;
+
+	val = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder));
+	val &= ~TRANS_DDI_FUNC_ENABLE;
 
 	if (INTEL_GEN(dev_priv) >= 12) {
-		val &= ~(TRANS_DDI_FUNC_ENABLE | TGL_TRANS_DDI_PORT_MASK |
-			 TRANS_DDI_DP_VC_PAYLOAD_ALLOC);
+		if (!intel_dp_mst_is_master_trans(crtc_state))
+			val &= ~TGL_TRANS_DDI_PORT_MASK;
 	} else {
-		val &= ~(TRANS_DDI_FUNC_ENABLE | TRANS_DDI_PORT_MASK |
-			 TRANS_DDI_DP_VC_PAYLOAD_ALLOC);
+		val &= ~TRANS_DDI_PORT_MASK;
 	}
-	I915_WRITE(reg, val);
+	I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), val);
 
 	if (dev_priv->quirks & QUIRK_INCREASE_DDI_DISABLED_TIME &&
 	    intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) {
@@ -2217,7 +2224,7 @@ static void intel_ddi_get_power_domains(struct intel_encoder *encoder,
 	if (WARN_ON(intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)))
 		return;
 
-	dig_port = enc_to_dig_port(&encoder->base);
+	dig_port = enc_to_dig_port(encoder);
 	intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain);
 
 	/*
@@ -2287,7 +2294,7 @@ static void _skl_ddi_set_iboost(struct drm_i915_private *dev_priv,
 static void skl_ddi_set_iboost(struct intel_encoder *encoder,
 			       int level, enum intel_output_type type)
 {
-	struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	enum port port = encoder->port;
 	u8 iboost;
@@ -2358,7 +2365,7 @@ static void bxt_ddi_vswing_sequence(struct intel_encoder *encoder,
 u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	enum port port = encoder->port;
 	enum phy phy = intel_port_to_phy(dev_priv, port);
 	int n_entries;
@@ -2497,7 +2504,7 @@ static void cnl_ddi_vswing_sequence(struct intel_encoder *encoder,
 		width = 4;
 		rate = 0; /* Rate is always < than 6GHz for HDMI */
 	} else {
-		struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
 		width = intel_dp->lane_count;
 		rate = intel_dp->link_rate;
@@ -2623,7 +2630,7 @@ static void icl_combo_phy_ddi_vswing_sequence(struct intel_encoder *encoder,
 		width = 4;
 		/* Rate is always < than 6GHz for HDMI */
 	} else {
-		struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
 		width = intel_dp->lane_count;
 		rate = intel_dp->link_rate;
@@ -3161,57 +3168,6 @@ static void intel_ddi_clk_disable(struct intel_encoder *encoder)
 }
 
 static void
-icl_phy_set_clock_gating(struct intel_digital_port *dig_port, bool enable)
-{
-	struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev);
-	enum tc_port tc_port = intel_port_to_tc(dev_priv, dig_port->base.port);
-	u32 val, bits;
-	int ln;
-
-	if (tc_port == PORT_TC_NONE)
-		return;
-
-	bits = MG_DP_MODE_CFG_TR2PWR_GATING | MG_DP_MODE_CFG_TRPWR_GATING |
-	       MG_DP_MODE_CFG_CLNPWR_GATING | MG_DP_MODE_CFG_DIGPWR_GATING |
-	       MG_DP_MODE_CFG_GAONPWR_GATING;
-
-	for (ln = 0; ln < 2; ln++) {
-		if (INTEL_GEN(dev_priv) >= 12) {
-			I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, ln));
-			val = I915_READ(DKL_DP_MODE(tc_port));
-		} else {
-			val = I915_READ(MG_DP_MODE(ln, tc_port));
-		}
-
-		if (enable)
-			val |= bits;
-		else
-			val &= ~bits;
-
-		if (INTEL_GEN(dev_priv) >= 12)
-			I915_WRITE(DKL_DP_MODE(tc_port), val);
-		else
-			I915_WRITE(MG_DP_MODE(ln, tc_port), val);
-	}
-
-	if (INTEL_GEN(dev_priv) == 11) {
-		bits = MG_MISC_SUS0_CFG_TR2PWR_GATING |
-		       MG_MISC_SUS0_CFG_CL2PWR_GATING |
-		       MG_MISC_SUS0_CFG_GAONPWR_GATING |
-		       MG_MISC_SUS0_CFG_TRPWR_GATING |
-		       MG_MISC_SUS0_CFG_CL1PWR_GATING |
-		       MG_MISC_SUS0_CFG_DGPWR_GATING;
-
-		val = I915_READ(MG_MISC_SUS0(tc_port));
-		if (enable)
-			val |= (bits | MG_MISC_SUS0_SUSCLK_DYNCLKGATE_MODE(3));
-		else
-			val &= ~(bits | MG_MISC_SUS0_SUSCLK_DYNCLKGATE_MODE_MASK);
-		I915_WRITE(MG_MISC_SUS0(tc_port), val);
-	}
-}
-
-static void
 icl_program_mg_dp_mode(struct intel_digital_port *intel_dig_port,
 		       const struct intel_crtc_state *crtc_state)
 {
@@ -3317,7 +3273,7 @@ static void intel_ddi_enable_fec(struct intel_encoder *encoder,
 	if (!crtc_state->fec_enable)
 		return;
 
-	intel_dp = enc_to_intel_dp(&encoder->base);
+	intel_dp = enc_to_intel_dp(encoder);
 	val = I915_READ(intel_dp->regs.dp_tp_ctl);
 	val |= DP_TP_CTL_FEC_ENABLE;
 	I915_WRITE(intel_dp->regs.dp_tp_ctl, val);
@@ -3337,7 +3293,7 @@ static void intel_ddi_disable_fec_state(struct intel_encoder *encoder,
 	if (!crtc_state->fec_enable)
 		return;
 
-	intel_dp = enc_to_intel_dp(&encoder->base);
+	intel_dp = enc_to_intel_dp(encoder);
 	val = I915_READ(intel_dp->regs.dp_tp_ctl);
 	val &= ~DP_TP_CTL_FEC_ENABLE;
 	I915_WRITE(intel_dp->regs.dp_tp_ctl, val);
@@ -3428,10 +3384,10 @@ static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder,
 				  const struct intel_crtc_state *crtc_state,
 				  const struct drm_connector_state *conn_state)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	enum phy phy = intel_port_to_phy(dev_priv, encoder->port);
-	struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
 	bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST);
 	int level = intel_ddi_dp_level(intel_dp);
 	enum transcoder transcoder = crtc_state->cpu_transcoder;
@@ -3458,14 +3414,14 @@ static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder,
 	 * (DFLEXDPSP.DPX4TXLATC)
 	 *
 	 * This was done before tgl_ddi_pre_enable_dp by
-	 * haswell_crtc_enable()->intel_encoders_pre_pll_enable().
+	 * hsw_crtc_enable()->intel_encoders_pre_pll_enable().
 	 */
 
 	/*
 	 * 4. Enable the port PLL.
 	 *
 	 * The PLL enabling itself was already done before this function by
-	 * haswell_crtc_enable()->intel_enable_shared_dpll().  We need only
+	 * hsw_crtc_enable()->intel_enable_shared_dpll().  We need only
 	 * configure the PLL to port mapping here.
 	 */
 	intel_ddi_clk_select(encoder, crtc_state);
@@ -3509,12 +3465,6 @@ static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder,
 	 * down this function.
 	 */
 
-	/*
-	 * 7.d Type C with DP alternate or fixed/legacy/static connection -
-	 * Disable PHY clock gating per Type-C DDI Buffer page
-	 */
-	icl_phy_set_clock_gating(dig_port, false);
-
 	/* 7.e Configure voltage swing and related IO settings */
 	tgl_ddi_vswing_sequence(encoder, crtc_state->port_clock, level,
 				encoder->type);
@@ -3566,15 +3516,6 @@ static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder,
 	if (!is_trans_port_sync_mode(crtc_state))
 		intel_dp_stop_link_train(intel_dp);
 
-	/*
-	 * TODO: enable clock gating
-	 *
-	 * It is not written in DP enabling sequence but "PHY Clockgating
-	 * programming" states that clock gating should be enabled after the
-	 * link training but doing so causes all the following trainings to fail
-	 * so not enabling it for now.
-	 */
-
 	/* 7.l Configure and enable FEC if needed */
 	intel_ddi_enable_fec(encoder, crtc_state);
 	intel_dsc_enable(encoder, crtc_state);
@@ -3584,15 +3525,18 @@ static void hsw_ddi_pre_enable_dp(struct intel_encoder *encoder,
 				  const struct intel_crtc_state *crtc_state,
 				  const struct drm_connector_state *conn_state)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	enum port port = encoder->port;
 	enum phy phy = intel_port_to_phy(dev_priv, port);
-	struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
 	bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST);
 	int level = intel_ddi_dp_level(intel_dp);
 
-	WARN_ON(is_mst && (port == PORT_A || port == PORT_E));
+	if (INTEL_GEN(dev_priv) < 11)
+		WARN_ON(is_mst && (port == PORT_A || port == PORT_E));
+	else
+		WARN_ON(is_mst && port == PORT_A);
 
 	intel_dp_set_link_params(intel_dp, crtc_state->port_clock,
 				 crtc_state->lane_count, is_mst);
@@ -3610,7 +3554,6 @@ static void hsw_ddi_pre_enable_dp(struct intel_encoder *encoder,
 					dig_port->ddi_io_power_domain);
 
 	icl_program_mg_dp_mode(dig_port, crtc_state);
-	icl_phy_set_clock_gating(dig_port, false);
 
 	if (INTEL_GEN(dev_priv) >= 11)
 		icl_ddi_vswing_sequence(encoder, crtc_state->port_clock,
@@ -3644,8 +3587,6 @@ static void hsw_ddi_pre_enable_dp(struct intel_encoder *encoder,
 
 	intel_ddi_enable_fec(encoder, crtc_state);
 
-	icl_phy_set_clock_gating(dig_port, true);
-
 	if (!is_mst)
 		intel_ddi_enable_pipe_clock(crtc_state);
 
@@ -3674,12 +3615,12 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder,
 				      const struct intel_crtc_state *crtc_state,
 				      const struct drm_connector_state *conn_state)
 {
-	struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
 	struct intel_hdmi *intel_hdmi = &intel_dig_port->hdmi;
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	enum port port = encoder->port;
 	int level = intel_ddi_hdmi_level(dev_priv, port);
-	struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
 
 	intel_dp_dual_mode_set_tmds_output(intel_hdmi, true);
 	intel_ddi_clk_select(encoder, crtc_state);
@@ -3687,7 +3628,6 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder,
 	intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain);
 
 	icl_program_mg_dp_mode(dig_port, crtc_state);
-	icl_phy_set_clock_gating(dig_port, false);
 
 	if (INTEL_GEN(dev_priv) >= 12)
 		tgl_ddi_vswing_sequence(encoder, crtc_state->port_clock,
@@ -3702,8 +3642,6 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder,
 	else
 		intel_prepare_hdmi_ddi_buffers(encoder, level);
 
-	icl_phy_set_clock_gating(dig_port, true);
-
 	if (IS_GEN9_BC(dev_priv))
 		skl_ddi_set_iboost(encoder, level, INTEL_OUTPUT_HDMI);
 
@@ -3746,12 +3684,12 @@ static void intel_ddi_pre_enable(struct intel_encoder *encoder,
 		intel_ddi_pre_enable_hdmi(encoder, crtc_state, conn_state);
 	} else {
 		struct intel_lspcon *lspcon =
-				enc_to_intel_lspcon(&encoder->base);
+				enc_to_intel_lspcon(encoder);
 
 		intel_ddi_pre_enable_dp(encoder, crtc_state, conn_state);
 		if (lspcon->active) {
 			struct intel_digital_port *dig_port =
-					enc_to_dig_port(&encoder->base);
+					enc_to_dig_port(encoder);
 
 			dig_port->set_infoframes(encoder,
 						 crtc_state->has_infoframe,
@@ -3776,7 +3714,7 @@ static void intel_disable_ddi_buf(struct intel_encoder *encoder,
 	}
 
 	if (intel_crtc_has_dp_encoder(crtc_state)) {
-		struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
 		val = I915_READ(intel_dp->regs.dp_tp_ctl);
 		val &= ~(DP_TP_CTL_ENABLE | DP_TP_CTL_LINK_TRAIN_MASK);
@@ -3796,7 +3734,7 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder,
 				      const struct drm_connector_state *old_conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
 	struct intel_dp *intel_dp = &dig_port->dp;
 	bool is_mst = intel_crtc_has_type(old_crtc_state,
 					  INTEL_OUTPUT_DP_MST);
@@ -3808,8 +3746,19 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder,
 	 */
 	intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF);
 
-	if (INTEL_GEN(dev_priv) < 12 && !is_mst)
-		intel_ddi_disable_pipe_clock(old_crtc_state);
+	if (INTEL_GEN(dev_priv) >= 12) {
+		if (is_mst) {
+			enum transcoder cpu_transcoder = old_crtc_state->cpu_transcoder;
+			u32 val;
+
+			val = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder));
+			val &= ~TGL_TRANS_DDI_PORT_MASK;
+			I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), val);
+		}
+	} else {
+		if (!is_mst)
+			intel_ddi_disable_pipe_clock(old_crtc_state);
+	}
 
 	intel_disable_ddi_buf(encoder, old_crtc_state);
 
@@ -3838,7 +3787,7 @@ static void intel_ddi_post_disable_hdmi(struct intel_encoder *encoder,
 					const struct drm_connector_state *old_conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
 	struct intel_hdmi *intel_hdmi = &dig_port->hdmi;
 
 	dig_port->set_infoframes(encoder, false,
@@ -3860,8 +3809,6 @@ static void icl_disable_transcoder_port_sync(const struct intel_crtc_state *old_
 {
 	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	i915_reg_t reg;
-	u32 trans_ddi_func_ctl2_val;
 
 	if (old_crtc_state->master_transcoder == INVALID_TRANSCODER)
 		return;
@@ -3869,10 +3816,7 @@ static void icl_disable_transcoder_port_sync(const struct intel_crtc_state *old_
 	DRM_DEBUG_KMS("Disabling Transcoder Port Sync on Slave Transcoder %s\n",
 		      transcoder_name(old_crtc_state->cpu_transcoder));
 
-	reg = TRANS_DDI_FUNC_CTL2(old_crtc_state->cpu_transcoder);
-	trans_ddi_func_ctl2_val = ~(PORT_SYNC_MODE_ENABLE |
-				    PORT_SYNC_MODE_MASTER_SELECT_MASK);
-	I915_WRITE(reg, trans_ddi_func_ctl2_val);
+	I915_WRITE(TRANS_DDI_FUNC_CTL2(old_crtc_state->cpu_transcoder), 0);
 }
 
 static void intel_ddi_post_disable(struct intel_encoder *encoder,
@@ -3880,25 +3824,27 @@ static void intel_ddi_post_disable(struct intel_encoder *encoder,
 				   const struct drm_connector_state *old_conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
 	enum phy phy = intel_port_to_phy(dev_priv, encoder->port);
 	bool is_tc_port = intel_phy_is_tc(dev_priv, phy);
 
-	intel_crtc_vblank_off(old_crtc_state);
+	if (!intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DP_MST)) {
+		intel_crtc_vblank_off(old_crtc_state);
 
-	intel_disable_pipe(old_crtc_state);
+		intel_disable_pipe(old_crtc_state);
 
-	if (INTEL_GEN(dev_priv) >= 11)
-		icl_disable_transcoder_port_sync(old_crtc_state);
+		if (INTEL_GEN(dev_priv) >= 11)
+			icl_disable_transcoder_port_sync(old_crtc_state);
 
-	intel_ddi_disable_transcoder_func(old_crtc_state);
+		intel_ddi_disable_transcoder_func(old_crtc_state);
 
-	intel_dsc_disable(old_crtc_state);
+		intel_dsc_disable(old_crtc_state);
 
-	if (INTEL_GEN(dev_priv) >= 9)
-		skylake_scaler_disable(old_crtc_state);
-	else
-		ironlake_pfit_disable(old_crtc_state);
+		if (INTEL_GEN(dev_priv) >= 9)
+			skl_scaler_disable(old_crtc_state);
+		else
+			ilk_pfit_disable(old_crtc_state);
+	}
 
 	/*
 	 * When called from DP MST code:
@@ -3970,7 +3916,7 @@ static void intel_enable_ddi_dp(struct intel_encoder *encoder,
 				const struct drm_connector_state *conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	enum port port = encoder->port;
 
 	if (port == PORT_A && INTEL_GEN(dev_priv) < 9)
@@ -4011,7 +3957,7 @@ static void intel_enable_ddi_hdmi(struct intel_encoder *encoder,
 				  const struct drm_connector_state *conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
 	struct drm_connector *connector = conn_state->connector;
 	enum port port = encoder->port;
 
@@ -4088,7 +4034,7 @@ static void intel_disable_ddi_dp(struct intel_encoder *encoder,
 				 const struct intel_crtc_state *old_crtc_state,
 				 const struct drm_connector_state *old_conn_state)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
 	intel_dp->link_trained = false;
 
@@ -4136,7 +4082,7 @@ static void intel_ddi_update_pipe_dp(struct intel_encoder *encoder,
 				     const struct intel_crtc_state *crtc_state,
 				     const struct drm_connector_state *conn_state)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
 	intel_ddi_set_dp_msa(crtc_state, conn_state);
 
@@ -4200,7 +4146,8 @@ intel_ddi_update_prepare(struct intel_atomic_state *state,
 
 	WARN_ON(crtc && crtc->active);
 
-	intel_tc_port_get_link(enc_to_dig_port(&encoder->base), required_lanes);
+	intel_tc_port_get_link(enc_to_dig_port(encoder),
+		               required_lanes);
 	if (crtc_state && crtc_state->hw.active)
 		intel_update_active_dpll(state, crtc, encoder);
 }
@@ -4210,7 +4157,7 @@ intel_ddi_update_complete(struct intel_atomic_state *state,
 			  struct intel_encoder *encoder,
 			  struct intel_crtc *crtc)
 {
-	intel_tc_port_put_link(enc_to_dig_port(&encoder->base));
+	intel_tc_port_put_link(enc_to_dig_port(encoder));
 }
 
 static void
@@ -4219,7 +4166,7 @@ intel_ddi_pre_pll_enable(struct intel_encoder *encoder,
 			 const struct drm_connector_state *conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
 	enum phy phy = intel_port_to_phy(dev_priv, encoder->port);
 	bool is_tc_port = intel_phy_is_tc(dev_priv, phy);
 
@@ -4405,6 +4352,11 @@ void intel_ddi_get_config(struct intel_encoder *encoder,
 		pipe_config->output_types |= BIT(INTEL_OUTPUT_DP_MST);
 		pipe_config->lane_count =
 			((temp & DDI_PORT_WIDTH_MASK) >> DDI_PORT_WIDTH_SHIFT) + 1;
+
+		if (INTEL_GEN(dev_priv) >= 12)
+			pipe_config->mst_master_transcoder =
+					REG_FIELD_GET(TRANS_DDI_MST_TRANSPORT_SELECT_MASK, temp);
+
 		intel_dp_get_m_n(intel_crtc, pipe_config);
 		break;
 	default:
@@ -4518,7 +4470,7 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder,
 
 static void intel_ddi_encoder_destroy(struct drm_encoder *encoder)
 {
-	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
+	struct intel_digital_port *dig_port = enc_to_dig_port(to_intel_encoder(encoder));
 
 	intel_dp_encoder_flush_work(encoder);
 
@@ -4585,7 +4537,7 @@ static int intel_hdmi_reset_link(struct intel_encoder *encoder,
 				 struct drm_modeset_acquire_ctx *ctx)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_hdmi *hdmi = enc_to_intel_hdmi(&encoder->base);
+	struct intel_hdmi *hdmi = enc_to_intel_hdmi(encoder);
 	struct intel_connector *connector = hdmi->attached_connector;
 	struct i2c_adapter *adapter =
 		intel_gmbus_get_adapter(dev_priv, hdmi->ddc_bus);
@@ -4657,7 +4609,7 @@ intel_ddi_hotplug(struct intel_encoder *encoder,
 		  struct intel_connector *connector,
 		  bool irq_received)
 {
-	struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
 	struct drm_modeset_acquire_ctx ctx;
 	enum intel_hotplug_state state;
 	int ret;
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 1860da0a493e..19ea842cfd84 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -46,6 +46,7 @@
 #include "display/intel_crt.h"
 #include "display/intel_ddi.h"
 #include "display/intel_dp.h"
+#include "display/intel_dp_mst.h"
 #include "display/intel_dsi.h"
 #include "display/intel_dvo.h"
 #include "display/intel_gmbus.h"
@@ -145,8 +146,8 @@ static const u64 cursor_format_modifiers[] = {
 
 static void i9xx_crtc_clock_get(struct intel_crtc *crtc,
 				struct intel_crtc_state *pipe_config);
-static void ironlake_pch_clock_get(struct intel_crtc *crtc,
-				   struct intel_crtc_state *pipe_config);
+static void ilk_pch_clock_get(struct intel_crtc *crtc,
+			      struct intel_crtc_state *pipe_config);
 
 static int intel_framebuffer_init(struct intel_framebuffer *ifb,
 				  struct drm_i915_gem_object *obj,
@@ -157,15 +158,15 @@ static void intel_cpu_transcoder_set_m_n(const struct intel_crtc_state *crtc_sta
 					 const struct intel_link_m_n *m_n,
 					 const struct intel_link_m_n *m2_n2);
 static void i9xx_set_pipeconf(const struct intel_crtc_state *crtc_state);
-static void ironlake_set_pipeconf(const struct intel_crtc_state *crtc_state);
-static void haswell_set_pipeconf(const struct intel_crtc_state *crtc_state);
+static void ilk_set_pipeconf(const struct intel_crtc_state *crtc_state);
+static void hsw_set_pipeconf(const struct intel_crtc_state *crtc_state);
 static void bdw_set_pipemisc(const struct intel_crtc_state *crtc_state);
 static void vlv_prepare_pll(struct intel_crtc *crtc,
 			    const struct intel_crtc_state *pipe_config);
 static void chv_prepare_pll(struct intel_crtc *crtc,
 			    const struct intel_crtc_state *pipe_config);
-static void skylake_pfit_enable(const struct intel_crtc_state *crtc_state);
-static void ironlake_pfit_enable(const struct intel_crtc_state *crtc_state);
+static void skl_pfit_enable(const struct intel_crtc_state *crtc_state);
+static void ilk_pfit_enable(const struct intel_crtc_state *crtc_state);
 static void intel_modeset_setup_hw_state(struct drm_device *dev,
 					 struct drm_modeset_acquire_ctx *ctx);
 static struct intel_crtc_state *intel_crtc_state_alloc(struct intel_crtc *crtc);
@@ -369,7 +370,7 @@ static const struct intel_limit intel_limits_g4x_dual_channel_lvds = {
 	},
 };
 
-static const struct intel_limit intel_limits_pineview_sdvo = {
+static const struct intel_limit pnv_limits_sdvo = {
 	.dot = { .min = 20000, .max = 400000},
 	.vco = { .min = 1700000, .max = 3500000 },
 	/* Pineview's Ncounter is a ring counter */
@@ -384,7 +385,7 @@ static const struct intel_limit intel_limits_pineview_sdvo = {
 		.p2_slow = 10, .p2_fast = 5 },
 };
 
-static const struct intel_limit intel_limits_pineview_lvds = {
+static const struct intel_limit pnv_limits_lvds = {
 	.dot = { .min = 20000, .max = 400000 },
 	.vco = { .min = 1700000, .max = 3500000 },
 	.n = { .min = 3, .max = 6 },
@@ -402,7 +403,7 @@ static const struct intel_limit intel_limits_pineview_lvds = {
  * We calculate clock using (register_value + 2) for N/M1/M2, so here
  * the range value for them is (actual_value - 2).
  */
-static const struct intel_limit intel_limits_ironlake_dac = {
+static const struct intel_limit ilk_limits_dac = {
 	.dot = { .min = 25000, .max = 350000 },
 	.vco = { .min = 1760000, .max = 3510000 },
 	.n = { .min = 1, .max = 5 },
@@ -415,7 +416,7 @@ static const struct intel_limit intel_limits_ironlake_dac = {
 		.p2_slow = 10, .p2_fast = 5 },
 };
 
-static const struct intel_limit intel_limits_ironlake_single_lvds = {
+static const struct intel_limit ilk_limits_single_lvds = {
 	.dot = { .min = 25000, .max = 350000 },
 	.vco = { .min = 1760000, .max = 3510000 },
 	.n = { .min = 1, .max = 3 },
@@ -428,7 +429,7 @@ static const struct intel_limit intel_limits_ironlake_single_lvds = {
 		.p2_slow = 14, .p2_fast = 14 },
 };
 
-static const struct intel_limit intel_limits_ironlake_dual_lvds = {
+static const struct intel_limit ilk_limits_dual_lvds = {
 	.dot = { .min = 25000, .max = 350000 },
 	.vco = { .min = 1760000, .max = 3510000 },
 	.n = { .min = 1, .max = 3 },
@@ -442,7 +443,7 @@ static const struct intel_limit intel_limits_ironlake_dual_lvds = {
 };
 
 /* LVDS 100mhz refclk limits. */
-static const struct intel_limit intel_limits_ironlake_single_lvds_100m = {
+static const struct intel_limit ilk_limits_single_lvds_100m = {
 	.dot = { .min = 25000, .max = 350000 },
 	.vco = { .min = 1760000, .max = 3510000 },
 	.n = { .min = 1, .max = 2 },
@@ -455,7 +456,7 @@ static const struct intel_limit intel_limits_ironlake_single_lvds_100m = {
 		.p2_slow = 14, .p2_fast = 14 },
 };
 
-static const struct intel_limit intel_limits_ironlake_dual_lvds_100m = {
+static const struct intel_limit ilk_limits_dual_lvds_100m = {
 	.dot = { .min = 25000, .max = 350000 },
 	.vco = { .min = 1760000, .max = 3510000 },
 	.n = { .min = 1, .max = 3 },
@@ -553,13 +554,6 @@ is_trans_port_sync_mode(const struct intel_crtc_state *crtc_state)
 }
 
 static bool
-is_trans_port_sync_master(const struct intel_crtc_state *crtc_state)
-{
-	return (crtc_state->master_transcoder == INVALID_TRANSCODER &&
-		crtc_state->sync_mode_slaves_mask);
-}
-
-static bool
 is_trans_port_sync_slave(const struct intel_crtc_state *crtc_state)
 {
 	return crtc_state->master_transcoder != INVALID_TRANSCODER;
@@ -1637,7 +1631,7 @@ void vlv_wait_port_ready(struct drm_i915_private *dev_priv,
 		     I915_READ(dpll_reg) & port_mask, expected_mask);
 }
 
-static void ironlake_enable_pch_transcoder(const struct intel_crtc_state *crtc_state)
+static void ilk_enable_pch_transcoder(const struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
@@ -1735,8 +1729,8 @@ static void lpt_enable_pch_transcoder(struct drm_i915_private *dev_priv,
 		DRM_ERROR("Failed to enable PCH transcoder\n");
 }
 
-static void ironlake_disable_pch_transcoder(struct drm_i915_private *dev_priv,
-					    enum pipe pipe)
+static void ilk_disable_pch_transcoder(struct drm_i915_private *dev_priv,
+				       enum pipe pipe)
 {
 	i915_reg_t reg;
 	u32 val;
@@ -1944,7 +1938,9 @@ static bool is_ccs_plane(const struct drm_framebuffer *fb, int plane)
 
 static bool is_gen12_ccs_modifier(u64 modifier)
 {
-	return modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS;
+	return modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS ||
+	       modifier == I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS;
+
 }
 
 static bool is_gen12_ccs_plane(const struct drm_framebuffer *fb, int plane)
@@ -1977,8 +1973,7 @@ static int ccs_to_main_plane(const struct drm_framebuffer *fb, int ccs_plane)
 }
 
 /* Return either the main plane's CCS or - if not a CCS FB - UV plane */
-static int
-intel_main_to_aux_plane(const struct drm_framebuffer *fb, int main_plane)
+int intel_main_to_aux_plane(const struct drm_framebuffer *fb, int main_plane)
 {
 	if (is_ccs_modifier(fb->modifier))
 		return main_to_ccs_plane(fb, main_plane);
@@ -1994,6 +1989,13 @@ intel_format_info_is_yuv_semiplanar(const struct drm_format_info *info,
 	       info->num_planes == (is_ccs_modifier(modifier) ? 4 : 2);
 }
 
+static bool is_semiplanar_uv_plane(const struct drm_framebuffer *fb,
+				   int color_plane)
+{
+	return intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier) &&
+	       color_plane == 1;
+}
+
 static unsigned int
 intel_tile_width_bytes(const struct drm_framebuffer *fb, int color_plane)
 {
@@ -2013,6 +2015,7 @@ intel_tile_width_bytes(const struct drm_framebuffer *fb, int color_plane)
 			return 128;
 		/* fall through */
 	case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS:
+	case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS:
 		if (is_ccs_plane(fb, color_plane))
 			return 64;
 		/* fall through */
@@ -2068,6 +2071,16 @@ static void intel_tile_dims(const struct drm_framebuffer *fb, int color_plane,
 	*tile_height = intel_tile_height(fb, color_plane);
 }
 
+static unsigned int intel_tile_row_size(const struct drm_framebuffer *fb,
+					int color_plane)
+{
+	unsigned int tile_width, tile_height;
+
+	intel_tile_dims(fb, color_plane, &tile_width, &tile_height);
+
+	return fb->pitches[color_plane] * tile_height;
+}
+
 unsigned int
 intel_fb_align_height(const struct drm_framebuffer *fb,
 		      int color_plane, unsigned int height)
@@ -2142,7 +2155,8 @@ static unsigned int intel_surf_alignment(const struct drm_framebuffer *fb,
 	struct drm_i915_private *dev_priv = to_i915(fb->dev);
 
 	/* AUX_DIST needs only 4K alignment */
-	if (is_aux_plane(fb, color_plane))
+	if ((INTEL_GEN(dev_priv) < 12 && is_aux_plane(fb, color_plane)) ||
+	    is_ccs_plane(fb, color_plane))
 		return 4096;
 
 	switch (fb->modifier) {
@@ -2152,11 +2166,19 @@ static unsigned int intel_surf_alignment(const struct drm_framebuffer *fb,
 		if (INTEL_GEN(dev_priv) >= 9)
 			return 256 * 1024;
 		return 0;
+	case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS:
+		if (is_semiplanar_uv_plane(fb, color_plane))
+			return intel_tile_row_size(fb, color_plane);
+		/* fall through */
 	case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS:
 		return 16 * 1024;
 	case I915_FORMAT_MOD_Y_TILED_CCS:
 	case I915_FORMAT_MOD_Yf_TILED_CCS:
 	case I915_FORMAT_MOD_Y_TILED:
+		if (INTEL_GEN(dev_priv) >= 12 &&
+		    is_semiplanar_uv_plane(fb, color_plane))
+			return intel_tile_row_size(fb, color_plane);
+		/* fall through */
 	case I915_FORMAT_MOD_Yf_TILED:
 		return 1 * 1024 * 1024;
 	default:
@@ -2193,6 +2215,8 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 		return ERR_PTR(-EINVAL);
 
 	alignment = intel_surf_alignment(fb, 0);
+	if (WARN_ON(alignment && !is_power_of_2(alignment)))
+		return ERR_PTR(-EINVAL);
 
 	/* Note that the w/a also requires 64 PTE of padding following the
 	 * bo. We currently fill all unused PTE with the shadow page and so
@@ -2431,9 +2455,6 @@ static u32 intel_compute_aligned_offset(struct drm_i915_private *dev_priv,
 	unsigned int cpp = fb->format->cpp[color_plane];
 	u32 offset, offset_aligned;
 
-	if (alignment)
-		alignment--;
-
 	if (!is_surface_linear(fb, color_plane)) {
 		unsigned int tile_size, tile_width, tile_height;
 		unsigned int tile_rows, tiles, pitch_tiles;
@@ -2455,17 +2476,24 @@ static u32 intel_compute_aligned_offset(struct drm_i915_private *dev_priv,
 		*x %= tile_width;
 
 		offset = (tile_rows * pitch_tiles + tiles) * tile_size;
-		offset_aligned = offset & ~alignment;
+
+		offset_aligned = offset;
+		if (alignment)
+			offset_aligned = rounddown(offset_aligned, alignment);
 
 		intel_adjust_tile_offset(x, y, tile_width, tile_height,
 					 tile_size, pitch_tiles,
 					 offset, offset_aligned);
 	} else {
 		offset = *y * pitch + *x * cpp;
-		offset_aligned = offset & ~alignment;
-
-		*y = (offset & alignment) / pitch;
-		*x = ((offset & alignment) - *y * pitch) / cpp;
+		offset_aligned = offset;
+		if (alignment) {
+			offset_aligned = rounddown(offset_aligned, alignment);
+			*y = (offset % alignment) / pitch;
+			*x = ((offset % alignment) - *y * pitch) / cpp;
+		} else {
+			*y = *x = 0;
+		}
 	}
 
 	return offset_aligned;
@@ -2498,9 +2526,17 @@ static int intel_fb_offset_to_xy(int *x, int *y,
 {
 	struct drm_i915_private *dev_priv = to_i915(fb->dev);
 	unsigned int height;
+	u32 alignment;
 
-	if (fb->modifier != DRM_FORMAT_MOD_LINEAR &&
-	    fb->offsets[color_plane] % intel_tile_size(dev_priv)) {
+	if (INTEL_GEN(dev_priv) >= 12 &&
+	    is_semiplanar_uv_plane(fb, color_plane))
+		alignment = intel_tile_row_size(fb, color_plane);
+	else if (fb->modifier != DRM_FORMAT_MOD_LINEAR)
+		alignment = intel_tile_size(dev_priv);
+	else
+		alignment = 0;
+
+	if (alignment != 0 && fb->offsets[color_plane] % alignment) {
 		DRM_DEBUG_KMS("Misaligned offset 0x%08x for color plane %d\n",
 			      fb->offsets[color_plane], color_plane);
 		return -EINVAL;
@@ -2537,6 +2573,7 @@ static unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier)
 	case I915_FORMAT_MOD_Y_TILED:
 	case I915_FORMAT_MOD_Y_TILED_CCS:
 	case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS:
+	case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS:
 		return I915_TILING_Y;
 	default:
 		return I915_TILING_NONE;
@@ -2588,6 +2625,30 @@ static const struct drm_format_info gen12_ccs_formats[] = {
 	{ .format = DRM_FORMAT_ABGR8888, .depth = 32, .num_planes = 2,
 	  .char_per_block = { 4, 1 }, .block_w = { 1, 2 }, .block_h = { 1, 1 },
 	  .hsub = 1, .vsub = 1, .has_alpha = true },
+	{ .format = DRM_FORMAT_YUYV, .num_planes = 2,
+	  .char_per_block = { 2, 1 }, .block_w = { 1, 2 }, .block_h = { 1, 1 },
+	  .hsub = 2, .vsub = 1, .is_yuv = true },
+	{ .format = DRM_FORMAT_YVYU, .num_planes = 2,
+	  .char_per_block = { 2, 1 }, .block_w = { 1, 2 }, .block_h = { 1, 1 },
+	  .hsub = 2, .vsub = 1, .is_yuv = true },
+	{ .format = DRM_FORMAT_UYVY, .num_planes = 2,
+	  .char_per_block = { 2, 1 }, .block_w = { 1, 2 }, .block_h = { 1, 1 },
+	  .hsub = 2, .vsub = 1, .is_yuv = true },
+	{ .format = DRM_FORMAT_VYUY, .num_planes = 2,
+	  .char_per_block = { 2, 1 }, .block_w = { 1, 2 }, .block_h = { 1, 1 },
+	  .hsub = 2, .vsub = 1, .is_yuv = true },
+	{ .format = DRM_FORMAT_NV12, .num_planes = 4,
+	  .char_per_block = { 1, 2, 1, 1 }, .block_w = { 1, 1, 4, 4 }, .block_h = { 1, 1, 1, 1 },
+	  .hsub = 2, .vsub = 2, .is_yuv = true },
+	{ .format = DRM_FORMAT_P010, .num_planes = 4,
+	  .char_per_block = { 2, 4, 1, 1 }, .block_w = { 1, 1, 2, 2 }, .block_h = { 1, 1, 1, 1 },
+	  .hsub = 2, .vsub = 2, .is_yuv = true },
+	{ .format = DRM_FORMAT_P012, .num_planes = 4,
+	  .char_per_block = { 2, 4, 1, 1 }, .block_w = { 1, 1, 2, 2 }, .block_h = { 1, 1, 1, 1 },
+	  .hsub = 2, .vsub = 2, .is_yuv = true },
+	{ .format = DRM_FORMAT_P016, .num_planes = 4,
+	  .char_per_block = { 2, 4, 1, 1 }, .block_w = { 1, 1, 2, 2 }, .block_h = { 1, 1, 1, 1 },
+	  .hsub = 2, .vsub = 2, .is_yuv = true },
 };
 
 static const struct drm_format_info *
@@ -2614,6 +2675,7 @@ intel_get_format_info(const struct drm_mode_fb_cmd2 *cmd)
 					  ARRAY_SIZE(skl_ccs_formats),
 					  cmd->pixel_format);
 	case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS:
+	case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS:
 		return lookup_format_info(gen12_ccs_formats,
 					  ARRAY_SIZE(gen12_ccs_formats),
 					  cmd->pixel_format);
@@ -2625,6 +2687,7 @@ intel_get_format_info(const struct drm_mode_fb_cmd2 *cmd)
 bool is_ccs_modifier(u64 modifier)
 {
 	return modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS ||
+	       modifier == I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS ||
 	       modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
 	       modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
 }
@@ -2698,7 +2761,7 @@ intel_fb_stride_alignment(const struct drm_framebuffer *fb, int color_plane)
 	}
 
 	tile_width = intel_tile_width_bytes(fb, color_plane);
-	if (is_ccs_modifier(fb->modifier) && color_plane == 0) {
+	if (is_ccs_modifier(fb->modifier)) {
 		/*
 		 * Display WA #0531: skl,bxt,kbl,glk
 		 *
@@ -2708,7 +2771,7 @@ intel_fb_stride_alignment(const struct drm_framebuffer *fb, int color_plane)
 		 * require the entire fb to accommodate that to avoid
 		 * potential runtime errors at plane configuration time.
 		 */
-		if (IS_GEN(dev_priv, 9) && fb->width > 3840)
+		if (IS_GEN(dev_priv, 9) && color_plane == 0 && fb->width > 3840)
 			tile_width *= 4;
 		/*
 		 * The main surface pitch must be padded to a multiple of four
@@ -2876,11 +2939,15 @@ intel_fb_check_ccs_xy(struct drm_framebuffer *fb, int ccs_plane, int x, int y)
 static void
 intel_fb_plane_dims(int *w, int *h, struct drm_framebuffer *fb, int color_plane)
 {
+	int main_plane = is_ccs_plane(fb, color_plane) ?
+			 ccs_to_main_plane(fb, color_plane) : 0;
+	int main_hsub, main_vsub;
 	int hsub, vsub;
 
+	intel_fb_plane_get_subsampling(&main_hsub, &main_vsub, fb, main_plane);
 	intel_fb_plane_get_subsampling(&hsub, &vsub, fb, color_plane);
-	*w = fb->width / hsub;
-	*h = fb->height / vsub;
+	*w = fb->width / main_hsub / hsub;
+	*h = fb->height / main_vsub / vsub;
 }
 
 /*
@@ -3598,6 +3665,7 @@ static int skl_max_plane_width(const struct drm_framebuffer *fb,
 			return 5120;
 	case I915_FORMAT_MOD_Y_TILED_CCS:
 	case I915_FORMAT_MOD_Yf_TILED_CCS:
+	case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS:
 		/* FIXME AUX plane? */
 	case I915_FORMAT_MOD_Y_TILED:
 	case I915_FORMAT_MOD_Yf_TILED:
@@ -3656,11 +3724,12 @@ static int icl_max_plane_height(void)
 	return 4320;
 }
 
-static bool skl_check_main_ccs_coordinates(struct intel_plane_state *plane_state,
-					   int main_x, int main_y, u32 main_offset)
+static bool
+skl_check_main_ccs_coordinates(struct intel_plane_state *plane_state,
+			       int main_x, int main_y, u32 main_offset,
+			       int ccs_plane)
 {
 	const struct drm_framebuffer *fb = plane_state->hw.fb;
-	int ccs_plane = main_to_ccs_plane(fb, 0);
 	int aux_x = plane_state->color_plane[ccs_plane].x;
 	int aux_y = plane_state->color_plane[ccs_plane].y;
 	u32 aux_offset = plane_state->color_plane[ccs_plane].offset;
@@ -3737,6 +3806,8 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state)
 	intel_add_fb_offsets(&x, &y, plane_state, 0);
 	offset = intel_plane_compute_aligned_offset(&x, &y, plane_state, 0);
 	alignment = intel_surf_alignment(fb, 0);
+	if (WARN_ON(alignment && !is_power_of_2(alignment)))
+		return -EINVAL;
 
 	/*
 	 * AUX surface offset is specified as the distance from the
@@ -3772,7 +3843,8 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state)
 	 * they match with the main surface x/y offsets.
 	 */
 	if (is_ccs_modifier(fb->modifier)) {
-		while (!skl_check_main_ccs_coordinates(plane_state, x, y, offset)) {
+		while (!skl_check_main_ccs_coordinates(plane_state, x, y,
+						       offset, aux_plane)) {
 			if (offset == 0)
 				break;
 
@@ -3805,7 +3877,8 @@ static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state)
 {
 	const struct drm_framebuffer *fb = plane_state->hw.fb;
 	unsigned int rotation = plane_state->hw.rotation;
-	int max_width = skl_max_plane_width(fb, 1, rotation);
+	int uv_plane = 1;
+	int max_width = skl_max_plane_width(fb, uv_plane, rotation);
 	int max_height = 4096;
 	int x = plane_state->uapi.src.x1 >> 17;
 	int y = plane_state->uapi.src.y1 >> 17;
@@ -3813,8 +3886,9 @@ static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state)
 	int h = drm_rect_height(&plane_state->uapi.src) >> 17;
 	u32 offset;
 
-	intel_add_fb_offsets(&x, &y, plane_state, 1);
-	offset = intel_plane_compute_aligned_offset(&x, &y, plane_state, 1);
+	intel_add_fb_offsets(&x, &y, plane_state, uv_plane);
+	offset = intel_plane_compute_aligned_offset(&x, &y,
+						    plane_state, uv_plane);
 
 	/* FIXME not quite sure how/if these apply to the chroma plane */
 	if (w > max_width || h > max_height) {
@@ -3823,9 +3897,39 @@ static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state)
 		return -EINVAL;
 	}
 
-	plane_state->color_plane[1].offset = offset;
-	plane_state->color_plane[1].x = x;
-	plane_state->color_plane[1].y = y;
+	if (is_ccs_modifier(fb->modifier)) {
+		int ccs_plane = main_to_ccs_plane(fb, uv_plane);
+		int aux_offset = plane_state->color_plane[ccs_plane].offset;
+		int alignment = intel_surf_alignment(fb, uv_plane);
+
+		/* alignment may be a tile row size, not a power of two */
+		if (offset > aux_offset)
+			offset = intel_plane_adjust_aligned_offset(&x, &y,
+								   plane_state,
+								   uv_plane, offset,
+								   rounddown(aux_offset, alignment));
+
+		while (!skl_check_main_ccs_coordinates(plane_state, x, y,
+						       offset, ccs_plane)) {
+			if (offset == 0)
+				break;
+
+			offset = intel_plane_adjust_aligned_offset(&x, &y,
+								   plane_state,
+								   uv_plane,
+								   offset, offset - alignment);
+		}
+
+		if (x != plane_state->color_plane[ccs_plane].x ||
+		    y != plane_state->color_plane[ccs_plane].y) {
+			DRM_DEBUG_KMS("Unable to find suitable display surface offset due to CCS\n");
+			return -EINVAL;
+		}
+	}
+
+	plane_state->color_plane[uv_plane].offset = offset;
+	plane_state->color_plane[uv_plane].x = x;
+	plane_state->color_plane[uv_plane].y = y;
 
 	return 0;
 }
@@ -3835,21 +3939,40 @@ static int skl_check_ccs_aux_surface(struct intel_plane_state *plane_state)
 	const struct drm_framebuffer *fb = plane_state->hw.fb;
 	int src_x = plane_state->uapi.src.x1 >> 16;
 	int src_y = plane_state->uapi.src.y1 >> 16;
-	int hsub;
-	int vsub;
-	int x;
-	int y;
 	u32 offset;
+	int ccs_plane;
+
+	for (ccs_plane = 0; ccs_plane < fb->format->num_planes; ccs_plane++) {
+		int main_hsub, main_vsub;
+		int hsub, vsub;
+		int x, y;
 
-	intel_fb_plane_get_subsampling(&hsub, &vsub, fb, 1);
-	x = src_x / hsub;
-	y = src_y / vsub;
-	intel_add_fb_offsets(&x, &y, plane_state, 1);
-	offset = intel_plane_compute_aligned_offset(&x, &y, plane_state, 1);
+		if (!is_ccs_plane(fb, ccs_plane))
+			continue;
+
+		intel_fb_plane_get_subsampling(&main_hsub, &main_vsub, fb,
+					       ccs_to_main_plane(fb, ccs_plane));
+		intel_fb_plane_get_subsampling(&hsub, &vsub, fb, ccs_plane);
 
-	plane_state->color_plane[1].offset = offset;
-	plane_state->color_plane[1].x = x * hsub + src_x % hsub;
-	plane_state->color_plane[1].y = y * vsub + src_y % vsub;
+		hsub *= main_hsub;
+		vsub *= main_vsub;
+		x = src_x / hsub;
+		y = src_y / vsub;
+
+		intel_add_fb_offsets(&x, &y, plane_state, ccs_plane);
+
+		offset = intel_plane_compute_aligned_offset(&x, &y,
+							    plane_state,
+							    ccs_plane);
+
+		plane_state->color_plane[ccs_plane].offset = offset;
+		plane_state->color_plane[ccs_plane].x = (x * hsub +
+							 src_x % hsub) /
+							main_hsub;
+		plane_state->color_plane[ccs_plane].y = (y * vsub +
+							 src_y % vsub) /
+							main_vsub;
+	}
 
 	return 0;
 }
@@ -3858,6 +3981,7 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state)
 {
 	const struct drm_framebuffer *fb = plane_state->hw.fb;
 	int ret;
+	bool needs_aux = false;
 
 	ret = intel_plane_compute_gtt(plane_state);
 	if (ret)
@@ -3867,22 +3991,32 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state)
 		return 0;
 
 	/*
-	 * Handle the AUX surface first since
-	 * the main surface setup depends on it.
+	 * Handle the AUX surface first since the main surface setup depends on
+	 * it.
 	 */
+	if (is_ccs_modifier(fb->modifier)) {
+		needs_aux = true;
+		ret = skl_check_ccs_aux_surface(plane_state);
+		if (ret)
+			return ret;
+	}
+
 	if (intel_format_info_is_yuv_semiplanar(fb->format,
 						fb->modifier)) {
+		needs_aux = true;
 		ret = skl_check_nv12_aux_surface(plane_state);
 		if (ret)
 			return ret;
-	} else if (is_ccs_modifier(fb->modifier)) {
-		ret = skl_check_ccs_aux_surface(plane_state);
-		if (ret)
-			return ret;
-	} else {
-		plane_state->color_plane[1].offset = ~0xfff;
-		plane_state->color_plane[1].x = 0;
-		plane_state->color_plane[1].y = 0;
+	}
+
+	if (!needs_aux) {
+		int i;
+
+		for (i = 1; i < fb->format->num_planes; i++) {
+			plane_state->color_plane[i].offset = ~0xfff;
+			plane_state->color_plane[i].x = 0;
+			plane_state->color_plane[i].y = 0;
+		}
 	}
 
 	ret = skl_check_main_surface(plane_state);
@@ -4472,6 +4606,8 @@ static u32 skl_plane_ctl_tiling(u64 fb_modifier)
 		return PLANE_CTL_TILED_Y |
 		       PLANE_CTL_RENDER_DECOMPRESSION_ENABLE |
 		       PLANE_CTL_CLEAR_COLOR_DISABLE;
+	case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS:
+		return PLANE_CTL_TILED_Y | PLANE_CTL_MEDIA_DECOMPRESSION_ENABLE;
 	case I915_FORMAT_MOD_Yf_TILED:
 		return PLANE_CTL_TILED_YF;
 	case I915_FORMAT_MOD_Yf_TILED_CCS:
@@ -4869,8 +5005,8 @@ static void intel_fdi_normal_train(struct intel_crtc *crtc)
 }
 
 /* The FDI link training functions for ILK/Ibexpeak. */
-static void ironlake_fdi_link_train(struct intel_crtc *crtc,
-				    const struct intel_crtc_state *crtc_state)
+static void ilk_fdi_link_train(struct intel_crtc *crtc,
+			       const struct intel_crtc_state *crtc_state)
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -5222,7 +5358,7 @@ train_done:
 	DRM_DEBUG_KMS("FDI train done.\n");
 }
 
-static void ironlake_fdi_pll_enable(const struct intel_crtc_state *crtc_state)
+static void ilk_fdi_pll_enable(const struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
@@ -5259,7 +5395,7 @@ static void ironlake_fdi_pll_enable(const struct intel_crtc_state *crtc_state)
 	}
 }
 
-static void ironlake_fdi_pll_disable(struct intel_crtc *intel_crtc)
+static void ilk_fdi_pll_disable(struct intel_crtc *intel_crtc)
 {
 	struct drm_device *dev = intel_crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -5289,7 +5425,7 @@ static void ironlake_fdi_pll_disable(struct intel_crtc *intel_crtc)
 	udelay(100);
 }
 
-static void ironlake_fdi_disable(struct intel_crtc *crtc)
+static void ilk_fdi_disable(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
@@ -5496,8 +5632,8 @@ int lpt_get_iclkip(struct drm_i915_private *dev_priv)
 				 desired_divisor << auxdiv);
 }
 
-static void ironlake_pch_transcoder_set_timings(const struct intel_crtc_state *crtc_state,
-						enum pipe pch_transcoder)
+static void ilk_pch_transcoder_set_timings(const struct intel_crtc_state *crtc_state,
+					   enum pipe pch_transcoder)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
@@ -5540,7 +5676,7 @@ static void cpt_set_fdi_bc_bifurcation(struct drm_i915_private *dev_priv, bool e
 	POSTING_READ(SOUTH_CHICKEN1);
 }
 
-static void ivybridge_update_fdi_bc_bifurcation(const struct intel_crtc_state *crtc_state)
+static void ivb_update_fdi_bc_bifurcation(const struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
@@ -5601,8 +5737,8 @@ intel_get_crtc_new_encoder(const struct intel_atomic_state *state,
  *   - DP transcoding bits
  *   - transcoder
  */
-static void ironlake_pch_enable(const struct intel_atomic_state *state,
-				const struct intel_crtc_state *crtc_state)
+static void ilk_pch_enable(const struct intel_atomic_state *state,
+			   const struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_device *dev = crtc->base.dev;
@@ -5613,7 +5749,7 @@ static void ironlake_pch_enable(const struct intel_atomic_state *state,
 	assert_pch_transcoder_disabled(dev_priv, pipe);
 
 	if (IS_IVYBRIDGE(dev_priv))
-		ivybridge_update_fdi_bc_bifurcation(crtc_state);
+		ivb_update_fdi_bc_bifurcation(crtc_state);
 
 	/* Write the TU size bits before fdi link training, so that error
 	 * detection works. */
@@ -5650,7 +5786,7 @@ static void ironlake_pch_enable(const struct intel_atomic_state *state,
 
 	/* set transcoder timing, panel must allow it */
 	assert_panel_unlocked(dev_priv, pipe);
-	ironlake_pch_transcoder_set_timings(crtc_state, pipe);
+	ilk_pch_transcoder_set_timings(crtc_state, pipe);
 
 	intel_fdi_normal_train(crtc);
 
@@ -5682,7 +5818,7 @@ static void ironlake_pch_enable(const struct intel_atomic_state *state,
 		I915_WRITE(reg, temp);
 	}
 
-	ironlake_enable_pch_transcoder(crtc_state);
+	ilk_enable_pch_transcoder(crtc_state);
 }
 
 static void lpt_pch_enable(const struct intel_atomic_state *state,
@@ -5697,7 +5833,7 @@ static void lpt_pch_enable(const struct intel_atomic_state *state,
 	lpt_program_iclkip(crtc_state);
 
 	/* Set transcoder timing. */
-	ironlake_pch_transcoder_set_timings(crtc_state, PIPE_A);
+	ilk_pch_transcoder_set_timings(crtc_state, PIPE_A);
 
 	lpt_enable_pch_transcoder(dev_priv, cpu_transcoder);
 }
@@ -6001,7 +6137,7 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state,
 	return 0;
 }
 
-void skylake_scaler_disable(const struct intel_crtc_state *old_crtc_state)
+void skl_scaler_disable(const struct intel_crtc_state *old_crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc);
 	int i;
@@ -6010,7 +6146,7 @@ void skylake_scaler_disable(const struct intel_crtc_state *old_crtc_state)
 		skl_detach_scaler(crtc, i);
 }
 
-static void skylake_pfit_enable(const struct intel_crtc_state *crtc_state)
+static void skl_pfit_enable(const struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
@@ -6047,7 +6183,7 @@ static void skylake_pfit_enable(const struct intel_crtc_state *crtc_state)
 	}
 }
 
-static void ironlake_pfit_enable(const struct intel_crtc_state *crtc_state)
+static void ilk_pfit_enable(const struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
@@ -6411,45 +6547,29 @@ intel_connector_primary_encoder(struct intel_connector *connector)
 	if (connector->mst_port)
 		return &dp_to_dig_port(connector->mst_port)->base;
 
-	encoder = intel_attached_encoder(&connector->base);
+	encoder = intel_attached_encoder(connector);
 	WARN_ON(!encoder);
 
 	return encoder;
 }
 
-static bool
-intel_connector_needs_modeset(struct intel_atomic_state *state,
-			      const struct drm_connector_state *old_conn_state,
-			      const struct drm_connector_state *new_conn_state)
-{
-	struct intel_crtc *old_crtc = old_conn_state->crtc ?
-				      to_intel_crtc(old_conn_state->crtc) : NULL;
-	struct intel_crtc *new_crtc = new_conn_state->crtc ?
-				      to_intel_crtc(new_conn_state->crtc) : NULL;
-
-	return new_crtc != old_crtc ||
-	       (new_crtc &&
-		needs_modeset(intel_atomic_get_new_crtc_state(state, new_crtc)));
-}
-
 static void intel_encoders_update_prepare(struct intel_atomic_state *state)
 {
-	struct drm_connector_state *old_conn_state;
 	struct drm_connector_state *new_conn_state;
-	struct drm_connector *conn;
+	struct drm_connector *connector;
 	int i;
 
-	for_each_oldnew_connector_in_state(&state->base, conn,
-					   old_conn_state, new_conn_state, i) {
+	for_each_new_connector_in_state(&state->base, connector, new_conn_state,
+					i) {
+		struct intel_connector *intel_connector;
 		struct intel_encoder *encoder;
 		struct intel_crtc *crtc;
 
-		if (!intel_connector_needs_modeset(state,
-						   old_conn_state,
-						   new_conn_state))
+		if (!intel_connector_needs_modeset(state, connector))
 			continue;
 
-		encoder = intel_connector_primary_encoder(to_intel_connector(conn));
+		intel_connector = to_intel_connector(connector);
+		encoder = intel_connector_primary_encoder(intel_connector);
 		if (!encoder->update_prepare)
 			continue;
 
@@ -6461,22 +6581,21 @@ static void intel_encoders_update_prepare(struct intel_atomic_state *state)
 
 static void intel_encoders_update_complete(struct intel_atomic_state *state)
 {
-	struct drm_connector_state *old_conn_state;
 	struct drm_connector_state *new_conn_state;
-	struct drm_connector *conn;
+	struct drm_connector *connector;
 	int i;
 
-	for_each_oldnew_connector_in_state(&state->base, conn,
-					   old_conn_state, new_conn_state, i) {
+	for_each_new_connector_in_state(&state->base, connector, new_conn_state,
+					i) {
+		struct intel_connector *intel_connector;
 		struct intel_encoder *encoder;
 		struct intel_crtc *crtc;
 
-		if (!intel_connector_needs_modeset(state,
-						   old_conn_state,
-						   new_conn_state))
+		if (!intel_connector_needs_modeset(state, connector))
 			continue;
 
-		encoder = intel_connector_primary_encoder(to_intel_connector(conn));
+		intel_connector = to_intel_connector(connector);
+		encoder = intel_connector_primary_encoder(intel_connector);
 		if (!encoder->update_complete)
 			continue;
 
@@ -6643,8 +6762,8 @@ static void intel_disable_primary_plane(const struct intel_crtc_state *crtc_stat
 	plane->disable_plane(plane, crtc_state);
 }
 
-static void ironlake_crtc_enable(struct intel_atomic_state *state,
-				 struct intel_crtc *crtc)
+static void ilk_crtc_enable(struct intel_atomic_state *state,
+			    struct intel_crtc *crtc)
 {
 	const struct intel_crtc_state *new_crtc_state =
 		intel_atomic_get_new_crtc_state(state, crtc);
@@ -6680,7 +6799,7 @@ static void ironlake_crtc_enable(struct intel_atomic_state *state,
 		intel_cpu_transcoder_set_m_n(new_crtc_state,
 					     &new_crtc_state->fdi_m_n, NULL);
 
-	ironlake_set_pipeconf(new_crtc_state);
+	ilk_set_pipeconf(new_crtc_state);
 
 	crtc->active = true;
 
@@ -6690,13 +6809,13 @@ static void ironlake_crtc_enable(struct intel_atomic_state *state,
 		/* Note: FDI PLL enabling _must_ be done before we enable the
 		 * cpu pipes, hence this is separate from all the other fdi/pch
 		 * enabling. */
-		ironlake_fdi_pll_enable(new_crtc_state);
+		ilk_fdi_pll_enable(new_crtc_state);
 	} else {
 		assert_fdi_tx_disabled(dev_priv, pipe);
 		assert_fdi_rx_disabled(dev_priv, pipe);
 	}
 
-	ironlake_pfit_enable(new_crtc_state);
+	ilk_pfit_enable(new_crtc_state);
 
 	/*
 	 * On ILK+ LUT must be loaded before the pipe is running but with
@@ -6712,7 +6831,7 @@ static void ironlake_crtc_enable(struct intel_atomic_state *state,
 	intel_enable_pipe(new_crtc_state);
 
 	if (new_crtc_state->has_pch_encoder)
-		ironlake_pch_enable(state, new_crtc_state);
+		ilk_pch_enable(state, new_crtc_state);
 
 	intel_crtc_vblank_on(new_crtc_state);
 
@@ -6787,8 +6906,8 @@ static void hsw_set_frame_start_delay(const struct intel_crtc_state *crtc_state)
 	I915_WRITE(reg, val);
 }
 
-static void haswell_crtc_enable(struct intel_atomic_state *state,
-				struct intel_crtc *crtc)
+static void hsw_crtc_enable(struct intel_atomic_state *state,
+			    struct intel_crtc *crtc)
 {
 	const struct intel_crtc_state *new_crtc_state =
 		intel_atomic_get_new_crtc_state(state, crtc);
@@ -6829,7 +6948,7 @@ static void haswell_crtc_enable(struct intel_atomic_state *state,
 
 	if (!transcoder_is_dsi(cpu_transcoder)) {
 		hsw_set_frame_start_delay(new_crtc_state);
-		haswell_set_pipeconf(new_crtc_state);
+		hsw_set_pipeconf(new_crtc_state);
 	}
 
 	if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv))
@@ -6844,9 +6963,9 @@ static void haswell_crtc_enable(struct intel_atomic_state *state,
 		glk_pipe_scaler_clock_gating_wa(dev_priv, pipe, true);
 
 	if (INTEL_GEN(dev_priv) >= 9)
-		skylake_pfit_enable(new_crtc_state);
+		skl_pfit_enable(new_crtc_state);
 	else
-		ironlake_pfit_enable(new_crtc_state);
+		ilk_pfit_enable(new_crtc_state);
 
 	/*
 	 * On ILK+ LUT must be loaded before the pipe is running but with
@@ -6895,7 +7014,7 @@ static void haswell_crtc_enable(struct intel_atomic_state *state,
 	}
 }
 
-void ironlake_pfit_disable(const struct intel_crtc_state *old_crtc_state)
+void ilk_pfit_disable(const struct intel_crtc_state *old_crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
@@ -6910,8 +7029,8 @@ void ironlake_pfit_disable(const struct intel_crtc_state *old_crtc_state)
 	}
 }
 
-static void ironlake_crtc_disable(struct intel_atomic_state *state,
-				  struct intel_crtc *crtc)
+static void ilk_crtc_disable(struct intel_atomic_state *state,
+			     struct intel_crtc *crtc)
 {
 	const struct intel_crtc_state *old_crtc_state =
 		intel_atomic_get_old_crtc_state(state, crtc);
@@ -6932,15 +7051,15 @@ static void ironlake_crtc_disable(struct intel_atomic_state *state,
 
 	intel_disable_pipe(old_crtc_state);
 
-	ironlake_pfit_disable(old_crtc_state);
+	ilk_pfit_disable(old_crtc_state);
 
 	if (old_crtc_state->has_pch_encoder)
-		ironlake_fdi_disable(crtc);
+		ilk_fdi_disable(crtc);
 
 	intel_encoders_post_disable(state, crtc);
 
 	if (old_crtc_state->has_pch_encoder) {
-		ironlake_disable_pch_transcoder(dev_priv, pipe);
+		ilk_disable_pch_transcoder(dev_priv, pipe);
 
 		if (HAS_PCH_CPT(dev_priv)) {
 			i915_reg_t reg;
@@ -6960,15 +7079,15 @@ static void ironlake_crtc_disable(struct intel_atomic_state *state,
 			I915_WRITE(PCH_DPLL_SEL, temp);
 		}
 
-		ironlake_fdi_pll_disable(crtc);
+		ilk_fdi_pll_disable(crtc);
 	}
 
 	intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
 	intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, true);
 }
 
-static void haswell_crtc_disable(struct intel_atomic_state *state,
-				 struct intel_crtc *crtc)
+static void hsw_crtc_disable(struct intel_atomic_state *state,
+			     struct intel_crtc *crtc)
 {
 	/*
 	 * FIXME collapse everything to one hook.
@@ -7505,8 +7624,8 @@ static int pipe_required_fdi_lanes(struct intel_crtc_state *crtc_state)
 	return 0;
 }
 
-static int ironlake_check_fdi_lanes(struct drm_device *dev, enum pipe pipe,
-				     struct intel_crtc_state *pipe_config)
+static int ilk_check_fdi_lanes(struct drm_device *dev, enum pipe pipe,
+			       struct intel_crtc_state *pipe_config)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_atomic_state *state = pipe_config->uapi.state;
@@ -7578,8 +7697,8 @@ static int ironlake_check_fdi_lanes(struct drm_device *dev, enum pipe pipe,
 }
 
 #define RETRY 1
-static int ironlake_fdi_compute_config(struct intel_crtc *intel_crtc,
-				       struct intel_crtc_state *pipe_config)
+static int ilk_fdi_compute_config(struct intel_crtc *intel_crtc,
+				  struct intel_crtc_state *pipe_config)
 {
 	struct drm_device *dev = intel_crtc->base.dev;
 	const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
@@ -7598,15 +7717,15 @@ retry:
 
 	fdi_dotclock = adjusted_mode->crtc_clock;
 
-	lane = ironlake_get_lanes_required(fdi_dotclock, link_bw,
-					   pipe_config->pipe_bpp);
+	lane = ilk_get_lanes_required(fdi_dotclock, link_bw,
+				      pipe_config->pipe_bpp);
 
 	pipe_config->fdi_lanes = lane;
 
 	intel_link_compute_m_n(pipe_config->pipe_bpp, lane, fdi_dotclock,
 			       link_bw, &pipe_config->fdi_m_n, false, false);
 
-	ret = ironlake_check_fdi_lanes(dev, intel_crtc->pipe, pipe_config);
+	ret = ilk_check_fdi_lanes(dev, intel_crtc->pipe, pipe_config);
 	if (ret == -EDEADLK)
 		return ret;
 
@@ -7812,7 +7931,7 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc,
 	intel_crtc_compute_pixel_rate(pipe_config);
 
 	if (pipe_config->has_pch_encoder)
-		return ironlake_fdi_compute_config(crtc, pipe_config);
+		return ilk_fdi_compute_config(crtc, pipe_config);
 
 	return 0;
 }
@@ -8795,9 +8914,9 @@ static int pnv_crtc_compute_clock(struct intel_crtc *crtc,
 			DRM_DEBUG_KMS("using SSC reference clock of %d kHz\n", refclk);
 		}
 
-		limit = &intel_limits_pineview_lvds;
+		limit = &pnv_limits_lvds;
 	} else {
-		limit = &intel_limits_pineview_sdvo;
+		limit = &pnv_limits_sdvo;
 	}
 
 	if (!crtc_state->clock_set &&
@@ -9224,7 +9343,7 @@ out:
 	return ret;
 }
 
-static void ironlake_init_pch_refclk(struct drm_i915_private *dev_priv)
+static void ilk_init_pch_refclk(struct drm_i915_private *dev_priv)
 {
 	struct intel_encoder *encoder;
 	int i;
@@ -9722,12 +9841,12 @@ static void lpt_init_pch_refclk(struct drm_i915_private *dev_priv)
 void intel_init_pch_refclk(struct drm_i915_private *dev_priv)
 {
 	if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv))
-		ironlake_init_pch_refclk(dev_priv);
+		ilk_init_pch_refclk(dev_priv);
 	else if (HAS_PCH_LPT(dev_priv))
 		lpt_init_pch_refclk(dev_priv);
 }
 
-static void ironlake_set_pipeconf(const struct intel_crtc_state *crtc_state)
+static void ilk_set_pipeconf(const struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
@@ -9783,7 +9902,7 @@ static void ironlake_set_pipeconf(const struct intel_crtc_state *crtc_state)
 	POSTING_READ(PIPECONF(pipe));
 }
 
-static void haswell_set_pipeconf(const struct intel_crtc_state *crtc_state)
+static void hsw_set_pipeconf(const struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
@@ -9871,7 +9990,7 @@ int bdw_get_pipemisc_bpp(struct intel_crtc *crtc)
 	}
 }
 
-int ironlake_get_lanes_required(int target_clock, int link_bw, int bpp)
+int ilk_get_lanes_required(int target_clock, int link_bw, int bpp)
 {
 	/*
 	 * Account for spread spectrum to avoid
@@ -9882,14 +10001,14 @@ int ironlake_get_lanes_required(int target_clock, int link_bw, int bpp)
 	return DIV_ROUND_UP(bps, link_bw * 8);
 }
 
-static bool ironlake_needs_fb_cb_tune(struct dpll *dpll, int factor)
+static bool ilk_needs_fb_cb_tune(struct dpll *dpll, int factor)
 {
 	return i9xx_dpll_compute_m(dpll) < factor * dpll->n;
 }
 
-static void ironlake_compute_dpll(struct intel_crtc *crtc,
-				  struct intel_crtc_state *crtc_state,
-				  struct dpll *reduced_clock)
+static void ilk_compute_dpll(struct intel_crtc *crtc,
+			     struct intel_crtc_state *crtc_state,
+			     struct dpll *reduced_clock)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	u32 dpll, fp, fp2;
@@ -9909,7 +10028,7 @@ static void ironlake_compute_dpll(struct intel_crtc *crtc,
 
 	fp = i9xx_dpll_compute_fp(&crtc_state->dpll);
 
-	if (ironlake_needs_fb_cb_tune(&crtc_state->dpll, factor))
+	if (ilk_needs_fb_cb_tune(&crtc_state->dpll, factor))
 		fp |= FP_CB_TUNE;
 
 	if (reduced_clock) {
@@ -9989,8 +10108,8 @@ static void ironlake_compute_dpll(struct intel_crtc *crtc,
 	crtc_state->dpll_hw_state.fp1 = fp2;
 }
 
-static int ironlake_crtc_compute_clock(struct intel_crtc *crtc,
-				       struct intel_crtc_state *crtc_state)
+static int ilk_crtc_compute_clock(struct intel_crtc *crtc,
+				  struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	struct intel_atomic_state *state =
@@ -10014,17 +10133,17 @@ static int ironlake_crtc_compute_clock(struct intel_crtc *crtc,
 
 		if (intel_is_dual_link_lvds(dev_priv)) {
 			if (refclk == 100000)
-				limit = &intel_limits_ironlake_dual_lvds_100m;
+				limit = &ilk_limits_dual_lvds_100m;
 			else
-				limit = &intel_limits_ironlake_dual_lvds;
+				limit = &ilk_limits_dual_lvds;
 		} else {
 			if (refclk == 100000)
-				limit = &intel_limits_ironlake_single_lvds_100m;
+				limit = &ilk_limits_single_lvds_100m;
 			else
-				limit = &intel_limits_ironlake_single_lvds;
+				limit = &ilk_limits_single_lvds;
 		}
 	} else {
-		limit = &intel_limits_ironlake_dac;
+		limit = &ilk_limits_dac;
 	}
 
 	if (!crtc_state->clock_set &&
@@ -10034,7 +10153,7 @@ static int ironlake_crtc_compute_clock(struct intel_crtc *crtc,
 		return -EINVAL;
 	}
 
-	ironlake_compute_dpll(crtc, crtc_state, NULL);
+	ilk_compute_dpll(crtc, crtc_state, NULL);
 
 	if (!intel_reserve_shared_dplls(state, crtc, NULL)) {
 		DRM_DEBUG_KMS("failed to find PLL for pipe %c\n",
@@ -10109,15 +10228,15 @@ void intel_dp_get_m_n(struct intel_crtc *crtc,
 					     &pipe_config->dp_m2_n2);
 }
 
-static void ironlake_get_fdi_m_n_config(struct intel_crtc *crtc,
-					struct intel_crtc_state *pipe_config)
+static void ilk_get_fdi_m_n_config(struct intel_crtc *crtc,
+				   struct intel_crtc_state *pipe_config)
 {
 	intel_cpu_transcoder_get_m_n(crtc, pipe_config->cpu_transcoder,
 				     &pipe_config->fdi_m_n, NULL);
 }
 
-static void skylake_get_pfit_config(struct intel_crtc *crtc,
-				    struct intel_crtc_state *pipe_config)
+static void skl_get_pfit_config(struct intel_crtc *crtc,
+				struct intel_crtc_state *pipe_config)
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -10148,8 +10267,8 @@ static void skylake_get_pfit_config(struct intel_crtc *crtc,
 }
 
 static void
-skylake_get_initial_plane_config(struct intel_crtc *crtc,
-				 struct intel_initial_plane_config *plane_config)
+skl_get_initial_plane_config(struct intel_crtc *crtc,
+			     struct intel_initial_plane_config *plane_config)
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -10210,6 +10329,8 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc,
 			fb->modifier = INTEL_GEN(dev_priv) >= 12 ?
 				I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS :
 				I915_FORMAT_MOD_Y_TILED_CCS;
+		else if (val & PLANE_CTL_MEDIA_DECOMPRESSION_ENABLE)
+			fb->modifier = I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS;
 		else
 			fb->modifier = I915_FORMAT_MOD_Y_TILED;
 		break;
@@ -10276,8 +10397,8 @@ error:
 	kfree(intel_fb);
 }
 
-static void ironlake_get_pfit_config(struct intel_crtc *crtc,
-				     struct intel_crtc_state *pipe_config)
+static void ilk_get_pfit_config(struct intel_crtc *crtc,
+				struct intel_crtc_state *pipe_config)
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -10300,8 +10421,8 @@ static void ironlake_get_pfit_config(struct intel_crtc *crtc,
 	}
 }
 
-static bool ironlake_get_pipe_config(struct intel_crtc *crtc,
-				     struct intel_crtc_state *pipe_config)
+static bool ilk_get_pipe_config(struct intel_crtc *crtc,
+				struct intel_crtc_state *pipe_config)
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -10372,7 +10493,7 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc,
 		pipe_config->fdi_lanes = ((FDI_DP_PORT_WIDTH_MASK & tmp) >>
 					  FDI_DP_PORT_WIDTH_SHIFT) + 1;
 
-		ironlake_get_fdi_m_n_config(crtc, pipe_config);
+		ilk_get_fdi_m_n_config(crtc, pipe_config);
 
 		if (HAS_PCH_IBX(dev_priv)) {
 			/*
@@ -10400,7 +10521,7 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc,
 			((tmp & PLL_REF_SDVO_HDMI_MULTIPLIER_MASK)
 			 >> PLL_REF_SDVO_HDMI_MULTIPLIER_SHIFT) + 1;
 
-		ironlake_pch_clock_get(crtc, pipe_config);
+		ilk_pch_clock_get(crtc, pipe_config);
 	} else {
 		pipe_config->pixel_multiplier = 1;
 	}
@@ -10408,7 +10529,7 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc,
 	intel_get_pipe_timings(crtc, pipe_config);
 	intel_get_pipe_src_size(crtc, pipe_config);
 
-	ironlake_get_pfit_config(crtc, pipe_config);
+	ilk_get_pfit_config(crtc, pipe_config);
 
 	ret = true;
 
@@ -10417,8 +10538,9 @@ out:
 
 	return ret;
 }
-static int haswell_crtc_compute_clock(struct intel_crtc *crtc,
-				      struct intel_crtc_state *crtc_state)
+
+static int hsw_crtc_compute_clock(struct intel_crtc *crtc,
+				  struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	struct intel_atomic_state *state =
@@ -10439,9 +10561,8 @@ static int haswell_crtc_compute_clock(struct intel_crtc *crtc,
 	return 0;
 }
 
-static void cannonlake_get_ddi_pll(struct drm_i915_private *dev_priv,
-				   enum port port,
-				   struct intel_crtc_state *pipe_config)
+static void cnl_get_ddi_pll(struct drm_i915_private *dev_priv, enum port port,
+			    struct intel_crtc_state *pipe_config)
 {
 	enum intel_dpll_id id;
 	u32 temp;
@@ -10455,9 +10576,8 @@ static void cannonlake_get_ddi_pll(struct drm_i915_private *dev_priv,
 	pipe_config->shared_dpll = intel_get_shared_dpll_by_id(dev_priv, id);
 }
 
-static void icelake_get_ddi_pll(struct drm_i915_private *dev_priv,
-				enum port port,
-				struct intel_crtc_state *pipe_config)
+static void icl_get_ddi_pll(struct drm_i915_private *dev_priv, enum port port,
+			    struct intel_crtc_state *pipe_config)
 {
 	enum phy phy = intel_port_to_phy(dev_priv, port);
 	enum icl_port_dpll_id port_dpll_id;
@@ -10516,9 +10636,8 @@ static void bxt_get_ddi_pll(struct drm_i915_private *dev_priv,
 	pipe_config->shared_dpll = intel_get_shared_dpll_by_id(dev_priv, id);
 }
 
-static void skylake_get_ddi_pll(struct drm_i915_private *dev_priv,
-				enum port port,
-				struct intel_crtc_state *pipe_config)
+static void skl_get_ddi_pll(struct drm_i915_private *dev_priv, enum port port,
+			    struct intel_crtc_state *pipe_config)
 {
 	enum intel_dpll_id id;
 	u32 temp;
@@ -10532,9 +10651,8 @@ static void skylake_get_ddi_pll(struct drm_i915_private *dev_priv,
 	pipe_config->shared_dpll = intel_get_shared_dpll_by_id(dev_priv, id);
 }
 
-static void haswell_get_ddi_pll(struct drm_i915_private *dev_priv,
-				enum port port,
-				struct intel_crtc_state *pipe_config)
+static void hsw_get_ddi_pll(struct drm_i915_private *dev_priv, enum port port,
+			    struct intel_crtc_state *pipe_config)
 {
 	enum intel_dpll_id id;
 	u32 ddi_pll_sel = I915_READ(PORT_CLK_SEL(port));
@@ -10722,8 +10840,8 @@ static bool bxt_get_dsi_transcoder_state(struct intel_crtc *crtc,
 	return transcoder_is_dsi(pipe_config->cpu_transcoder);
 }
 
-static void haswell_get_ddi_port_state(struct intel_crtc *crtc,
-				       struct intel_crtc_state *pipe_config)
+static void hsw_get_ddi_port_state(struct intel_crtc *crtc,
+				   struct intel_crtc_state *pipe_config)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum transcoder cpu_transcoder = pipe_config->cpu_transcoder;
@@ -10743,15 +10861,15 @@ static void haswell_get_ddi_port_state(struct intel_crtc *crtc,
 	}
 
 	if (INTEL_GEN(dev_priv) >= 11)
-		icelake_get_ddi_pll(dev_priv, port, pipe_config);
+		icl_get_ddi_pll(dev_priv, port, pipe_config);
 	else if (IS_CANNONLAKE(dev_priv))
-		cannonlake_get_ddi_pll(dev_priv, port, pipe_config);
+		cnl_get_ddi_pll(dev_priv, port, pipe_config);
 	else if (IS_GEN9_BC(dev_priv))
-		skylake_get_ddi_pll(dev_priv, port, pipe_config);
+		skl_get_ddi_pll(dev_priv, port, pipe_config);
 	else if (IS_GEN9_LP(dev_priv))
 		bxt_get_ddi_pll(dev_priv, port, pipe_config);
 	else
-		haswell_get_ddi_pll(dev_priv, port, pipe_config);
+		hsw_get_ddi_pll(dev_priv, port, pipe_config);
 
 	pll = pipe_config->shared_dpll;
 	if (pll) {
@@ -10772,7 +10890,7 @@ static void haswell_get_ddi_port_state(struct intel_crtc *crtc,
 		pipe_config->fdi_lanes = ((FDI_DP_PORT_WIDTH_MASK & tmp) >>
 					  FDI_DP_PORT_WIDTH_SHIFT) + 1;
 
-		ironlake_get_fdi_m_n_config(crtc, pipe_config);
+		ilk_get_fdi_m_n_config(crtc, pipe_config);
 	}
 }
 
@@ -10794,7 +10912,7 @@ static enum transcoder transcoder_master_readout(struct drm_i915_private *dev_pr
 		return master_select - 1;
 }
 
-static void icelake_get_trans_port_sync_config(struct intel_crtc_state *crtc_state)
+static void icl_get_trans_port_sync_config(struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 	u32 transcoders;
@@ -10829,8 +10947,8 @@ static void icelake_get_trans_port_sync_config(struct intel_crtc_state *crtc_sta
 		crtc_state->sync_mode_slaves_mask);
 }
 
-static bool haswell_get_pipe_config(struct intel_crtc *crtc,
-				    struct intel_crtc_state *pipe_config)
+static bool hsw_get_pipe_config(struct intel_crtc *crtc,
+				struct intel_crtc_state *pipe_config)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	intel_wakeref_t wakerefs[POWER_DOMAIN_NUM], wf;
@@ -10865,7 +10983,7 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
 
 	if (!transcoder_is_dsi(pipe_config->cpu_transcoder) ||
 	    INTEL_GEN(dev_priv) >= 11) {
-		haswell_get_ddi_port_state(crtc, pipe_config);
+		hsw_get_ddi_port_state(crtc, pipe_config);
 		intel_get_pipe_timings(crtc, pipe_config);
 	}
 
@@ -10922,9 +11040,9 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
 		power_domain_mask |= BIT_ULL(power_domain);
 
 		if (INTEL_GEN(dev_priv) >= 9)
-			skylake_get_pfit_config(crtc, pipe_config);
+			skl_get_pfit_config(crtc, pipe_config);
 		else
-			ironlake_get_pfit_config(crtc, pipe_config);
+			ilk_get_pfit_config(crtc, pipe_config);
 	}
 
 	if (hsw_crtc_supports_ips(crtc)) {
@@ -10950,7 +11068,7 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
 
 	if (INTEL_GEN(dev_priv) >= 11 &&
 	    !transcoder_is_dsi(pipe_config->cpu_transcoder))
-		icelake_get_trans_port_sync_config(pipe_config);
+		icl_get_trans_port_sync_config(pipe_config);
 
 out:
 	for_each_power_domain(power_domain, power_domain_mask)
@@ -11570,7 +11688,7 @@ int intel_get_load_detect_pipe(struct drm_connector *connector,
 {
 	struct intel_crtc *intel_crtc;
 	struct intel_encoder *intel_encoder =
-		intel_attached_encoder(connector);
+		intel_attached_encoder(to_intel_connector(connector));
 	struct drm_crtc *possible_crtc;
 	struct drm_encoder *encoder = &intel_encoder->base;
 	struct drm_crtc *crtc = NULL;
@@ -11724,7 +11842,7 @@ void intel_release_load_detect_pipe(struct drm_connector *connector,
 				    struct drm_modeset_acquire_ctx *ctx)
 {
 	struct intel_encoder *intel_encoder =
-		intel_attached_encoder(connector);
+		intel_attached_encoder(to_intel_connector(connector));
 	struct drm_encoder *encoder = &intel_encoder->base;
 	struct drm_atomic_state *state = old->restore_state;
 	int ret;
@@ -11867,8 +11985,8 @@ int intel_dotclock_calculate(int link_freq,
 	return div_u64(mul_u32_u32(m_n->link_m, link_freq), m_n->link_n);
 }
 
-static void ironlake_pch_clock_get(struct intel_crtc *crtc,
-				   struct intel_crtc_state *pipe_config)
+static void ilk_pch_clock_get(struct intel_crtc *crtc,
+			      struct intel_crtc_state *pipe_config)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 
@@ -11897,6 +12015,7 @@ static void intel_crtc_state_reset(struct intel_crtc_state *crtc_state,
 	crtc_state->hsw_workaround_pipe = INVALID_PIPE;
 	crtc_state->output_format = INTEL_OUTPUT_FORMAT_INVALID;
 	crtc_state->scaler_state.scaler_id = -1;
+	crtc_state->mst_master_transcoder = INVALID_TRANSCODER;
 }
 
 static struct intel_crtc_state *intel_crtc_state_alloc(struct intel_crtc *crtc)
@@ -12278,88 +12397,121 @@ static bool c8_planes_changed(const struct intel_crtc_state *new_crtc_state)
 	return !old_crtc_state->c8_planes != !new_crtc_state->c8_planes;
 }
 
-static int icl_add_sync_mode_crtcs(struct intel_crtc_state *crtc_state)
+static bool
+intel_atomic_is_master_connector(struct intel_crtc_state *crtc_state)
+{
+	struct drm_crtc *crtc = crtc_state->uapi.crtc;
+	struct drm_atomic_state *state = crtc_state->uapi.state;
+	struct drm_connector *connector;
+	struct drm_connector_state *connector_state;
+	int i;
+
+	for_each_new_connector_in_state(state, connector, connector_state, i) {
+		if (connector_state->crtc != crtc)
+			continue;
+		if (connector->has_tile &&
+		    connector->tile_h_loc == connector->num_h_tile - 1 &&
+		    connector->tile_v_loc == connector->num_v_tile - 1)
+			return true;
+	}
+
+	return false;
+}
+
+static void reset_port_sync_mode_state(struct intel_crtc_state *crtc_state)
+{
+	crtc_state->master_transcoder = INVALID_TRANSCODER;
+	crtc_state->sync_mode_slaves_mask = 0;
+}
+
+static int icl_compute_port_sync_crtc_state(struct drm_connector *connector,
+					    struct intel_crtc_state *crtc_state,
+					    int num_tiled_conns)
 {
 	struct drm_crtc *crtc = crtc_state->uapi.crtc;
 	struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->uapi.state);
 	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
-	struct drm_connector *master_connector, *connector;
-	struct drm_connector_state *connector_state;
+	struct drm_connector *master_connector;
 	struct drm_connector_list_iter conn_iter;
 	struct drm_crtc *master_crtc = NULL;
 	struct drm_crtc_state *master_crtc_state;
 	struct intel_crtc_state *master_pipe_config;
-	int i, tile_group_id;
 
 	if (INTEL_GEN(dev_priv) < 11)
 		return 0;
 
+	if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP))
+		return 0;
+
 	/*
 	 * In case of tiled displays there could be one or more slaves but there is
 	 * only one master. Lets make the CRTC used by the connector corresponding
 	 * to the last horizonal and last vertical tile a master/genlock CRTC.
 	 * All the other CRTCs corresponding to other tiles of the same Tile group
 	 * are the slave CRTCs and hold a pointer to their genlock CRTC.
+	 * If not all tiles are present, do not make master/slave assignments.
 	 */
-	for_each_new_connector_in_state(&state->base, connector, connector_state, i) {
-		if (connector_state->crtc != crtc)
-			continue;
-		if (!connector->has_tile)
+	if (!connector->has_tile ||
+	    crtc_state->hw.mode.hdisplay != connector->tile_h_size ||
+	    crtc_state->hw.mode.vdisplay != connector->tile_v_size ||
+	    num_tiled_conns < connector->num_h_tile * connector->num_v_tile) {
+		reset_port_sync_mode_state(crtc_state);
+		return 0;
+	}
+	/* The last horizontal and last vertical tile connector is the master.
+	 * Master's crtc state is already populated in slave for port sync.
+	 */
+	if (connector->tile_h_loc == connector->num_h_tile - 1 &&
+	    connector->tile_v_loc == connector->num_v_tile - 1)
+		return 0;
+
+	/* Loop through all connectors and configure the Slave crtc_state
+	 * to point to the correct master.
+	 */
+	drm_connector_list_iter_begin(&dev_priv->drm, &conn_iter);
+	drm_for_each_connector_iter(master_connector, &conn_iter) {
+		struct drm_connector_state *master_conn_state = NULL;
+
+		if (!(master_connector->has_tile &&
+		      master_connector->tile_group->id == connector->tile_group->id))
 			continue;
-		if (crtc_state->hw.mode.hdisplay != connector->tile_h_size ||
-		    crtc_state->hw.mode.vdisplay != connector->tile_v_size)
-			return 0;
-		if (connector->tile_h_loc == connector->num_h_tile - 1 &&
-		    connector->tile_v_loc == connector->num_v_tile - 1)
+		if (master_connector->tile_h_loc != master_connector->num_h_tile - 1 ||
+		    master_connector->tile_v_loc != master_connector->num_v_tile - 1)
 			continue;
-		crtc_state->sync_mode_slaves_mask = 0;
-		tile_group_id = connector->tile_group->id;
-		drm_connector_list_iter_begin(&dev_priv->drm, &conn_iter);
-		drm_for_each_connector_iter(master_connector, &conn_iter) {
-			struct drm_connector_state *master_conn_state = NULL;
-
-			if (!master_connector->has_tile)
-				continue;
-			if (master_connector->tile_h_loc != master_connector->num_h_tile - 1 ||
-			    master_connector->tile_v_loc != master_connector->num_v_tile - 1)
-				continue;
-			if (master_connector->tile_group->id != tile_group_id)
-				continue;
 
-			master_conn_state = drm_atomic_get_connector_state(&state->base,
-									   master_connector);
-			if (IS_ERR(master_conn_state)) {
-				drm_connector_list_iter_end(&conn_iter);
-				return PTR_ERR(master_conn_state);
-			}
-			if (master_conn_state->crtc) {
-				master_crtc = master_conn_state->crtc;
-				break;
-			}
+		master_conn_state = drm_atomic_get_connector_state(&state->base,
+								   master_connector);
+		if (IS_ERR(master_conn_state)) {
+			drm_connector_list_iter_end(&conn_iter);
+			return PTR_ERR(master_conn_state);
 		}
-		drm_connector_list_iter_end(&conn_iter);
-
-		if (!master_crtc) {
-			DRM_DEBUG_KMS("Could not find Master CRTC for Slave CRTC %d\n",
-				      connector_state->crtc->base.id);
-			return -EINVAL;
+		if (master_conn_state->crtc) {
+			master_crtc = master_conn_state->crtc;
+			break;
 		}
+	}
+	drm_connector_list_iter_end(&conn_iter);
 
-		master_crtc_state = drm_atomic_get_crtc_state(&state->base,
-							      master_crtc);
-		if (IS_ERR(master_crtc_state))
-			return PTR_ERR(master_crtc_state);
-
-		master_pipe_config = to_intel_crtc_state(master_crtc_state);
-		crtc_state->master_transcoder = master_pipe_config->cpu_transcoder;
-		master_pipe_config->sync_mode_slaves_mask |=
-			BIT(crtc_state->cpu_transcoder);
-		DRM_DEBUG_KMS("Master Transcoder = %s added for Slave CRTC = %d, slave transcoder bitmask = %d\n",
-			      transcoder_name(crtc_state->master_transcoder),
-			      crtc_state->uapi.crtc->base.id,
-			      master_pipe_config->sync_mode_slaves_mask);
+	if (!master_crtc) {
+		DRM_DEBUG_KMS("Could not find Master CRTC for Slave CRTC %d\n",
+			      crtc->base.id);
+		return -EINVAL;
 	}
 
+	master_crtc_state = drm_atomic_get_crtc_state(&state->base,
+						      master_crtc);
+	if (IS_ERR(master_crtc_state))
+		return PTR_ERR(master_crtc_state);
+
+	master_pipe_config = to_intel_crtc_state(master_crtc_state);
+	crtc_state->master_transcoder = master_pipe_config->cpu_transcoder;
+	master_pipe_config->sync_mode_slaves_mask |=
+		BIT(crtc_state->cpu_transcoder);
+	DRM_DEBUG_KMS("Master Transcoder = %s added for Slave CRTC = %d, slave transcoder bitmask = %d\n",
+		      transcoder_name(crtc_state->master_transcoder),
+		      crtc->base.id,
+		      master_pipe_config->sync_mode_slaves_mask);
+
 	return 0;
 }
 
@@ -12755,6 +12907,9 @@ static void intel_dump_pipe_config(const struct intel_crtc_state *pipe_config,
 			      pipe_config->csc_mode, pipe_config->gamma_mode,
 			      pipe_config->gamma_enable, pipe_config->csc_enable);
 
+	DRM_DEBUG_KMS("MST master transcoder: %s\n",
+		      transcoder_name(pipe_config->mst_master_transcoder));
+
 dump_planes:
 	if (!state)
 		return;
@@ -12901,9 +13056,11 @@ intel_crtc_prepare_cleared_state(struct intel_crtc_state *crtc_state)
 		saved_state->wm = crtc_state->wm;
 	/*
 	 * Save the slave bitmask which gets filled for master crtc state during
-	 * slave atomic check call.
+	 * slave atomic check call. For all other CRTCs, reset the port sync state:
+	 * crtc_state->master_transcoder needs to be set to INVALID_TRANSCODER.
 	 */
-	if (is_trans_port_sync_master(crtc_state))
+	reset_port_sync_mode_state(saved_state);
+	if (intel_atomic_is_master_connector(crtc_state))
 		saved_state->sync_mode_slaves_mask =
 			crtc_state->sync_mode_slaves_mask;
 
@@ -12924,7 +13081,7 @@ intel_modeset_pipe_config(struct intel_crtc_state *pipe_config)
 	struct drm_connector *connector;
 	struct drm_connector_state *connector_state;
 	int base_bpp, ret;
-	int i;
+	int i, tile_group_id = -1, num_tiled_conns = 0;
 	bool retry = true;
 
 	pipe_config->cpu_transcoder =
@@ -12994,13 +13151,22 @@ encoder_retry:
 	drm_mode_set_crtcinfo(&pipe_config->hw.adjusted_mode,
 			      CRTC_STEREO_DOUBLE);
 
-	/* Set the crtc_state defaults for trans_port_sync */
-	pipe_config->master_transcoder = INVALID_TRANSCODER;
-	ret = icl_add_sync_mode_crtcs(pipe_config);
-	if (ret) {
-		DRM_DEBUG_KMS("Cannot assign Sync Mode CRTCs: %d\n",
-			      ret);
-		return ret;
+	/* Get tile_group_id of tiled connector */
+	for_each_new_connector_in_state(state, connector, connector_state, i) {
+		if (connector_state->crtc == crtc &&
+		    connector->has_tile) {
+			tile_group_id = connector->tile_group->id;
+			break;
+		}
+	}
+
+	/* Get total number of tiled connectors in state that belong to
+	 * this tile group.
+	 */
+	for_each_new_connector_in_state(state, connector, connector_state, i) {
+		if (connector->has_tile &&
+		    connector->tile_group->id == tile_group_id)
+			num_tiled_conns++;
 	}
 
 	/* Pass our mode to the connectors and the CRTC to give them a chance to
@@ -13011,6 +13177,14 @@ encoder_retry:
 		if (connector_state->crtc != crtc)
 			continue;
 
+		ret = icl_compute_port_sync_crtc_state(connector, pipe_config,
+						       num_tiled_conns);
+		if (ret) {
+			DRM_DEBUG_KMS("Cannot assign Sync Mode CRTCs: %d\n",
+				      ret);
+			return ret;
+		}
+
 		encoder = to_intel_encoder(connector_state->best_encoder);
 		ret = encoder->compute_config(encoder, pipe_config,
 					      connector_state);
@@ -13535,6 +13709,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 	PIPE_CONF_CHECK_I(dsc.dsc_split);
 	PIPE_CONF_CHECK_I(dsc.compressed_bpp);
 
+	PIPE_CONF_CHECK_I(mst_master_transcoder);
+
 #undef PIPE_CONF_CHECK_X
 #undef PIPE_CONF_CHECK_I
 #undef PIPE_CONF_CHECK_BOOL
@@ -14048,7 +14224,7 @@ static void intel_modeset_clear_plls(struct intel_atomic_state *state)
  * multiple pipes, and planes are enabled after the pipe, we need to wait at
  * least 2 vblanks on the first pipe before enabling planes on the second pipe.
  */
-static int haswell_mode_set_planes_workaround(struct intel_atomic_state *state)
+static int hsw_mode_set_planes_workaround(struct intel_atomic_state *state)
 {
 	struct intel_crtc_state *crtc_state;
 	struct intel_crtc *crtc;
@@ -14143,7 +14319,7 @@ static int intel_modeset_checks(struct intel_atomic_state *state)
 	intel_modeset_clear_plls(state);
 
 	if (IS_HASWELL(dev_priv))
-		return haswell_mode_set_planes_workaround(state);
+		return hsw_mode_set_planes_workaround(state);
 
 	return 0;
 }
@@ -14173,7 +14349,11 @@ static void intel_crtc_check_fastset(const struct intel_crtc_state *old_crtc_sta
 
 	new_crtc_state->uapi.mode_changed = false;
 	new_crtc_state->update_pipe = true;
+}
 
+static void intel_crtc_copy_fastset(const struct intel_crtc_state *old_crtc_state,
+				    struct intel_crtc_state *new_crtc_state)
+{
 	/*
 	 * If we're not doing the full modeset we want to
 	 * keep the current M/N values as they may be
@@ -14296,6 +14476,107 @@ static int intel_atomic_check_crtcs(struct intel_atomic_state *state)
 	return 0;
 }
 
+static bool intel_cpu_transcoder_needs_modeset(struct intel_atomic_state *state,
+					       enum transcoder transcoder)
+{
+	struct intel_crtc_state *new_crtc_state;
+	struct intel_crtc *crtc;
+	int i;
+
+	for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i)
+		if (new_crtc_state->cpu_transcoder == transcoder)
+			return needs_modeset(new_crtc_state);
+
+	return false;
+}
+
+static void
+intel_modeset_synced_crtcs(struct intel_atomic_state *state,
+			   u8 transcoders)
+{
+	struct intel_crtc_state *new_crtc_state;
+	struct intel_crtc *crtc;
+	int i;
+
+	for_each_new_intel_crtc_in_state(state, crtc,
+					 new_crtc_state, i) {
+		if (transcoders & BIT(new_crtc_state->cpu_transcoder)) {
+			new_crtc_state->uapi.mode_changed = true;
+			new_crtc_state->update_pipe = false;
+		}
+	}
+}
+
+static int
+intel_modeset_all_tiles(struct intel_atomic_state *state, int tile_grp_id)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	struct drm_connector *connector;
+	struct drm_connector_list_iter conn_iter;
+	int ret = 0;
+
+	drm_connector_list_iter_begin(&dev_priv->drm, &conn_iter);
+	drm_for_each_connector_iter(connector, &conn_iter) {
+		struct drm_connector_state *conn_state;
+		struct drm_crtc_state *crtc_state;
+
+		if (!connector->has_tile ||
+		    connector->tile_group->id != tile_grp_id)
+			continue;
+		conn_state = drm_atomic_get_connector_state(&state->base,
+							    connector);
+		if (IS_ERR(conn_state)) {
+			ret =  PTR_ERR(conn_state);
+			break;
+		}
+
+		if (!conn_state->crtc)
+			continue;
+
+		crtc_state = drm_atomic_get_crtc_state(&state->base,
+						       conn_state->crtc);
+		if (IS_ERR(crtc_state)) {
+			ret = PTR_ERR(crtc_state);
+			break;
+		}
+		crtc_state->mode_changed = true;
+		ret = drm_atomic_add_affected_connectors(&state->base,
+							 conn_state->crtc);
+		if (ret)
+			break;
+	}
+	drm_connector_list_iter_end(&conn_iter);
+
+	return ret;
+}
+
+static int
+intel_atomic_check_tiled_conns(struct intel_atomic_state *state)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	struct drm_connector *connector;
+	struct drm_connector_state *old_conn_state, *new_conn_state;
+	int i, ret;
+
+	if (INTEL_GEN(dev_priv) < 11)
+		return 0;
+
+	/* Is tiled, mark all other tiled CRTCs as needing a modeset */
+	for_each_oldnew_connector_in_state(&state->base, connector,
+					   old_conn_state, new_conn_state, i) {
+		if (!connector->has_tile)
+			continue;
+		if (!intel_connector_needs_modeset(state, connector))
+			continue;
+
+		ret = intel_modeset_all_tiles(state, connector->tile_group->id);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 /**
  * intel_atomic_check - validate state object
  * @dev: drm device
@@ -14323,6 +14604,21 @@ static int intel_atomic_check(struct drm_device *dev,
 	if (ret)
 		goto fail;
 
+	/*
+	 * This check adds all the connectors in current state that belong to
+	 * the same tile group to a full modeset.
+	 * This function directly sets mode_changed to true and also calls
+	 * drm_atomic_add_affected_connectors(). Hence we are not explicitly
+	 * calling drm_atomic_helper_check_modeset() after this.
+	 *
+	 * FIXME: Handle some corner cases where one of the
+	 * tiled connectors gets disconnected and tile info is lost but since it
+	 * was previously synced to other conn, we need to add that to the modeset.
+	 */
+	ret = intel_atomic_check_tiled_conns(state);
+	if (ret)
+		goto fail;
+
 	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
 					    new_crtc_state, i) {
 		if (!needs_modeset(new_crtc_state)) {
@@ -14334,8 +14630,6 @@ static int intel_atomic_check(struct drm_device *dev,
 
 		if (!new_crtc_state->uapi.enable) {
 			intel_crtc_copy_uapi_to_hw_state(new_crtc_state);
-
-			any_ms = true;
 			continue;
 		}
 
@@ -14348,9 +14642,49 @@ static int intel_atomic_check(struct drm_device *dev,
 			goto fail;
 
 		intel_crtc_check_fastset(old_crtc_state, new_crtc_state);
+	}
+
+	/*
+	 * Check if fastset is allowed by external dependencies like other
+	 * pipes and transcoders.
+	 *
+	 * Right now it only forces a full modeset when the MST master
+	 * transcoder did not change but the pipe of the master transcoder
+	 * needs a full modeset, so all slaves also need to do a full modeset,
+	 * or in case of port synced crtcs, if one of the synced crtcs
+	 * needs a full modeset, all other synced crtcs should be
+	 * forced to do a full modeset.
+	 */
+	for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) {
+		if (!new_crtc_state->hw.enable || needs_modeset(new_crtc_state))
+			continue;
+
+		if (intel_dp_mst_is_slave_trans(new_crtc_state)) {
+			enum transcoder master = new_crtc_state->mst_master_transcoder;
+
+			if (intel_cpu_transcoder_needs_modeset(state, master)) {
+				new_crtc_state->uapi.mode_changed = true;
+				new_crtc_state->update_pipe = false;
+			}
+		} else if (is_trans_port_sync_mode(new_crtc_state)) {
+			u8 trans = new_crtc_state->sync_mode_slaves_mask |
+				   BIT(new_crtc_state->master_transcoder);
 
-		if (needs_modeset(new_crtc_state))
+			intel_modeset_synced_crtcs(state, trans);
+		}
+	}
+
+	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
+					    new_crtc_state, i) {
+		if (needs_modeset(new_crtc_state)) {
 			any_ms = true;
+			continue;
+		}
+
+		if (!new_crtc_state->update_pipe)
+			continue;
+
+		intel_crtc_copy_fastset(old_crtc_state, new_crtc_state);
 	}
 
 	if (any_ms && !check_digital_port_conflicts(state)) {
@@ -14472,12 +14806,12 @@ static void intel_pipe_fastset(const struct intel_crtc_state *old_crtc_state,
 		skl_detach_scalers(new_crtc_state);
 
 		if (new_crtc_state->pch_pfit.enabled)
-			skylake_pfit_enable(new_crtc_state);
+			skl_pfit_enable(new_crtc_state);
 	} else if (HAS_PCH_SPLIT(dev_priv)) {
 		if (new_crtc_state->pch_pfit.enabled)
-			ironlake_pfit_enable(new_crtc_state);
+			ilk_pfit_enable(new_crtc_state);
 		else if (old_crtc_state->pch_pfit.enabled)
-			ironlake_pfit_disable(old_crtc_state);
+			ilk_pfit_disable(old_crtc_state);
 	}
 
 	if (INTEL_GEN(dev_priv) >= 11)
@@ -14619,7 +14953,7 @@ static void intel_commit_modeset_disables(struct intel_atomic_state *state)
 	u32 handled = 0;
 	int i;
 
-	/* Only disable port sync slaves */
+	/* Only disable port sync and MST slaves */
 	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
 					    new_crtc_state, i) {
 		if (!needs_modeset(new_crtc_state))
@@ -14633,7 +14967,8 @@ static void intel_commit_modeset_disables(struct intel_atomic_state *state)
 		 * slave CRTCs are disabled first and then master CRTC since
 		 * Slave vblanks are masked till Master Vblanks.
 		 */
-		if (!is_trans_port_sync_slave(old_crtc_state))
+		if (!is_trans_port_sync_slave(old_crtc_state) &&
+		    !intel_dp_mst_is_slave_trans(old_crtc_state))
 			continue;
 
 		intel_pre_plane_update(state, crtc);
@@ -14694,10 +15029,14 @@ static void intel_set_dp_tp_ctl_normal(struct intel_crtc *crtc,
 		if (conn_state->crtc == &crtc->base)
 			break;
 	}
-	intel_dp = enc_to_intel_dp(&intel_attached_encoder(conn)->base);
+	intel_dp = enc_to_intel_dp(intel_attached_encoder(to_intel_connector(conn)));
 	intel_dp_stop_link_train(intel_dp);
 }
 
+/*
+ * TODO: This is only called from port sync and it is identical to what will be
+ * executed again in intel_update_crtc() over port sync pipes
+ */
 static void intel_post_crtc_enable_updates(struct intel_crtc *crtc,
 					   struct intel_atomic_state *state)
 {
@@ -14786,15 +15125,21 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state)
 	u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
 	u8 required_slices = state->wm_results.ddb.enabled_slices;
 	struct skl_ddb_entry entries[I915_MAX_PIPES] = {};
-	u8 dirty_pipes = 0;
+	const u8 num_pipes = INTEL_NUM_PIPES(dev_priv);
+	u8 update_pipes = 0, modeset_pipes = 0;
 	int i;
 
 	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
+		if (!new_crtc_state->hw.active)
+			continue;
+
 		/* ignore allocations for crtc's that have been turned off. */
-		if (!needs_modeset(new_crtc_state) && new_crtc_state->hw.active)
+		if (!needs_modeset(new_crtc_state)) {
 			entries[i] = old_crtc_state->wm.skl.ddb;
-		if (new_crtc_state->hw.active)
-			dirty_pipes |= BIT(crtc->pipe);
+			update_pipes |= BIT(crtc->pipe);
+		} else {
+			modeset_pipes |= BIT(crtc->pipe);
+		}
 	}
 
 	/* If 2nd DBuf slice required, enable it here */
@@ -14804,38 +15149,29 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state)
 	/*
 	 * Whenever the number of active pipes changes, we need to make sure we
 	 * update the pipes in the right order so that their ddb allocations
-	 * never overlap with eachother inbetween CRTC updates. Otherwise we'll
+	 * never overlap with each other between CRTC updates. Otherwise we'll
 	 * cause pipe underruns and other bad stuff.
+	 *
+	 * So first let's enable all pipes that do not need a full modeset as
+	 * those don't have any external dependency.
 	 */
-	while (dirty_pipes) {
+	while (update_pipes) {
 		for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
 						    new_crtc_state, i) {
 			enum pipe pipe = crtc->pipe;
-			bool modeset = needs_modeset(new_crtc_state);
 
-			if ((dirty_pipes & BIT(pipe)) == 0)
+			if ((update_pipes & BIT(pipe)) == 0)
 				continue;
 
 			if (skl_ddb_allocation_overlaps(&new_crtc_state->wm.skl.ddb,
-							entries,
-							INTEL_NUM_PIPES(dev_priv), i))
+							entries, num_pipes, i))
 				continue;
 
 			entries[i] = new_crtc_state->wm.skl.ddb;
-			dirty_pipes &= ~BIT(pipe);
-
-			if (modeset && is_trans_port_sync_mode(new_crtc_state)) {
-				if (is_trans_port_sync_master(new_crtc_state))
-					intel_update_trans_port_sync_crtcs(crtc,
-									   state,
-									   old_crtc_state,
-									   new_crtc_state);
-				else
-					continue;
-			} else {
-				intel_update_crtc(crtc, state, old_crtc_state,
-						  new_crtc_state);
-			}
+			update_pipes &= ~BIT(pipe);
+
+			intel_update_crtc(crtc, state, old_crtc_state,
+					  new_crtc_state);
 
 			/*
 			 * If this is an already active pipe, it's DDB changed,
@@ -14845,11 +15181,72 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state)
 			 */
 			if (!skl_ddb_entry_equal(&new_crtc_state->wm.skl.ddb,
 						 &old_crtc_state->wm.skl.ddb) &&
-			    !modeset && dirty_pipes)
+			    (update_pipes | modeset_pipes))
 				intel_wait_for_vblank(dev_priv, pipe);
 		}
 	}
 
+	/*
+	 * Enable all pipes that need a modeset and do not depend on other
+	 * pipes.
+	 */
+	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
+					    new_crtc_state, i) {
+		enum pipe pipe = crtc->pipe;
+
+		if ((modeset_pipes & BIT(pipe)) == 0)
+			continue;
+
+		if (intel_dp_mst_is_slave_trans(new_crtc_state) ||
+		    is_trans_port_sync_slave(new_crtc_state))
+			continue;
+
+		WARN_ON(skl_ddb_allocation_overlaps(&new_crtc_state->wm.skl.ddb,
+						    entries, num_pipes, i));
+
+		entries[i] = new_crtc_state->wm.skl.ddb;
+		modeset_pipes &= ~BIT(pipe);
+
+		if (is_trans_port_sync_mode(new_crtc_state)) {
+			struct intel_crtc *slave_crtc;
+
+			intel_update_trans_port_sync_crtcs(crtc, state,
+							   old_crtc_state,
+							   new_crtc_state);
+
+			slave_crtc = intel_get_slave_crtc(new_crtc_state);
+			/* TODO: update entries[] of slave */
+			modeset_pipes &= ~BIT(slave_crtc->pipe);
+
+		} else {
+			intel_update_crtc(crtc, state, old_crtc_state,
+					  new_crtc_state);
+		}
+	}
+
+	/*
+	 * Finally enable all pipes that need a modeset and depend on
+	 * other pipes. Right now these are only MST slaves, as both port sync
+	 * slave and master are enabled together.
+	 */
+	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
+					    new_crtc_state, i) {
+		enum pipe pipe = crtc->pipe;
+
+		if ((modeset_pipes & BIT(pipe)) == 0)
+			continue;
+
+		WARN_ON(skl_ddb_allocation_overlaps(&new_crtc_state->wm.skl.ddb,
+						    entries, num_pipes, i));
+
+		entries[i] = new_crtc_state->wm.skl.ddb;
+		modeset_pipes &= ~BIT(pipe);
+
+		intel_update_crtc(crtc, state, old_crtc_state, new_crtc_state);
+	}
+
+	WARN_ON(modeset_pipes);
+
 	/* If 2nd DBuf slice is no more required disable it */
 	if (INTEL_GEN(dev_priv) >= 11 && required_slices < hw_enabled_slices)
 		icl_dbuf_slices_update(dev_priv, required_slices);
@@ -16586,8 +16983,11 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 	}
 
 	/* FIXME need to adjust LINOFF/TILEOFF accordingly. */
-	if (mode_cmd->offsets[0] != 0)
+	if (mode_cmd->offsets[0] != 0) {
+		DRM_DEBUG_KMS("plane 0 offset (0x%08x) must be 0\n",
+			      mode_cmd->offsets[0]);
 		goto err;
+	}
 
 	drm_helper_mode_fill_fb_struct(&dev_priv->drm, fb, mode_cmd);
 
@@ -16814,29 +17214,28 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv)
 	intel_init_cdclk_hooks(dev_priv);
 
 	if (INTEL_GEN(dev_priv) >= 9) {
-		dev_priv->display.get_pipe_config = haswell_get_pipe_config;
+		dev_priv->display.get_pipe_config = hsw_get_pipe_config;
 		dev_priv->display.get_initial_plane_config =
-			skylake_get_initial_plane_config;
-		dev_priv->display.crtc_compute_clock =
-			haswell_crtc_compute_clock;
-		dev_priv->display.crtc_enable = haswell_crtc_enable;
-		dev_priv->display.crtc_disable = haswell_crtc_disable;
+			skl_get_initial_plane_config;
+		dev_priv->display.crtc_compute_clock = hsw_crtc_compute_clock;
+		dev_priv->display.crtc_enable = hsw_crtc_enable;
+		dev_priv->display.crtc_disable = hsw_crtc_disable;
 	} else if (HAS_DDI(dev_priv)) {
-		dev_priv->display.get_pipe_config = haswell_get_pipe_config;
+		dev_priv->display.get_pipe_config = hsw_get_pipe_config;
 		dev_priv->display.get_initial_plane_config =
 			i9xx_get_initial_plane_config;
 		dev_priv->display.crtc_compute_clock =
-			haswell_crtc_compute_clock;
-		dev_priv->display.crtc_enable = haswell_crtc_enable;
-		dev_priv->display.crtc_disable = haswell_crtc_disable;
+			hsw_crtc_compute_clock;
+		dev_priv->display.crtc_enable = hsw_crtc_enable;
+		dev_priv->display.crtc_disable = hsw_crtc_disable;
 	} else if (HAS_PCH_SPLIT(dev_priv)) {
-		dev_priv->display.get_pipe_config = ironlake_get_pipe_config;
+		dev_priv->display.get_pipe_config = ilk_get_pipe_config;
 		dev_priv->display.get_initial_plane_config =
 			i9xx_get_initial_plane_config;
 		dev_priv->display.crtc_compute_clock =
-			ironlake_crtc_compute_clock;
-		dev_priv->display.crtc_enable = ironlake_crtc_enable;
-		dev_priv->display.crtc_disable = ironlake_crtc_disable;
+			ilk_crtc_compute_clock;
+		dev_priv->display.crtc_enable = ilk_crtc_enable;
+		dev_priv->display.crtc_disable = ilk_crtc_disable;
 	} else if (IS_CHERRYVIEW(dev_priv)) {
 		dev_priv->display.get_pipe_config = i9xx_get_pipe_config;
 		dev_priv->display.get_initial_plane_config =
@@ -16882,7 +17281,7 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv)
 	}
 
 	if (IS_GEN(dev_priv, 5)) {
-		dev_priv->display.fdi_link_train = ironlake_fdi_link_train;
+		dev_priv->display.fdi_link_train = ilk_fdi_link_train;
 	} else if (IS_GEN(dev_priv, 6)) {
 		dev_priv->display.fdi_link_train = gen6_fdi_link_train;
 	} else if (IS_IVYBRIDGE(dev_priv)) {
@@ -17827,8 +18226,11 @@ get_encoder_power_domains(struct drm_i915_private *dev_priv)
 
 static void intel_early_display_was(struct drm_i915_private *dev_priv)
 {
-	/* Display WA #1185 WaDisableDARBFClkGating:cnl,glk */
-	if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv))
+	/*
+	 * Display WA #1185 WaDisableDARBFClkGating:cnl,glk,icl,ehl,tgl
+	 * Also known as Wa_14010480278.
+	 */
+	if (IS_GEN_RANGE(dev_priv, 10, 12) || IS_GEMINILAKE(dev_priv))
 		I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
 			   DARBF_GATING_DIS);
 
@@ -17928,7 +18330,7 @@ intel_modeset_setup_hw_state(struct drm_device *dev,
 		/* We need to sanitize only the MST primary port. */
 		if (encoder->type != INTEL_OUTPUT_DP_MST &&
 		    intel_phy_is_tc(dev_priv, phy))
-			intel_tc_port_sanitize(enc_to_dig_port(&encoder->base));
+			intel_tc_port_sanitize(enc_to_dig_port(encoder));
 	}
 
 	get_encoder_power_domains(dev_priv);
@@ -18101,6 +18503,8 @@ void intel_modeset_driver_remove(struct drm_i915_private *i915)
 
 	intel_gmbus_teardown(i915);
 
+	intel_bw_cleanup(i915);
+
 	destroy_workqueue(i915->flip_wq);
 	destroy_workqueue(i915->modeset_wq);
 
diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h
index 0fef9263cddc..028aab728514 100644
--- a/drivers/gpu/drm/i915/display/intel_display.h
+++ b/drivers/gpu/drm/i915/display/intel_display.h
@@ -474,6 +474,7 @@ void intel_link_compute_m_n(u16 bpp, int nlanes,
 			    struct intel_link_m_n *m_n,
 			    bool constant_n, bool fec_enable);
 bool is_ccs_modifier(u64 modifier);
+int intel_main_to_aux_plane(const struct drm_framebuffer *fb, int main_plane);
 void lpt_disable_clkout_dp(struct drm_i915_private *dev_priv);
 u32 intel_plane_fb_max_stride(struct drm_i915_private *dev_priv,
 			      u32 pixel_format, u64 modifier);
@@ -521,7 +522,7 @@ int intel_get_pipe_from_crtc_id_ioctl(struct drm_device *dev, void *data,
 u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc);
 void intel_crtc_vblank_off(const struct intel_crtc_state *crtc_state);
 
-int ironlake_get_lanes_required(int target_clock, int link_bw, int bpp);
+int ilk_get_lanes_required(int target_clock, int link_bw, int bpp);
 void vlv_wait_port_ready(struct drm_i915_private *dev_priv,
 			 struct intel_digital_port *dport,
 			 unsigned int expected_mask);
@@ -578,8 +579,8 @@ void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc,
 
 u16 skl_scaler_calc_phase(int sub, int scale, bool chroma_center);
 int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state);
-void skylake_scaler_disable(const struct intel_crtc_state *old_crtc_state);
-void ironlake_pfit_disable(const struct intel_crtc_state *old_crtc_state);
+void skl_scaler_disable(const struct intel_crtc_state *old_crtc_state);
+void ilk_pfit_disable(const struct intel_crtc_state *old_crtc_state);
 u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state,
 			const struct intel_plane_state *plane_state);
 u32 glk_plane_color_ctl_crtc(const struct intel_crtc_state *crtc_state);
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
index 679457156797..21561acfa3ac 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -514,7 +514,7 @@ static void icl_tc_port_assert_ref_held(struct drm_i915_private *dev_priv,
 		if (encoder->type == INTEL_OUTPUT_DP_MST)
 			continue;
 
-		dig_port = enc_to_dig_port(&encoder->base);
+		dig_port = enc_to_dig_port(encoder);
 		if (WARN_ON(!dig_port))
 			continue;
 
@@ -1664,8 +1664,8 @@ void chv_phy_powergate_lanes(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct i915_power_domains *power_domains = &dev_priv->power_domains;
-	enum dpio_phy phy = vlv_dport_to_phy(enc_to_dig_port(&encoder->base));
-	enum dpio_channel ch = vlv_dport_to_channel(enc_to_dig_port(&encoder->base));
+	enum dpio_phy phy = vlv_dport_to_phy(enc_to_dig_port(encoder));
+	enum dpio_channel ch = vlv_dport_to_channel(enc_to_dig_port(encoder));
 
 	mutex_lock(&power_domains->lock);
 
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index 83ea04149b77..888ea8a170d1 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -90,8 +90,8 @@ struct intel_framebuffer {
 	/* for each plane in the normal GTT view */
 	struct {
 		unsigned int x, y;
-	} normal[2];
-	/* for each plane in the rotated GTT view */
+	} normal[4];
+	/* for each plane in the rotated GTT view for no-CCS formats */
 	struct {
 		unsigned int x, y;
 		unsigned int pitch; /* pixels */
@@ -555,7 +555,7 @@ struct intel_plane_state {
 		 */
 		u32 stride;
 		int x, y;
-	} color_plane[2];
+	} color_plane[4];
 
 	/* plane control register */
 	u32 ctl;
@@ -1054,6 +1054,9 @@ struct intel_crtc_state {
 
 	/* Bitmask to indicate slaves attached */
 	u8 sync_mode_slaves_mask;
+
+	/* Only valid on TGL+ */
+	enum transcoder mst_master_transcoder;
 };
 
 struct intel_crtc {
@@ -1435,9 +1438,9 @@ struct intel_load_detect_pipe {
 };
 
 static inline struct intel_encoder *
-intel_attached_encoder(struct drm_connector *connector)
+intel_attached_encoder(struct intel_connector *connector)
 {
-	return to_intel_connector(connector)->encoder;
+	return connector->encoder;
 }
 
 static inline bool intel_encoder_is_dig_port(struct intel_encoder *encoder)
@@ -1454,12 +1457,12 @@ static inline bool intel_encoder_is_dig_port(struct intel_encoder *encoder)
 }
 
 static inline struct intel_digital_port *
-enc_to_dig_port(struct drm_encoder *encoder)
+enc_to_dig_port(struct intel_encoder *encoder)
 {
-	struct intel_encoder *intel_encoder = to_intel_encoder(encoder);
+	struct intel_encoder *intel_encoder = encoder;
 
 	if (intel_encoder_is_dig_port(intel_encoder))
-		return container_of(encoder, struct intel_digital_port,
+		return container_of(&encoder->base, struct intel_digital_port,
 				    base.base);
 	else
 		return NULL;
@@ -1468,16 +1471,17 @@ enc_to_dig_port(struct drm_encoder *encoder)
 static inline struct intel_digital_port *
 conn_to_dig_port(struct intel_connector *connector)
 {
-	return enc_to_dig_port(&intel_attached_encoder(&connector->base)->base);
+	return enc_to_dig_port(intel_attached_encoder(connector));
 }
 
 static inline struct intel_dp_mst_encoder *
-enc_to_mst(struct drm_encoder *encoder)
+enc_to_mst(struct intel_encoder *encoder)
 {
-	return container_of(encoder, struct intel_dp_mst_encoder, base.base);
+	return container_of(&encoder->base, struct intel_dp_mst_encoder,
+			    base.base);
 }
 
-static inline struct intel_dp *enc_to_intel_dp(struct drm_encoder *encoder)
+static inline struct intel_dp *enc_to_intel_dp(struct intel_encoder *encoder)
 {
 	return &enc_to_dig_port(encoder)->dp;
 }
@@ -1490,14 +1494,14 @@ static inline bool intel_encoder_is_dp(struct intel_encoder *encoder)
 		return true;
 	case INTEL_OUTPUT_DDI:
 		/* Skip pure HDMI/DVI DDI encoders */
-		return i915_mmio_reg_valid(enc_to_intel_dp(&encoder->base)->output_reg);
+		return i915_mmio_reg_valid(enc_to_intel_dp(encoder)->output_reg);
 	default:
 		return false;
 	}
 }
 
 static inline struct intel_lspcon *
-enc_to_intel_lspcon(struct drm_encoder *encoder)
+enc_to_intel_lspcon(struct intel_encoder *encoder)
 {
 	return &enc_to_dig_port(encoder)->lspcon;
 }
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 2f31d226c6eb..c7424e2a04a3 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -146,9 +146,9 @@ bool intel_dp_is_edp(struct intel_dp *intel_dp)
 	return intel_dig_port->base.type == INTEL_OUTPUT_EDP;
 }
 
-static struct intel_dp *intel_attached_dp(struct drm_connector *connector)
+static struct intel_dp *intel_attached_dp(struct intel_connector *connector)
 {
-	return enc_to_intel_dp(&intel_attached_encoder(connector)->base);
+	return enc_to_intel_dp(intel_attached_encoder(connector));
 }
 
 static void intel_dp_link_down(struct intel_encoder *encoder,
@@ -614,7 +614,7 @@ static enum drm_mode_status
 intel_dp_mode_valid(struct drm_connector *connector,
 		    struct drm_display_mode *mode)
 {
-	struct intel_dp *intel_dp = intel_attached_dp(connector);
+	struct intel_dp *intel_dp = intel_attached_dp(to_intel_connector(connector));
 	struct intel_connector *intel_connector = to_intel_connector(connector);
 	struct drm_display_mode *fixed_mode = intel_connector->panel.fixed_mode;
 	struct drm_i915_private *dev_priv = to_i915(connector->dev);
@@ -834,7 +834,7 @@ static enum pipe vlv_find_free_pps(struct drm_i915_private *dev_priv)
 	 * Pick one that's not used by other ports.
 	 */
 	for_each_intel_dp(&dev_priv->drm, encoder) {
-		struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
 		if (encoder->type == INTEL_OUTPUT_EDP) {
 			WARN_ON(intel_dp->active_pipe != INVALID_PIPE &&
@@ -1031,7 +1031,7 @@ void intel_power_sequencer_reset(struct drm_i915_private *dev_priv)
 	 */
 
 	for_each_intel_dp(&dev_priv->drm, encoder) {
-		struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
 		WARN_ON(intel_dp->active_pipe != INVALID_PIPE);
 
@@ -2034,7 +2034,7 @@ static int intel_dp_dsc_compute_bpp(struct intel_dp *intel_dp, u8 dsc_max_bpc)
 static int intel_dp_dsc_compute_params(struct intel_encoder *encoder,
 				       struct intel_crtc_state *crtc_state)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config;
 	u8 line_buf_depth;
 	int ret;
@@ -2205,7 +2205,7 @@ intel_dp_compute_link_config(struct intel_encoder *encoder,
 			     struct drm_connector_state *conn_state)
 {
 	struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	struct link_config_limits limits;
 	int common_len;
 	int ret;
@@ -2366,8 +2366,8 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
-	struct intel_lspcon *lspcon = enc_to_intel_lspcon(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+	struct intel_lspcon *lspcon = enc_to_intel_lspcon(encoder);
 	enum port port = encoder->port;
 	struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc);
 	struct intel_connector *intel_connector = intel_dp->attached_connector;
@@ -2482,7 +2482,7 @@ static void intel_dp_prepare(struct intel_encoder *encoder,
 			     const struct intel_crtc_state *pipe_config)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	enum port port = encoder->port;
 	struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
 	const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
@@ -2509,7 +2509,7 @@ static void intel_dp_prepare(struct intel_encoder *encoder,
 	 *
 	 * CPT PCH is quite different, having many bits moved
 	 * to the TRANS_DP_CTL register instead. That
-	 * configuration happens (oddly) in ironlake_pch_enable
+	 * configuration happens (oddly) in ilk_pch_enable
 	 */
 
 	/* Preserve the BIOS-computed detected bit. This is
@@ -2653,7 +2653,7 @@ static void edp_wait_backlight_off(struct intel_dp *intel_dp)
  * is locked
  */
 
-static  u32 ironlake_get_pp_control(struct intel_dp *intel_dp)
+static  u32 ilk_get_pp_control(struct intel_dp *intel_dp)
 {
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
 	u32 control;
@@ -2703,7 +2703,7 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp)
 	if (!edp_have_panel_power(intel_dp))
 		wait_panel_power_cycle(intel_dp);
 
-	pp = ironlake_get_pp_control(intel_dp);
+	pp = ilk_get_pp_control(intel_dp);
 	pp |= EDP_FORCE_VDD;
 
 	pp_stat_reg = _pp_stat_reg(intel_dp);
@@ -2768,7 +2768,7 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp)
 		      intel_dig_port->base.base.base.id,
 		      intel_dig_port->base.base.name);
 
-	pp = ironlake_get_pp_control(intel_dp);
+	pp = ilk_get_pp_control(intel_dp);
 	pp &= ~EDP_FORCE_VDD;
 
 	pp_ctrl_reg = _pp_ctrl_reg(intel_dp);
@@ -2864,7 +2864,7 @@ static void edp_panel_on(struct intel_dp *intel_dp)
 	wait_panel_power_cycle(intel_dp);
 
 	pp_ctrl_reg = _pp_ctrl_reg(intel_dp);
-	pp = ironlake_get_pp_control(intel_dp);
+	pp = ilk_get_pp_control(intel_dp);
 	if (IS_GEN(dev_priv, 5)) {
 		/* ILK workaround: disable reset around power sequence */
 		pp &= ~PANEL_POWER_RESET;
@@ -2919,7 +2919,7 @@ static void edp_panel_off(struct intel_dp *intel_dp)
 	WARN(!intel_dp->want_panel_vdd, "Need [ENCODER:%d:%s] VDD to turn off panel\n",
 	     dig_port->base.base.base.id, dig_port->base.base.name);
 
-	pp = ironlake_get_pp_control(intel_dp);
+	pp = ilk_get_pp_control(intel_dp);
 	/* We need to switch off panel power _and_ force vdd, for otherwise some
 	 * panels get very unhappy and cease to work. */
 	pp &= ~(PANEL_POWER_ON | PANEL_POWER_RESET | EDP_FORCE_VDD |
@@ -2968,7 +2968,7 @@ static void _intel_edp_backlight_on(struct intel_dp *intel_dp)
 		i915_reg_t pp_ctrl_reg = _pp_ctrl_reg(intel_dp);
 		u32 pp;
 
-		pp = ironlake_get_pp_control(intel_dp);
+		pp = ilk_get_pp_control(intel_dp);
 		pp |= EDP_BLC_ENABLE;
 
 		I915_WRITE(pp_ctrl_reg, pp);
@@ -2980,7 +2980,7 @@ static void _intel_edp_backlight_on(struct intel_dp *intel_dp)
 void intel_edp_backlight_on(const struct intel_crtc_state *crtc_state,
 			    const struct drm_connector_state *conn_state)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(conn_state->best_encoder);
+	struct intel_dp *intel_dp = enc_to_intel_dp(to_intel_encoder(conn_state->best_encoder));
 
 	if (!intel_dp_is_edp(intel_dp))
 		return;
@@ -3004,7 +3004,7 @@ static void _intel_edp_backlight_off(struct intel_dp *intel_dp)
 		i915_reg_t pp_ctrl_reg = _pp_ctrl_reg(intel_dp);
 		u32 pp;
 
-		pp = ironlake_get_pp_control(intel_dp);
+		pp = ilk_get_pp_control(intel_dp);
 		pp &= ~EDP_BLC_ENABLE;
 
 		I915_WRITE(pp_ctrl_reg, pp);
@@ -3018,7 +3018,7 @@ static void _intel_edp_backlight_off(struct intel_dp *intel_dp)
 /* Disable backlight PP control and backlight PWM. */
 void intel_edp_backlight_off(const struct drm_connector_state *old_conn_state)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(old_conn_state->best_encoder);
+	struct intel_dp *intel_dp = enc_to_intel_dp(to_intel_encoder(old_conn_state->best_encoder));
 
 	if (!intel_dp_is_edp(intel_dp))
 		return;
@@ -3036,13 +3036,13 @@ void intel_edp_backlight_off(const struct drm_connector_state *old_conn_state)
 static void intel_edp_backlight_power(struct intel_connector *connector,
 				      bool enable)
 {
-	struct intel_dp *intel_dp = intel_attached_dp(&connector->base);
+	struct intel_dp *intel_dp = intel_attached_dp(connector);
 	intel_wakeref_t wakeref;
 	bool is_enabled;
 
 	is_enabled = false;
 	with_pps_lock(intel_dp, wakeref)
-		is_enabled = ironlake_get_pp_control(intel_dp) & EDP_BLC_ENABLE;
+		is_enabled = ilk_get_pp_control(intel_dp) & EDP_BLC_ENABLE;
 	if (is_enabled == enable)
 		return;
 
@@ -3079,13 +3079,13 @@ static void assert_edp_pll(struct drm_i915_private *dev_priv, bool state)
 #define assert_edp_pll_enabled(d) assert_edp_pll((d), true)
 #define assert_edp_pll_disabled(d) assert_edp_pll((d), false)
 
-static void ironlake_edp_pll_on(struct intel_dp *intel_dp,
-				const struct intel_crtc_state *pipe_config)
+static void ilk_edp_pll_on(struct intel_dp *intel_dp,
+			   const struct intel_crtc_state *pipe_config)
 {
 	struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 
-	assert_pipe_disabled(dev_priv, crtc->pipe);
+	assert_pipe_disabled(dev_priv, pipe_config->cpu_transcoder);
 	assert_dp_port_disabled(intel_dp);
 	assert_edp_pll_disabled(dev_priv);
 
@@ -3119,13 +3119,13 @@ static void ironlake_edp_pll_on(struct intel_dp *intel_dp,
 	udelay(200);
 }
 
-static void ironlake_edp_pll_off(struct intel_dp *intel_dp,
-				 const struct intel_crtc_state *old_crtc_state)
+static void ilk_edp_pll_off(struct intel_dp *intel_dp,
+			    const struct intel_crtc_state *old_crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 
-	assert_pipe_disabled(dev_priv, crtc->pipe);
+	assert_pipe_disabled(dev_priv, old_crtc_state->cpu_transcoder);
 	assert_dp_port_disabled(intel_dp);
 	assert_edp_pll_enabled(dev_priv);
 
@@ -3258,7 +3258,7 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder,
 				  enum pipe *pipe)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	intel_wakeref_t wakeref;
 	bool ret;
 
@@ -3279,7 +3279,7 @@ static void intel_dp_get_config(struct intel_encoder *encoder,
 				struct intel_crtc_state *pipe_config)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	u32 tmp, flags = 0;
 	enum port port = encoder->port;
 	struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
@@ -3363,7 +3363,7 @@ static void intel_disable_dp(struct intel_encoder *encoder,
 			     const struct intel_crtc_state *old_crtc_state,
 			     const struct drm_connector_state *old_conn_state)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
 	intel_dp->link_trained = false;
 
@@ -3397,7 +3397,7 @@ static void g4x_post_disable_dp(struct intel_encoder *encoder,
 				const struct intel_crtc_state *old_crtc_state,
 				const struct drm_connector_state *old_conn_state)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	enum port port = encoder->port;
 
 	/*
@@ -3410,7 +3410,7 @@ static void g4x_post_disable_dp(struct intel_encoder *encoder,
 
 	/* Only ilk+ has port A */
 	if (port == PORT_A)
-		ironlake_edp_pll_off(intel_dp, old_crtc_state);
+		ilk_edp_pll_off(intel_dp, old_crtc_state);
 }
 
 static void vlv_post_disable_dp(struct intel_encoder *encoder,
@@ -3548,7 +3548,7 @@ static void intel_enable_dp(struct intel_encoder *encoder,
 			    const struct drm_connector_state *conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
 	u32 dp_reg = I915_READ(intel_dp->output_reg);
 	enum pipe pipe = crtc->pipe;
@@ -3608,14 +3608,14 @@ static void g4x_pre_enable_dp(struct intel_encoder *encoder,
 			      const struct intel_crtc_state *pipe_config,
 			      const struct drm_connector_state *conn_state)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	enum port port = encoder->port;
 
 	intel_dp_prepare(encoder, pipe_config);
 
 	/* Only ilk+ has port A */
 	if (port == PORT_A)
-		ironlake_edp_pll_on(intel_dp, pipe_config);
+		ilk_edp_pll_on(intel_dp, pipe_config);
 }
 
 static void vlv_detach_power_sequencer(struct intel_dp *intel_dp)
@@ -3658,7 +3658,7 @@ static void vlv_steal_power_sequencer(struct drm_i915_private *dev_priv,
 	lockdep_assert_held(&dev_priv->pps_mutex);
 
 	for_each_intel_dp(&dev_priv->drm, encoder) {
-		struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
 		WARN(intel_dp->active_pipe == pipe,
 		     "stealing pipe %c power sequencer from active [ENCODER:%d:%s]\n",
@@ -3681,7 +3681,7 @@ static void vlv_init_panel_power_sequencer(struct intel_encoder *encoder,
 					   const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 
 	lockdep_assert_held(&dev_priv->pps_mutex);
@@ -4203,7 +4203,7 @@ intel_dp_link_down(struct intel_encoder *encoder,
 		   const struct intel_crtc_state *old_crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc);
 	enum port port = encoder->port;
 	u32 DP = intel_dp->DP;
@@ -4903,7 +4903,7 @@ static u8 intel_dp_autotest_video_pattern(struct intel_dp *intel_dp)
 	intel_dp->compliance.test_data.hdisplay = be16_to_cpu(h_width);
 	intel_dp->compliance.test_data.vdisplay = be16_to_cpu(v_height);
 	/* Set test active flag here so userspace doesn't interrupt things */
-	intel_dp->compliance.test_active = 1;
+	intel_dp->compliance.test_active = true;
 
 	return DP_TEST_ACK;
 }
@@ -4947,7 +4947,7 @@ static u8 intel_dp_autotest_edid(struct intel_dp *intel_dp)
 	}
 
 	/* Set test active flag here so userspace doesn't interrupt things */
-	intel_dp->compliance.test_active = 1;
+	intel_dp->compliance.test_active = true;
 
 	return test_result;
 }
@@ -5096,7 +5096,7 @@ int intel_dp_retrain_link(struct intel_encoder *encoder,
 			  struct drm_modeset_acquire_ctx *ctx)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	struct intel_connector *connector = intel_dp->attached_connector;
 	struct drm_connector_state *conn_state;
 	struct intel_crtc_state *crtc_state;
@@ -5536,7 +5536,7 @@ static bool intel_combo_phy_connected(struct drm_i915_private *dev_priv,
 static bool icp_digital_port_connected(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
 	enum phy phy = intel_port_to_phy(dev_priv, encoder->port);
 
 	if (intel_phy_is_combo(dev_priv, phy))
@@ -5651,7 +5651,7 @@ intel_dp_detect(struct drm_connector *connector,
 		bool force)
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->dev);
-	struct intel_dp *intel_dp = intel_attached_dp(connector);
+	struct intel_dp *intel_dp = intel_attached_dp(to_intel_connector(connector));
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
 	struct intel_encoder *encoder = &dig_port->base;
 	enum drm_connector_status status;
@@ -5755,7 +5755,7 @@ out:
 static void
 intel_dp_force(struct drm_connector *connector)
 {
-	struct intel_dp *intel_dp = intel_attached_dp(connector);
+	struct intel_dp *intel_dp = intel_attached_dp(to_intel_connector(connector));
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
 	struct intel_encoder *intel_encoder = &dig_port->base;
 	struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev);
@@ -5790,7 +5790,7 @@ static int intel_dp_get_modes(struct drm_connector *connector)
 	}
 
 	/* if eDP has no EDID, fall back to fixed mode */
-	if (intel_dp_is_edp(intel_attached_dp(connector)) &&
+	if (intel_dp_is_edp(intel_attached_dp(to_intel_connector(connector))) &&
 	    intel_connector->panel.fixed_mode) {
 		struct drm_display_mode *mode;
 
@@ -5808,7 +5808,7 @@ static int intel_dp_get_modes(struct drm_connector *connector)
 static int
 intel_dp_connector_register(struct drm_connector *connector)
 {
-	struct intel_dp *intel_dp = intel_attached_dp(connector);
+	struct intel_dp *intel_dp = intel_attached_dp(to_intel_connector(connector));
 	int ret;
 
 	ret = intel_connector_register(connector);
@@ -5830,7 +5830,7 @@ intel_dp_connector_register(struct drm_connector *connector)
 static void
 intel_dp_connector_unregister(struct drm_connector *connector)
 {
-	struct intel_dp *intel_dp = intel_attached_dp(connector);
+	struct intel_dp *intel_dp = intel_attached_dp(to_intel_connector(connector));
 
 	drm_dp_cec_unregister_connector(&intel_dp->aux);
 	drm_dp_aux_unregister(&intel_dp->aux);
@@ -5839,7 +5839,7 @@ intel_dp_connector_unregister(struct drm_connector *connector)
 
 void intel_dp_encoder_flush_work(struct drm_encoder *encoder)
 {
-	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
+	struct intel_digital_port *intel_dig_port = enc_to_dig_port(to_intel_encoder(encoder));
 	struct intel_dp *intel_dp = &intel_dig_port->dp;
 
 	intel_dp_mst_encoder_cleanup(intel_dig_port);
@@ -5868,12 +5868,12 @@ static void intel_dp_encoder_destroy(struct drm_encoder *encoder)
 	intel_dp_encoder_flush_work(encoder);
 
 	drm_encoder_cleanup(encoder);
-	kfree(enc_to_dig_port(encoder));
+	kfree(enc_to_dig_port(to_intel_encoder(encoder)));
 }
 
 void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&intel_encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(intel_encoder);
 	intel_wakeref_t wakeref;
 
 	if (!intel_dp_is_edp(intel_dp))
@@ -5904,7 +5904,7 @@ static
 int intel_dp_hdcp_write_an_aksv(struct intel_digital_port *intel_dig_port,
 				u8 *an)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&intel_dig_port->base.base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(to_intel_encoder(&intel_dig_port->base.base));
 	static const struct drm_dp_aux_msg msg = {
 		.request = DP_AUX_NATIVE_WRITE,
 		.address = DP_AUX_HDCP_AKSV,
@@ -6514,7 +6514,7 @@ static enum pipe vlv_active_pipe(struct intel_dp *intel_dp)
 void intel_dp_encoder_reset(struct drm_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->dev);
-	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+	struct intel_dp *intel_dp = enc_to_intel_dp(to_intel_encoder(encoder));
 	struct intel_lspcon *lspcon = dp_to_lspcon(intel_dp);
 	intel_wakeref_t wakeref;
 
@@ -6693,7 +6693,7 @@ intel_pps_readout_hw_state(struct intel_dp *intel_dp, struct edp_power_seq *seq)
 
 	intel_pps_get_registers(intel_dp, &regs);
 
-	pp_ctl = ironlake_get_pp_control(intel_dp);
+	pp_ctl = ilk_get_pp_control(intel_dp);
 
 	/* Ensure PPS is unlocked */
 	if (!HAS_DDI(dev_priv))
@@ -6863,7 +6863,7 @@ intel_dp_init_panel_power_sequencer_registers(struct intel_dp *intel_dp,
 	 * soon as the new power sequencer gets initialized.
 	 */
 	if (force_disable_vdd) {
-		u32 pp = ironlake_get_pp_control(intel_dp);
+		u32 pp = ilk_get_pp_control(intel_dp);
 
 		WARN(pp & PANEL_POWER_ON, "Panel power already on\n");
 
@@ -7660,7 +7660,7 @@ void intel_dp_mst_suspend(struct drm_i915_private *dev_priv)
 		if (encoder->type != INTEL_OUTPUT_DDI)
 			continue;
 
-		intel_dp = enc_to_intel_dp(&encoder->base);
+		intel_dp = enc_to_intel_dp(encoder);
 
 		if (!intel_dp->can_mst)
 			continue;
@@ -7681,7 +7681,7 @@ void intel_dp_mst_resume(struct drm_i915_private *dev_priv)
 		if (encoder->type != INTEL_OUTPUT_DDI)
 			continue;
 
-		intel_dp = enc_to_intel_dp(&encoder->base);
+		intel_dp = enc_to_intel_dp(encoder);
 
 		if (!intel_dp->can_mst)
 			continue;
diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
index 020422da2ae2..7c653f8c307f 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
@@ -57,7 +57,7 @@ static void set_aux_backlight_enable(struct intel_dp *intel_dp, bool enable)
  */
 static u32 intel_dp_aux_get_backlight(struct intel_connector *connector)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder);
 	u8 read_val[2] = { 0x0 };
 	u16 level = 0;
 
@@ -82,7 +82,7 @@ static void
 intel_dp_aux_set_backlight(const struct drm_connector_state *conn_state, u32 level)
 {
 	struct intel_connector *connector = to_intel_connector(conn_state->connector);
-	struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder);
 	u8 vals[2] = { 0x0 };
 
 	vals[0] = level;
@@ -110,7 +110,7 @@ intel_dp_aux_set_backlight(const struct drm_connector_state *conn_state, u32 lev
 static bool intel_dp_aux_set_pwm_freq(struct intel_connector *connector)
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
-	struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder);
 	int freq, fxp, fxp_min, fxp_max, fxp_actual, f = 1;
 	u8 pn, pn_min, pn_max;
 
@@ -178,7 +178,7 @@ static void intel_dp_aux_enable_backlight(const struct intel_crtc_state *crtc_st
 					  const struct drm_connector_state *conn_state)
 {
 	struct intel_connector *connector = to_intel_connector(conn_state->connector);
-	struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder);
 	u8 dpcd_buf, new_dpcd_buf, edp_backlight_mode;
 
 	if (drm_dp_dpcd_readb(&intel_dp->aux,
@@ -222,13 +222,14 @@ static void intel_dp_aux_enable_backlight(const struct intel_crtc_state *crtc_st
 
 static void intel_dp_aux_disable_backlight(const struct drm_connector_state *old_conn_state)
 {
-	set_aux_backlight_enable(enc_to_intel_dp(old_conn_state->best_encoder), false);
+	set_aux_backlight_enable(enc_to_intel_dp(to_intel_encoder(old_conn_state->best_encoder)),
+				 false);
 }
 
 static int intel_dp_aux_setup_backlight(struct intel_connector *connector,
 					enum pipe pipe)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder);
 	struct intel_panel *panel = &connector->panel;
 
 	if (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_BYTE_COUNT)
@@ -247,7 +248,7 @@ static int intel_dp_aux_setup_backlight(struct intel_connector *connector,
 static bool
 intel_dp_aux_display_control_capable(struct intel_connector *connector)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder);
 
 	/* Check the eDP Display control capabilities registers to determine if
 	 * the panel can support backlight control over the aux channel
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 53bc14d0e953..cba68c5a80fa 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -43,7 +43,7 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder,
 					    struct link_config_limits *limits)
 {
 	struct drm_atomic_state *state = crtc_state->uapi.state;
-	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base);
+	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder);
 	struct intel_dp *intel_dp = &intel_mst->primary->dp;
 	struct intel_connector *connector =
 		to_intel_connector(conn_state->connector);
@@ -88,12 +88,58 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder,
 	return 0;
 }
 
+/*
+ * Iterate over the connectors in @state and return the lowest pipe (cast to a
+ * transcoder) among the active CRTCs driving @mst_port, if any.
+ */
+static enum transcoder
+intel_dp_mst_master_trans_compute(struct intel_atomic_state *state,
+				  struct intel_dp *mst_port)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	struct intel_digital_connector_state *conn_state;
+	struct intel_connector *connector;
+	enum pipe ret = I915_MAX_PIPES;
+	int i;
+
+	if (INTEL_GEN(dev_priv) < 12)
+		return INVALID_TRANSCODER;
+
+	for_each_new_intel_connector_in_state(state, connector, conn_state, i) {
+		struct intel_crtc_state *crtc_state;
+		struct intel_crtc *crtc;
+
+		if (connector->mst_port != mst_port || !conn_state->base.crtc)
+			continue;
+
+		crtc = to_intel_crtc(conn_state->base.crtc);
+		crtc_state = intel_atomic_get_new_crtc_state(state, crtc);
+		if (!crtc_state->uapi.active)
+			continue;
+
+		/*
+		 * Use crtc->pipe because crtc_state->cpu_transcoder is
+		 * computed, so other CRTCs may not have their
+		 * cpu_transcoder computed yet
+		 */
+		if (crtc->pipe < ret)
+			ret = crtc->pipe;
+	}
+
+	if (ret == I915_MAX_PIPES)
+		return INVALID_TRANSCODER;
+
+	/* Simple cast works because TGL doesn't have an eDP transcoder */
+	return (enum transcoder)ret;
+}
+
 static int intel_dp_mst_compute_config(struct intel_encoder *encoder,
 				       struct intel_crtc_state *pipe_config,
 				       struct drm_connector_state *conn_state)
 {
+	struct intel_atomic_state *state = to_intel_atomic_state(conn_state->state);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base);
+	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder);
 	struct intel_dp *intel_dp = &intel_mst->primary->dp;
 	struct intel_connector *connector =
 		to_intel_connector(conn_state->connector);
@@ -155,24 +201,91 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder,
 
 	intel_ddi_compute_min_voltage_level(dev_priv, pipe_config);
 
+	pipe_config->mst_master_transcoder = intel_dp_mst_master_trans_compute(state, intel_dp);
+
+	return 0;
+}
+
+/*
+ * If one of the connectors in an MST stream needs a modeset, mark all CRTCs
+ * that share the same MST stream as mode changed;
+ * intel_modeset_pipe_config()+intel_crtc_check_fastset() will take care of
+ * doing a fastset when possible.
+ */
+static int
+intel_dp_mst_atomic_master_trans_check(struct intel_connector *connector,
+				       struct intel_atomic_state *state)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	struct drm_connector_list_iter connector_list_iter;
+	struct intel_connector *connector_iter;
+
+	if (INTEL_GEN(dev_priv) < 12)
+		return  0;
+
+	if (!intel_connector_needs_modeset(state, &connector->base))
+		return 0;
+
+	drm_connector_list_iter_begin(&dev_priv->drm, &connector_list_iter);
+	for_each_intel_connector_iter(connector_iter, &connector_list_iter) {
+		struct intel_digital_connector_state *conn_iter_state;
+		struct intel_crtc_state *crtc_state;
+		struct intel_crtc *crtc;
+		int ret;
+
+		if (connector_iter->mst_port != connector->mst_port ||
+		    connector_iter == connector)
+			continue;
+
+		conn_iter_state = intel_atomic_get_digital_connector_state(state,
+									   connector_iter);
+		if (IS_ERR(conn_iter_state)) {
+			drm_connector_list_iter_end(&connector_list_iter);
+			return PTR_ERR(conn_iter_state);
+		}
+
+		if (!conn_iter_state->base.crtc)
+			continue;
+
+		crtc = to_intel_crtc(conn_iter_state->base.crtc);
+		crtc_state = intel_atomic_get_crtc_state(&state->base, crtc);
+		if (IS_ERR(crtc_state)) {
+			drm_connector_list_iter_end(&connector_list_iter);
+			return PTR_ERR(crtc_state);
+		}
+
+		ret = drm_atomic_add_affected_planes(&state->base, &crtc->base);
+		if (ret) {
+			drm_connector_list_iter_end(&connector_list_iter);
+			return ret;
+		}
+		crtc_state->uapi.mode_changed = true;
+	}
+	drm_connector_list_iter_end(&connector_list_iter);
+
 	return 0;
 }
 
 static int
 intel_dp_mst_atomic_check(struct drm_connector *connector,
-			  struct drm_atomic_state *state)
+			  struct drm_atomic_state *_state)
 {
+	struct intel_atomic_state *state = to_intel_atomic_state(_state);
 	struct drm_connector_state *new_conn_state =
-		drm_atomic_get_new_connector_state(state, connector);
+		drm_atomic_get_new_connector_state(&state->base, connector);
 	struct drm_connector_state *old_conn_state =
-		drm_atomic_get_old_connector_state(state, connector);
+		drm_atomic_get_old_connector_state(&state->base, connector);
 	struct intel_connector *intel_connector =
 		to_intel_connector(connector);
 	struct drm_crtc *new_crtc = new_conn_state->crtc;
 	struct drm_dp_mst_topology_mgr *mgr;
 	int ret;
 
-	ret = intel_digital_connector_atomic_check(connector, state);
+	ret = intel_digital_connector_atomic_check(connector, &state->base);
+	if (ret)
+		return ret;
+
+	ret = intel_dp_mst_atomic_master_trans_check(intel_connector, state);
 	if (ret)
 		return ret;
 
@@ -183,12 +296,9 @@ intel_dp_mst_atomic_check(struct drm_connector *connector,
 	 * connector
 	 */
 	if (new_crtc) {
-		struct intel_atomic_state *intel_state =
-			to_intel_atomic_state(state);
 		struct intel_crtc *intel_crtc = to_intel_crtc(new_crtc);
 		struct intel_crtc_state *crtc_state =
-			intel_atomic_get_new_crtc_state(intel_state,
-							intel_crtc);
+			intel_atomic_get_new_crtc_state(state, intel_crtc);
 
 		if (!crtc_state ||
 		    !drm_atomic_crtc_needs_modeset(&crtc_state->uapi) ||
@@ -196,8 +306,8 @@ intel_dp_mst_atomic_check(struct drm_connector *connector,
 			return 0;
 	}
 
-	mgr = &enc_to_mst(old_conn_state->best_encoder)->primary->dp.mst_mgr;
-	ret = drm_dp_atomic_release_vcpi_slots(state, mgr,
+	mgr = &enc_to_mst(to_intel_encoder(old_conn_state->best_encoder))->primary->dp.mst_mgr;
+	ret = drm_dp_atomic_release_vcpi_slots(&state->base, mgr,
 					       intel_connector->port);
 
 	return ret;
@@ -207,7 +317,7 @@ static void intel_mst_disable_dp(struct intel_encoder *encoder,
 				 const struct intel_crtc_state *old_crtc_state,
 				 const struct drm_connector_state *old_conn_state)
 {
-	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base);
+	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder);
 	struct intel_digital_port *intel_dig_port = intel_mst->primary;
 	struct intel_dp *intel_dp = &intel_dig_port->dp;
 	struct intel_connector *connector =
@@ -231,29 +341,51 @@ static void intel_mst_post_disable_dp(struct intel_encoder *encoder,
 				      const struct intel_crtc_state *old_crtc_state,
 				      const struct drm_connector_state *old_conn_state)
 {
-	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base);
+	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder);
 	struct intel_digital_port *intel_dig_port = intel_mst->primary;
 	struct intel_dp *intel_dp = &intel_dig_port->dp;
 	struct intel_connector *connector =
 		to_intel_connector(old_conn_state->connector);
 	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
 	bool last_mst_stream;
+	u32 val;
 
 	intel_dp->active_mst_links--;
 	last_mst_stream = intel_dp->active_mst_links == 0;
+	WARN_ON(INTEL_GEN(dev_priv) >= 12 && last_mst_stream &&
+		!intel_dp_mst_is_master_trans(old_crtc_state));
 
 	intel_crtc_vblank_off(old_crtc_state);
 
 	intel_disable_pipe(old_crtc_state);
 
+	drm_dp_update_payload_part2(&intel_dp->mst_mgr);
+
+	val = I915_READ(TRANS_DDI_FUNC_CTL(old_crtc_state->cpu_transcoder));
+	val &= ~TRANS_DDI_DP_VC_PAYLOAD_ALLOC;
+	I915_WRITE(TRANS_DDI_FUNC_CTL(old_crtc_state->cpu_transcoder), val);
+
+	if (intel_de_wait_for_set(dev_priv, intel_dp->regs.dp_tp_status,
+				  DP_TP_STATUS_ACT_SENT, 1))
+		DRM_ERROR("Timed out waiting for ACT sent when disabling\n");
+	drm_dp_check_act_status(&intel_dp->mst_mgr);
+
+	drm_dp_mst_deallocate_vcpi(&intel_dp->mst_mgr, connector->port);
+
 	intel_ddi_disable_transcoder_func(old_crtc_state);
 
 	if (INTEL_GEN(dev_priv) >= 9)
-		skylake_scaler_disable(old_crtc_state);
+		skl_scaler_disable(old_crtc_state);
 	else
-		ironlake_pfit_disable(old_crtc_state);
+		ilk_pfit_disable(old_crtc_state);
 
 	/*
+	 * Power down mst path before disabling the port, otherwise we end
+	 * up getting interrupts from the sink upon detecting link loss.
+	 */
+	drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port,
+				     false);
+	/*
 	 * From TGL spec: "If multi-stream slave transcoder: Configure
 	 * Transcoder Clock Select to direct no clock to the transcoder"
 	 *
@@ -263,19 +395,6 @@ static void intel_mst_post_disable_dp(struct intel_encoder *encoder,
 	if (INTEL_GEN(dev_priv) < 12 || !last_mst_stream)
 		intel_ddi_disable_pipe_clock(old_crtc_state);
 
-	/* this can fail */
-	drm_dp_check_act_status(&intel_dp->mst_mgr);
-	/* and this can also fail */
-	drm_dp_update_payload_part2(&intel_dp->mst_mgr);
-
-	drm_dp_mst_deallocate_vcpi(&intel_dp->mst_mgr, connector->port);
-
-	/*
-	 * Power down mst path before disabling the port, otherwise we end
-	 * up getting interrupts from the sink upon detecting link loss.
-	 */
-	drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port,
-				     false);
 
 	intel_mst->connector = NULL;
 	if (last_mst_stream)
@@ -289,7 +408,7 @@ static void intel_mst_pre_pll_enable_dp(struct intel_encoder *encoder,
 					const struct intel_crtc_state *pipe_config,
 					const struct drm_connector_state *conn_state)
 {
-	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base);
+	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder);
 	struct intel_digital_port *intel_dig_port = intel_mst->primary;
 	struct intel_dp *intel_dp = &intel_dig_port->dp;
 
@@ -302,7 +421,7 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder,
 				    const struct intel_crtc_state *pipe_config,
 				    const struct drm_connector_state *conn_state)
 {
-	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base);
+	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder);
 	struct intel_digital_port *intel_dig_port = intel_mst->primary;
 	struct intel_dp *intel_dp = &intel_dig_port->dp;
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
@@ -318,6 +437,8 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder,
 	connector->encoder = encoder;
 	intel_mst->connector = connector;
 	first_mst_stream = intel_dp->active_mst_links == 0;
+	WARN_ON(INTEL_GEN(dev_priv) >= 12 && first_mst_stream &&
+		!intel_dp_mst_is_master_trans(pipe_config));
 
 	DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links);
 
@@ -360,7 +481,7 @@ static void intel_mst_enable_dp(struct intel_encoder *encoder,
 				const struct intel_crtc_state *pipe_config,
 				const struct drm_connector_state *conn_state)
 {
-	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base);
+	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder);
 	struct intel_digital_port *intel_dig_port = intel_mst->primary;
 	struct intel_dp *intel_dp = &intel_dig_port->dp;
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
@@ -381,7 +502,7 @@ static void intel_mst_enable_dp(struct intel_encoder *encoder,
 static bool intel_dp_mst_enc_get_hw_state(struct intel_encoder *encoder,
 				      enum pipe *pipe)
 {
-	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base);
+	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder);
 	*pipe = intel_mst->pipe;
 	if (intel_mst->connector)
 		return true;
@@ -391,7 +512,7 @@ static bool intel_dp_mst_enc_get_hw_state(struct intel_encoder *encoder,
 static void intel_dp_mst_enc_get_config(struct intel_encoder *encoder,
 					struct intel_crtc_state *pipe_config)
 {
-	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base);
+	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder);
 	struct intel_digital_port *intel_dig_port = intel_mst->primary;
 
 	intel_ddi_get_config(&intel_dig_port->base, pipe_config);
@@ -499,7 +620,7 @@ static const struct drm_connector_helper_funcs intel_dp_mst_connector_helper_fun
 
 static void intel_dp_mst_encoder_destroy(struct drm_encoder *encoder)
 {
-	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder);
+	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(to_intel_encoder(encoder));
 
 	drm_encoder_cleanup(encoder);
 	kfree(intel_mst);
@@ -723,3 +844,14 @@ intel_dp_mst_encoder_cleanup(struct intel_digital_port *intel_dig_port)
 	drm_dp_mst_topology_mgr_destroy(&intel_dp->mst_mgr);
 	/* encoders will get killed by normal cleanup */
 }
+
+bool intel_dp_mst_is_master_trans(const struct intel_crtc_state *crtc_state)
+{
+	return crtc_state->mst_master_transcoder == crtc_state->cpu_transcoder;
+}
+
+bool intel_dp_mst_is_slave_trans(const struct intel_crtc_state *crtc_state)
+{
+	return crtc_state->mst_master_transcoder != INVALID_TRANSCODER &&
+	       crtc_state->mst_master_transcoder != crtc_state->cpu_transcoder;
+}
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.h b/drivers/gpu/drm/i915/display/intel_dp_mst.h
index f660ad80db04..854724f68f09 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.h
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.h
@@ -6,10 +6,15 @@
 #ifndef __INTEL_DP_MST_H__
 #define __INTEL_DP_MST_H__
 
+#include <linux/types.h>
+
 struct intel_digital_port;
+struct intel_crtc_state;
 
 int intel_dp_mst_encoder_init(struct intel_digital_port *intel_dig_port, int conn_id);
 void intel_dp_mst_encoder_cleanup(struct intel_digital_port *intel_dig_port);
 int intel_dp_mst_encoder_active_links(struct intel_digital_port *intel_dig_port);
+bool intel_dp_mst_is_master_trans(const struct intel_crtc_state *crtc_state);
+bool intel_dp_mst_is_slave_trans(const struct intel_crtc_state *crtc_state);
 
 #endif /* __INTEL_DP_MST_H__ */
diff --git a/drivers/gpu/drm/i915/display/intel_dpio_phy.c b/drivers/gpu/drm/i915/display/intel_dpio_phy.c
index 704f38681c4b..6fb1f7a7364e 100644
--- a/drivers/gpu/drm/i915/display/intel_dpio_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_dpio_phy.c
@@ -642,7 +642,7 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder,
 			      bool uniq_trans_scale)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_digital_port *dport = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dport = enc_to_dig_port(encoder);
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc);
 	enum dpio_channel ch = vlv_dport_to_channel(dport);
 	enum pipe pipe = intel_crtc->pipe;
@@ -738,7 +738,7 @@ void chv_data_lane_soft_reset(struct intel_encoder *encoder,
 			      bool reset)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	enum dpio_channel ch = vlv_dport_to_channel(enc_to_dig_port(&encoder->base));
+	enum dpio_channel ch = vlv_dport_to_channel(enc_to_dig_port(encoder));
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	enum pipe pipe = crtc->pipe;
 	u32 val;
@@ -781,7 +781,7 @@ void chv_data_lane_soft_reset(struct intel_encoder *encoder,
 void chv_phy_pre_pll_enable(struct intel_encoder *encoder,
 			    const struct intel_crtc_state *crtc_state)
 {
-	struct intel_digital_port *dport = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dport = enc_to_dig_port(encoder);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	enum dpio_channel ch = vlv_dport_to_channel(dport);
@@ -861,7 +861,7 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder,
 void chv_phy_pre_encoder_enable(struct intel_encoder *encoder,
 				const struct intel_crtc_state *crtc_state)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	struct intel_digital_port *dport = dp_to_dig_port(intel_dp);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
@@ -940,7 +940,7 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder,
 
 void chv_phy_release_cl2_override(struct intel_encoder *encoder)
 {
-	struct intel_digital_port *dport = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dport = enc_to_dig_port(encoder);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 
 	if (dport->release_cl2_override) {
@@ -989,7 +989,7 @@ void vlv_set_phy_signal_level(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc);
-	struct intel_digital_port *dport = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dport = enc_to_dig_port(encoder);
 	enum dpio_channel port = vlv_dport_to_channel(dport);
 	enum pipe pipe = intel_crtc->pipe;
 
@@ -1014,7 +1014,7 @@ void vlv_set_phy_signal_level(struct intel_encoder *encoder,
 void vlv_phy_pre_pll_enable(struct intel_encoder *encoder,
 			    const struct intel_crtc_state *crtc_state)
 {
-	struct intel_digital_port *dport = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dport = enc_to_dig_port(encoder);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	enum dpio_channel port = vlv_dport_to_channel(dport);
@@ -1043,7 +1043,7 @@ void vlv_phy_pre_pll_enable(struct intel_encoder *encoder,
 void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder,
 				const struct intel_crtc_state *crtc_state)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	struct intel_digital_port *dport = dp_to_dig_port(intel_dp);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
@@ -1073,7 +1073,7 @@ void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder,
 void vlv_phy_reset_lanes(struct intel_encoder *encoder,
 			 const struct intel_crtc_state *old_crtc_state)
 {
-	struct intel_digital_port *dport = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dport = enc_to_dig_port(encoder);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc);
 	enum dpio_channel port = vlv_dport_to_channel(dport);
diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index 728a4b045de7..c75e34d87111 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -2972,8 +2972,8 @@ static void icl_update_active_dpll(struct intel_atomic_state *state,
 	enum icl_port_dpll_id port_dpll_id = ICL_PORT_DPLL_DEFAULT;
 
 	primary_port = encoder->type == INTEL_OUTPUT_DP_MST ?
-		enc_to_mst(&encoder->base)->primary :
-		enc_to_dig_port(&encoder->base);
+		enc_to_mst(encoder)->primary :
+		enc_to_dig_port(encoder);
 
 	if (primary_port &&
 	    (primary_port->tc_mode == TC_PORT_DP_ALT ||
diff --git a/drivers/gpu/drm/i915/display/intel_dsi.h b/drivers/gpu/drm/i915/display/intel_dsi.h
index b15be5814599..19f78a4022d3 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi.h
+++ b/drivers/gpu/drm/i915/display/intel_dsi.h
@@ -45,8 +45,9 @@ struct intel_dsi {
 	struct intel_dsi_host *dsi_hosts[I915_MAX_PORTS];
 	intel_wakeref_t io_wakeref[I915_MAX_PORTS];
 
-	/* GPIO Desc for CRC based Panel control */
+	/* GPIO Desc for panel and backlight control */
 	struct gpio_desc *gpio_panel;
+	struct gpio_desc *gpio_backlight;
 
 	struct intel_connector *attached_connector;
 
@@ -68,6 +69,9 @@ struct intel_dsi {
 	/* number of DSI lanes */
 	unsigned int lane_count;
 
+	/* i2c bus associated with the slave device */
+	int i2c_bus_num;
+
 	/*
 	 * video mode pixel format
 	 *
@@ -141,9 +145,9 @@ static inline struct intel_dsi_host *to_intel_dsi_host(struct mipi_dsi_host *h)
 #define for_each_dsi_phy(__phy, __phys_mask) \
 	for_each_phy_masked(__phy, __phys_mask)
 
-static inline struct intel_dsi *enc_to_intel_dsi(struct drm_encoder *encoder)
+static inline struct intel_dsi *enc_to_intel_dsi(struct intel_encoder *encoder)
 {
-	return container_of(encoder, struct intel_dsi, base.base);
+	return container_of(&encoder->base, struct intel_dsi, base.base);
 }
 
 static inline bool is_vid_mode(struct intel_dsi *intel_dsi)
@@ -158,7 +162,7 @@ static inline bool is_cmd_mode(struct intel_dsi *intel_dsi)
 
 static inline u16 intel_dsi_encoder_ports(struct intel_encoder *encoder)
 {
-	return enc_to_intel_dsi(&encoder->base)->ports;
+	return enc_to_intel_dsi(encoder)->ports;
 }
 
 /* icl_dsi.c */
@@ -203,6 +207,8 @@ void bxt_dsi_reset_clocks(struct intel_encoder *encoder, enum port port);
 
 /* intel_dsi_vbt.c */
 bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id);
+void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on);
+void intel_dsi_vbt_gpio_cleanup(struct intel_dsi *intel_dsi);
 void intel_dsi_vbt_exec_sequence(struct intel_dsi *intel_dsi,
 				 enum mipi_seq seq_id);
 void intel_dsi_msleep(struct intel_dsi *intel_dsi, int msec);
diff --git a/drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.c b/drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.c
index bb3fd8b786a2..c87838843d0b 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.c
@@ -46,7 +46,7 @@
 static u32 dcs_get_backlight(struct intel_connector *connector)
 {
 	struct intel_encoder *encoder = connector->encoder;
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	struct mipi_dsi_device *dsi_device;
 	u8 data = 0;
 	enum port port;
@@ -64,7 +64,7 @@ static u32 dcs_get_backlight(struct intel_connector *connector)
 
 static void dcs_set_backlight(const struct drm_connector_state *conn_state, u32 level)
 {
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(conn_state->best_encoder);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(conn_state->best_encoder));
 	struct mipi_dsi_device *dsi_device;
 	u8 data = level;
 	enum port port;
@@ -79,7 +79,7 @@ static void dcs_set_backlight(const struct drm_connector_state *conn_state, u32
 
 static void dcs_disable_backlight(const struct drm_connector_state *conn_state)
 {
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(conn_state->best_encoder);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(conn_state->best_encoder));
 	struct mipi_dsi_device *dsi_device;
 	enum port port;
 
@@ -113,7 +113,7 @@ static void dcs_disable_backlight(const struct drm_connector_state *conn_state)
 static void dcs_enable_backlight(const struct intel_crtc_state *crtc_state,
 				 const struct drm_connector_state *conn_state)
 {
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(conn_state->best_encoder);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(conn_state->best_encoder));
 	struct intel_panel *panel = &to_intel_connector(conn_state->connector)->panel;
 	struct mipi_dsi_device *dsi_device;
 	enum port port;
diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index f90946c912ee..89fb0d90b694 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -25,7 +25,10 @@
  */
 
 #include <linux/gpio/consumer.h>
+#include <linux/gpio/machine.h>
 #include <linux/mfd/intel_soc_pmic.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/pinctrl/machine.h>
 #include <linux/slab.h>
 
 #include <asm/intel-mid.h>
@@ -83,6 +86,12 @@ static struct gpio_map vlv_gpio_table[] = {
 	{ VLV_GPIO_NC_11_PANEL1_BKLTCTL },
 };
 
+struct i2c_adapter_lookup {
+	u16 slave_addr;
+	struct intel_dsi *intel_dsi;
+	acpi_handle dev_handle;
+};
+
 #define CHV_GPIO_IDX_START_N		0
 #define CHV_GPIO_IDX_START_E		73
 #define CHV_GPIO_IDX_START_SW		100
@@ -375,11 +384,131 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data)
 	return data;
 }
 
+/*
+ * acpi_dev_get_resources() callback that resolves the i2c bus a VBT I2C
+ * element is addressed to. @data points at a struct i2c_adapter_lookup.
+ *
+ * When i2c_acpi_get_i2c_resource() accepts @ares and its slave address equals
+ * lookup->slave_addr, the adapter named by the resource source is looked up
+ * and its bus number is stored in intel_dsi->i2c_bus_num. Resources are
+ * ignored once i2c_bus_num is non-negative, so the first match wins.
+ *
+ * Always returns 1.
+ */
+static int i2c_adapter_lookup(struct acpi_resource *ares, void *data)
+{
+	struct i2c_adapter_lookup *lookup = data;
+	struct intel_dsi *intel_dsi = lookup->intel_dsi;
+	struct acpi_resource_i2c_serialbus *sb;
+	struct i2c_adapter *adapter;
+	acpi_handle adapter_handle;
+	acpi_status status;
+
+	if (intel_dsi->i2c_bus_num >= 0 ||
+	    !i2c_acpi_get_i2c_resource(ares, &sb))
+		return 1;
+
+	if (lookup->slave_addr != sb->slave_address)
+		return 1;
+
+	status = acpi_get_handle(lookup->dev_handle,
+				 sb->resource_source.string_ptr,
+				 &adapter_handle);
+	if (ACPI_FAILURE(status))
+		return 1;
+
+	adapter = i2c_acpi_find_adapter_by_handle(adapter_handle);
+	if (adapter)
+		intel_dsi->i2c_bus_num = adapter->nr;
+
+	return 1;
+}
+
+/*
+ * Execute one I2C element of a VBT MIPI sequence by writing its payload to
+ * the addressed i2c device. Fields read from the element: data[2] is the VBT
+ * bus number, data[3..4] the slave address (little endian), data[5] the
+ * register offset, data[6] the payload size and data[7..] the payload.
+ *
+ * The bus number is resolved on first use and cached in
+ * intel_dsi->i2c_bus_num: the resources of the ACPI companion of the DRM
+ * device's PCI device are searched with i2c_adapter_lookup(), and the bus
+ * number from the VBT is used only if that search stores nothing.
+ *
+ * Failures are not propagated (bus lookup and transfer errors are logged).
+ * Returns a pointer just past this element: data + payload size + 7.
+ */
 static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data)
 {
-	DRM_DEBUG_KMS("Skipping I2C element execution\n");
+	struct drm_device *drm_dev = intel_dsi->base.base.dev;
+	struct device *dev = &drm_dev->pdev->dev;
+	struct i2c_adapter *adapter;
+	struct acpi_device *acpi_dev;
+	struct list_head resource_list;
+	struct i2c_adapter_lookup lookup;
+	struct i2c_msg msg;
+	int ret;
+	u8 vbt_i2c_bus_num = *(data + 2);
+	/* Byte-wise read: data is const and may be unaligned at +3. */
+	u16 slave_addr = data[3] | (data[4] << 8);
+	u8 reg_offset = *(data + 5);
+	u8 payload_size = *(data + 6);
+	u8 *payload_data;
+
+	if (intel_dsi->i2c_bus_num < 0) {
+		/*
+		 * Run the ACPI lookup while i2c_bus_num is still negative;
+		 * i2c_adapter_lookup() ignores every resource once it is set.
+		 */
+		acpi_dev = ACPI_COMPANION(dev);
+		if (acpi_dev) {
+			memset(&lookup, 0, sizeof(lookup));
+			lookup.slave_addr = slave_addr;
+			lookup.intel_dsi = intel_dsi;
+			lookup.dev_handle = acpi_device_handle(acpi_dev);
+
+			INIT_LIST_HEAD(&resource_list);
+			acpi_dev_get_resources(acpi_dev, &resource_list,
+					       i2c_adapter_lookup,
+					       &lookup);
+			acpi_dev_free_resource_list(&resource_list);
+		}
+
+		/* Nothing found via ACPI: fall back to the VBT bus number. */
+		if (intel_dsi->i2c_bus_num < 0)
+			intel_dsi->i2c_bus_num = vbt_i2c_bus_num;
+	}
 
-	return data + *(data + 6) + 7;
+	adapter = i2c_get_adapter(intel_dsi->i2c_bus_num);
+	if (!adapter) {
+		DRM_DEV_ERROR(dev, "Cannot find a valid i2c bus for xfer\n");
+		goto err_bus;
+	}
+
+	payload_data = kzalloc(payload_size + 1, GFP_KERNEL);
+	if (!payload_data)
+		goto err_alloc;
+
+	/* One write message: the register offset followed by the payload. */
+	payload_data[0] = reg_offset;
+	memcpy(&payload_data[1], (data + 7), payload_size);
+
+	msg.addr = slave_addr;
+	msg.flags = 0;
+	msg.len = payload_size + 1;
+	msg.buf = payload_data;
+
+	ret = i2c_transfer(adapter, &msg, 1);
+	if (ret < 0)
+		DRM_DEV_ERROR(dev,
+			      "Failed to xfer payload of size (%u) to reg (%u)\n",
+			      payload_size, reg_offset);
+
+	kfree(payload_data);
+err_alloc:
+	i2c_put_adapter(adapter);
+err_bus:
+	return data + payload_size + 7;
 }
 
 static const u8 *mipi_exec_spi(struct intel_dsi *intel_dsi, const u8 *data)
@@ -453,8 +549,8 @@ static const char *sequence_name(enum mipi_seq seq_id)
 		return "(unknown)";
 }
 
-void intel_dsi_vbt_exec_sequence(struct intel_dsi *intel_dsi,
-				 enum mipi_seq seq_id)
+static void intel_dsi_vbt_exec(struct intel_dsi *intel_dsi,
+			       enum mipi_seq seq_id)
 {
 	struct drm_i915_private *dev_priv = to_i915(intel_dsi->base.base.dev);
 	const u8 *data;
@@ -519,6 +615,44 @@ void intel_dsi_vbt_exec_sequence(struct intel_dsi *intel_dsi,
 	}
 }
 
+/*
+ * Run VBT sequence seq_id, driving the panel/backlight GPIOs (when they were
+ * acquired) around it: the matching line is set to 1 before a POWER_ON or
+ * BACKLIGHT_ON sequence runs, and to 0 after a POWER_OFF or BACKLIGHT_OFF
+ * sequence has run.
+ */
+void intel_dsi_vbt_exec_sequence(struct intel_dsi *intel_dsi,
+				 enum mipi_seq seq_id)
+{
+	switch (seq_id) {
+	case MIPI_SEQ_POWER_ON:
+		if (intel_dsi->gpio_panel)
+			gpiod_set_value_cansleep(intel_dsi->gpio_panel, 1);
+		break;
+	case MIPI_SEQ_BACKLIGHT_ON:
+		if (intel_dsi->gpio_backlight)
+			gpiod_set_value_cansleep(intel_dsi->gpio_backlight, 1);
+		break;
+	default:
+		break;
+	}
+
+	intel_dsi_vbt_exec(intel_dsi, seq_id);
+
+	switch (seq_id) {
+	case MIPI_SEQ_POWER_OFF:
+		if (intel_dsi->gpio_panel)
+			gpiod_set_value_cansleep(intel_dsi->gpio_panel, 0);
+		break;
+	case MIPI_SEQ_BACKLIGHT_OFF:
+		if (intel_dsi->gpio_backlight)
+			gpiod_set_value_cansleep(intel_dsi->gpio_backlight, 0);
+		break;
+	default:
+		break;
+	}
+}
+
 void intel_dsi_msleep(struct intel_dsi *intel_dsi, int msec)
 {
 	struct drm_i915_private *dev_priv = to_i915(intel_dsi->base.base.dev);
@@ -664,6 +776,8 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id)
 	intel_dsi->panel_off_delay = pps->panel_off_delay / 10;
 	intel_dsi->panel_pwr_cycle_delay = pps->panel_power_cycle_delay / 10;
 
+	intel_dsi->i2c_bus_num = -1;
+
 	/* a regular driver would get the device in probe */
 	for_each_dsi_port(port, intel_dsi->ports) {
 		mipi_dsi_attach(intel_dsi->dsi_hosts[port]->device);
@@ -671,3 +785,129 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id)
 
 	return true;
 }
+
+/*
+ * On some BYT/CHT devs some sequences are incomplete and we need to manually
+ * control some GPIOs. We need to add a GPIO lookup table before we get these.
+ * If the GOP did not initialize the panel (HDMI inserted) we may need to also
+ * change the pinmux for the SoC's PWM0 pin from GPIO to PWM.
+ */
+static struct gpiod_lookup_table pmic_panel_gpio_table = {
+	/* Intel GFX is consumer */
+	.dev_id = "0000:00:02.0",
+	.table = {
+		/* Panel EN/DISABLE */
+		GPIO_LOOKUP("gpio_crystalcove", 94, "panel", GPIO_ACTIVE_HIGH),
+		{ }
+	},
+};
+
+static struct gpiod_lookup_table soc_panel_gpio_table = {
+	.dev_id = "0000:00:02.0",
+	.table = {
+		GPIO_LOOKUP("INT33FC:01", 10, "backlight", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("INT33FC:01", 11, "panel", GPIO_ACTIVE_HIGH),
+		{ }
+	},
+};
+
+static const struct pinctrl_map soc_pwm_pinctrl_map[] = {
+	PIN_MAP_MUX_GROUP("0000:00:02.0", "soc_pwm0", "INT33FC:00",
+			  "pwm0_grp", "pwm"),
+};
+
+/*
+ * Acquire the GPIOs that intel_dsi_vbt_exec_sequence() toggles.
+ *
+ * VLV/CHV with PPS_BLC_PMIC: register the PMIC lookup table and request the
+ * "panel" GPIO. VLV with PPS_BLC_SOC: register the SoC lookup table, select
+ * the "soc_pwm0" pinmux state and request the "panel" and "backlight" GPIOs.
+ * The lines are requested as outputs, initially high if panel_is_on and low
+ * otherwise. Failures are logged and leave the affected intel_dsi->gpio_*
+ * pointer NULL.
+ */
+void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on)
+{
+	struct drm_device *dev = intel_dsi->base.base.dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct mipi_config *mipi_config = dev_priv->vbt.dsi.config;
+	enum gpiod_flags flags = panel_is_on ? GPIOD_OUT_HIGH : GPIOD_OUT_LOW;
+	struct gpio_desc *desc;
+	bool pmic_ctl, soc_ctl;
+
+	pmic_ctl = (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) &&
+		   mipi_config->pwm_blc == PPS_BLC_PMIC;
+	soc_ctl = IS_VALLEYVIEW(dev_priv) &&
+		  mipi_config->pwm_blc == PPS_BLC_SOC;
+
+	if (pmic_ctl)
+		gpiod_add_lookup_table(&pmic_panel_gpio_table);
+
+	if (soc_ctl) {
+		gpiod_add_lookup_table(&soc_panel_gpio_table);
+
+		/* Ensure PWM0 pin is muxed as PWM instead of GPIO */
+		if (pinctrl_register_mappings(soc_pwm_pinctrl_map,
+					      ARRAY_SIZE(soc_pwm_pinctrl_map)))
+			DRM_ERROR("Failed to register pwm0 pinmux mapping\n");
+
+		if (IS_ERR(devm_pinctrl_get_select(dev->dev, "soc_pwm0")))
+			DRM_ERROR("Failed to set pinmux to PWM\n");
+	}
+
+	/* Both control schemes drive a "panel" line */
+	if (pmic_ctl || soc_ctl) {
+		desc = gpiod_get(dev->dev, "panel", flags);
+		if (IS_ERR(desc)) {
+			DRM_ERROR("Failed to own gpio for panel control\n");
+			desc = NULL;
+		}
+		intel_dsi->gpio_panel = desc;
+	}
+
+	/* Only the SoC scheme has a separate "backlight" line */
+	if (soc_ctl) {
+		desc = gpiod_get(dev->dev, "backlight", flags);
+		if (IS_ERR(desc)) {
+			DRM_ERROR("Failed to own gpio for backlight control\n");
+			desc = NULL;
+		}
+		intel_dsi->gpio_backlight = desc;
+	}
+}
+
+/*
+ * Undo intel_dsi_vbt_gpio_init(): release the GPIOs that were acquired, then
+ * remove the lookup table (and, for the SoC case, the pinmux mapping) using
+ * the same platform/VBT conditions as at init time.
+ */
+void intel_dsi_vbt_gpio_cleanup(struct intel_dsi *intel_dsi)
+{
+	struct drm_device *dev = intel_dsi->base.base.dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct mipi_config *mipi_config = dev_priv->vbt.dsi.config;
+	bool pmic_ctl, soc_ctl;
+
+	if (intel_dsi->gpio_panel) {
+		gpiod_put(intel_dsi->gpio_panel);
+		intel_dsi->gpio_panel = NULL;
+	}
+
+	if (intel_dsi->gpio_backlight) {
+		gpiod_put(intel_dsi->gpio_backlight);
+		intel_dsi->gpio_backlight = NULL;
+	}
+
+	pmic_ctl = (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) &&
+		   mipi_config->pwm_blc == PPS_BLC_PMIC;
+	soc_ctl = IS_VALLEYVIEW(dev_priv) &&
+		  mipi_config->pwm_blc == PPS_BLC_SOC;
+
+	if (pmic_ctl)
+		gpiod_remove_lookup_table(&pmic_panel_gpio_table);
+
+	if (soc_ctl) {
+		pinctrl_unregister_mappings(soc_pwm_pinctrl_map);
+		gpiod_remove_lookup_table(&soc_panel_gpio_table);
+	}
+}
diff --git a/drivers/gpu/drm/i915/display/intel_dvo.c b/drivers/gpu/drm/i915/display/intel_dvo.c
index a74dc5b915d1..86a337c9d85d 100644
--- a/drivers/gpu/drm/i915/display/intel_dvo.c
+++ b/drivers/gpu/drm/i915/display/intel_dvo.c
@@ -125,7 +125,7 @@ static struct intel_dvo *enc_to_dvo(struct intel_encoder *encoder)
 	return container_of(encoder, struct intel_dvo, base);
 }
 
-static struct intel_dvo *intel_attached_dvo(struct drm_connector *connector)
+static struct intel_dvo *intel_attached_dvo(struct intel_connector *connector)
 {
 	return enc_to_dvo(intel_attached_encoder(connector));
 }
@@ -134,7 +134,7 @@ static bool intel_dvo_connector_get_hw_state(struct intel_connector *connector)
 {
 	struct drm_device *dev = connector->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_dvo *intel_dvo = intel_attached_dvo(&connector->base);
+	struct intel_dvo *intel_dvo = intel_attached_dvo(connector);
 	u32 tmp;
 
 	tmp = I915_READ(intel_dvo->dev.dvo_reg);
@@ -220,7 +220,7 @@ static enum drm_mode_status
 intel_dvo_mode_valid(struct drm_connector *connector,
 		     struct drm_display_mode *mode)
 {
-	struct intel_dvo *intel_dvo = intel_attached_dvo(connector);
+	struct intel_dvo *intel_dvo = intel_attached_dvo(to_intel_connector(connector));
 	const struct drm_display_mode *fixed_mode =
 		to_intel_connector(connector)->panel.fixed_mode;
 	int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
@@ -311,7 +311,7 @@ static void intel_dvo_pre_enable(struct intel_encoder *encoder,
 static enum drm_connector_status
 intel_dvo_detect(struct drm_connector *connector, bool force)
 {
-	struct intel_dvo *intel_dvo = intel_attached_dvo(connector);
+	struct intel_dvo *intel_dvo = intel_attached_dvo(to_intel_connector(connector));
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
 		      connector->base.id, connector->name);
 	return intel_dvo->dev.dev_ops->detect(&intel_dvo->dev);
diff --git a/drivers/gpu/drm/i915/display/intel_fifo_underrun.c b/drivers/gpu/drm/i915/display/intel_fifo_underrun.c
index ab61f88d1d33..6c83b350525d 100644
--- a/drivers/gpu/drm/i915/display/intel_fifo_underrun.c
+++ b/drivers/gpu/drm/i915/display/intel_fifo_underrun.c
@@ -126,8 +126,8 @@ static void i9xx_set_fifo_underrun_reporting(struct drm_device *dev,
 	}
 }
 
-static void ironlake_set_fifo_underrun_reporting(struct drm_device *dev,
-						 enum pipe pipe, bool enable)
+static void ilk_set_fifo_underrun_reporting(struct drm_device *dev,
+					    enum pipe pipe, bool enable)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	u32 bit = (pipe == PIPE_A) ?
@@ -139,7 +139,7 @@ static void ironlake_set_fifo_underrun_reporting(struct drm_device *dev,
 		ilk_disable_display_irq(dev_priv, bit);
 }
 
-static void ivybridge_check_fifo_underruns(struct intel_crtc *crtc)
+static void ivb_check_fifo_underruns(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
@@ -157,9 +157,9 @@ static void ivybridge_check_fifo_underruns(struct intel_crtc *crtc)
 	DRM_ERROR("fifo underrun on pipe %c\n", pipe_name(pipe));
 }
 
-static void ivybridge_set_fifo_underrun_reporting(struct drm_device *dev,
-						  enum pipe pipe,
-						  bool enable, bool old)
+static void ivb_set_fifo_underrun_reporting(struct drm_device *dev,
+					    enum pipe pipe, bool enable,
+					    bool old)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	if (enable) {
@@ -180,8 +180,8 @@ static void ivybridge_set_fifo_underrun_reporting(struct drm_device *dev,
 	}
 }
 
-static void broadwell_set_fifo_underrun_reporting(struct drm_device *dev,
-						  enum pipe pipe, bool enable)
+static void bdw_set_fifo_underrun_reporting(struct drm_device *dev,
+					    enum pipe pipe, bool enable)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 
@@ -264,11 +264,11 @@ static bool __intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev,
 	if (HAS_GMCH(dev_priv))
 		i9xx_set_fifo_underrun_reporting(dev, pipe, enable, old);
 	else if (IS_GEN_RANGE(dev_priv, 5, 6))
-		ironlake_set_fifo_underrun_reporting(dev, pipe, enable);
+		ilk_set_fifo_underrun_reporting(dev, pipe, enable);
 	else if (IS_GEN(dev_priv, 7))
-		ivybridge_set_fifo_underrun_reporting(dev, pipe, enable, old);
+		ivb_set_fifo_underrun_reporting(dev, pipe, enable, old);
 	else if (INTEL_GEN(dev_priv) >= 8)
-		broadwell_set_fifo_underrun_reporting(dev, pipe, enable);
+		bdw_set_fifo_underrun_reporting(dev, pipe, enable);
 
 	return old;
 }
@@ -427,7 +427,7 @@ void intel_check_cpu_fifo_underruns(struct drm_i915_private *dev_priv)
 		if (HAS_GMCH(dev_priv))
 			i9xx_check_fifo_underruns(crtc);
 		else if (IS_GEN(dev_priv, 7))
-			ivybridge_check_fifo_underruns(crtc);
+			ivb_check_fifo_underruns(crtc);
 	}
 
 	spin_unlock_irq(&dev_priv->irq_lock);
diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c
index 685589064d10..93ac0f296852 100644
--- a/drivers/gpu/drm/i915/display/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/display/intel_hdmi.c
@@ -85,16 +85,17 @@ assert_hdmi_transcoder_func_disabled(struct drm_i915_private *dev_priv,
 	     "HDMI transcoder function enabled, expecting disabled\n");
 }
 
-struct intel_hdmi *enc_to_intel_hdmi(struct drm_encoder *encoder)
+struct intel_hdmi *enc_to_intel_hdmi(struct intel_encoder *encoder)
 {
 	struct intel_digital_port *intel_dig_port =
-		container_of(encoder, struct intel_digital_port, base.base);
+		container_of(&encoder->base, struct intel_digital_port,
+			     base.base);
 	return &intel_dig_port->hdmi;
 }
 
-static struct intel_hdmi *intel_attached_hdmi(struct drm_connector *connector)
+static struct intel_hdmi *intel_attached_hdmi(struct intel_connector *connector)
 {
-	return enc_to_intel_hdmi(&intel_attached_encoder(connector)->base);
+	return enc_to_intel_hdmi(intel_attached_encoder(connector));
 }
 
 static u32 g4x_infoframe_index(unsigned int type)
@@ -602,7 +603,7 @@ u32 intel_hdmi_infoframes_enabled(struct intel_encoder *encoder,
 				  const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
 	u32 val, ret = 0;
 	int i;
 
@@ -646,7 +647,7 @@ static void intel_write_infoframe(struct intel_encoder *encoder,
 				  enum hdmi_infoframe_type type,
 				  const union hdmi_infoframe *frame)
 {
-	struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
 	u8 buffer[VIDEO_DIP_DATA_SIZE];
 	ssize_t len;
 
@@ -675,7 +676,7 @@ void intel_read_infoframe(struct intel_encoder *encoder,
 			  enum hdmi_infoframe_type type,
 			  union hdmi_infoframe *frame)
 {
-	struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
 	u8 buffer[VIDEO_DIP_DATA_SIZE];
 	int ret;
 
@@ -855,7 +856,7 @@ static void g4x_set_infoframes(struct intel_encoder *encoder,
 			       const struct drm_connector_state *conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
 	struct intel_hdmi *intel_hdmi = &intel_dig_port->hdmi;
 	i915_reg_t reg = VIDEO_DIP_CTL;
 	u32 val = I915_READ(reg);
@@ -1038,7 +1039,7 @@ static void ibx_set_infoframes(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc);
-	struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
 	struct intel_hdmi *intel_hdmi = &intel_dig_port->hdmi;
 	i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
 	u32 val = I915_READ(reg);
@@ -1097,7 +1098,7 @@ static void cpt_set_infoframes(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc);
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
 	i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
 	u32 val = I915_READ(reg);
 
@@ -1146,7 +1147,7 @@ static void vlv_set_infoframes(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc);
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
 	i915_reg_t reg = VLV_TVIDEO_DIP_CTL(intel_crtc->pipe);
 	u32 val = I915_READ(reg);
 	u32 port = VIDEO_DIP_PORT(encoder->port);
@@ -1737,7 +1738,7 @@ static void intel_hdmi_prepare(struct intel_encoder *encoder,
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
 	const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode;
 	u32 hdmi_val;
 
@@ -1774,7 +1775,7 @@ static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder,
 				    enum pipe *pipe)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
 	intel_wakeref_t wakeref;
 	bool ret;
 
@@ -1793,7 +1794,7 @@ static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder,
 static void intel_hdmi_get_config(struct intel_encoder *encoder,
 				  struct intel_crtc_state *pipe_config)
 {
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	u32 tmp, flags = 0;
@@ -1874,7 +1875,7 @@ static void g4x_enable_hdmi(struct intel_encoder *encoder,
 {
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
 	u32 temp;
 
 	temp = I915_READ(intel_hdmi->hdmi_reg);
@@ -1896,7 +1897,7 @@ static void ibx_enable_hdmi(struct intel_encoder *encoder,
 {
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
 	u32 temp;
 
 	temp = I915_READ(intel_hdmi->hdmi_reg);
@@ -1947,7 +1948,7 @@ static void cpt_enable_hdmi(struct intel_encoder *encoder,
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
 	enum pipe pipe = crtc->pipe;
 	u32 temp;
 
@@ -2007,7 +2008,7 @@ static void intel_disable_hdmi(struct intel_encoder *encoder,
 {
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
 	struct intel_digital_port *intel_dig_port =
 		hdmi_to_dig_port(intel_hdmi);
 	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc);
@@ -2160,7 +2161,7 @@ static enum drm_mode_status
 intel_hdmi_mode_valid(struct drm_connector *connector,
 		      struct drm_display_mode *mode)
 {
-	struct intel_hdmi *hdmi = intel_attached_hdmi(connector);
+	struct intel_hdmi *hdmi = intel_attached_hdmi(to_intel_connector(connector));
 	struct drm_device *dev = intel_hdmi_to_dev(hdmi);
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	enum drm_mode_status status;
@@ -2316,7 +2317,7 @@ static int intel_hdmi_compute_bpc(struct intel_encoder *encoder,
 				  struct intel_crtc_state *crtc_state,
 				  int clock, bool force_dvi)
 {
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
 	int bpc;
 
 	for (bpc = 12; bpc >= 10; bpc -= 2) {
@@ -2334,7 +2335,7 @@ static int intel_hdmi_compute_clock(struct intel_encoder *encoder,
 				    struct intel_crtc_state *crtc_state,
 				    bool force_dvi)
 {
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
 	const struct drm_display_mode *adjusted_mode =
 		&crtc_state->hw.adjusted_mode;
 	int bpc, clock = adjusted_mode->crtc_clock;
@@ -2404,7 +2405,7 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder,
 			      struct intel_crtc_state *pipe_config,
 			      struct drm_connector_state *conn_state)
 {
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
 	struct drm_connector *connector = conn_state->connector;
@@ -2496,7 +2497,7 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder,
 static void
 intel_hdmi_unset_edid(struct drm_connector *connector)
 {
-	struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector);
+	struct intel_hdmi *intel_hdmi = intel_attached_hdmi(to_intel_connector(connector));
 
 	intel_hdmi->has_hdmi_sink = false;
 	intel_hdmi->has_audio = false;
@@ -2512,7 +2513,7 @@ static void
 intel_hdmi_dp_dual_mode_detect(struct drm_connector *connector, bool has_edid)
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->dev);
-	struct intel_hdmi *hdmi = intel_attached_hdmi(connector);
+	struct intel_hdmi *hdmi = intel_attached_hdmi(to_intel_connector(connector));
 	enum port port = hdmi_to_dig_port(hdmi)->base.port;
 	struct i2c_adapter *adapter =
 		intel_gmbus_get_adapter(dev_priv, hdmi->ddc_bus);
@@ -2559,7 +2560,7 @@ static bool
 intel_hdmi_set_edid(struct drm_connector *connector)
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->dev);
-	struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector);
+	struct intel_hdmi *intel_hdmi = intel_attached_hdmi(to_intel_connector(connector));
 	intel_wakeref_t wakeref;
 	struct edid *edid;
 	bool connected = false;
@@ -2600,7 +2601,7 @@ intel_hdmi_detect(struct drm_connector *connector, bool force)
 {
 	enum drm_connector_status status = connector_status_disconnected;
 	struct drm_i915_private *dev_priv = to_i915(connector->dev);
-	struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector);
+	struct intel_hdmi *intel_hdmi = intel_attached_hdmi(to_intel_connector(connector));
 	struct intel_encoder *encoder = &hdmi_to_dig_port(intel_hdmi)->base;
 	intel_wakeref_t wakeref;
 
@@ -2663,7 +2664,7 @@ static void intel_hdmi_pre_enable(struct intel_encoder *encoder,
 				  const struct drm_connector_state *conn_state)
 {
 	struct intel_digital_port *intel_dig_port =
-		enc_to_dig_port(&encoder->base);
+		enc_to_dig_port(encoder);
 
 	intel_hdmi_prepare(encoder, pipe_config);
 
@@ -2676,7 +2677,7 @@ static void vlv_hdmi_pre_enable(struct intel_encoder *encoder,
 				const struct intel_crtc_state *pipe_config,
 				const struct drm_connector_state *conn_state)
 {
-	struct intel_digital_port *dport = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dport = enc_to_dig_port(encoder);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 
 	vlv_phy_pre_encoder_enable(encoder, pipe_config);
@@ -2746,7 +2747,7 @@ static void chv_hdmi_pre_enable(struct intel_encoder *encoder,
 				const struct intel_crtc_state *pipe_config,
 				const struct drm_connector_state *conn_state)
 {
-	struct intel_digital_port *dport = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dport = enc_to_dig_port(encoder);
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 
@@ -2772,7 +2773,7 @@ static struct i2c_adapter *
 intel_hdmi_get_i2c_adapter(struct drm_connector *connector)
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->dev);
-	struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector);
+	struct intel_hdmi *intel_hdmi = intel_attached_hdmi(to_intel_connector(connector));
 
 	return intel_gmbus_get_adapter(dev_priv, intel_hdmi->ddc_bus);
 }
@@ -2816,7 +2817,7 @@ intel_hdmi_connector_register(struct drm_connector *connector)
 
 static void intel_hdmi_destroy(struct drm_connector *connector)
 {
-	struct cec_notifier *n = intel_attached_hdmi(connector)->cec_notifier;
+	struct cec_notifier *n = intel_attached_hdmi(to_intel_connector(connector))->cec_notifier;
 
 	cec_notifier_conn_unregister(n);
 
@@ -2906,7 +2907,7 @@ bool intel_hdmi_handle_sink_scrambling(struct intel_encoder *encoder,
 				       bool scrambling)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
 	struct drm_scrambling *sink_scrambling =
 		&connector->display_info.hdmi.scdc.scrambling;
 	struct i2c_adapter *adapter =
diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.h b/drivers/gpu/drm/i915/display/intel_hdmi.h
index cf1ea5427639..d3659d0b408b 100644
--- a/drivers/gpu/drm/i915/display/intel_hdmi.h
+++ b/drivers/gpu/drm/i915/display/intel_hdmi.h
@@ -29,7 +29,7 @@ void intel_hdmi_init(struct drm_i915_private *dev_priv, i915_reg_t hdmi_reg,
 		     enum port port);
 void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port,
 			       struct intel_connector *intel_connector);
-struct intel_hdmi *enc_to_intel_hdmi(struct drm_encoder *encoder);
+struct intel_hdmi *enc_to_intel_hdmi(struct intel_encoder *encoder);
 int intel_hdmi_compute_config(struct intel_encoder *encoder,
 			      struct intel_crtc_state *pipe_config,
 			      struct drm_connector_state *conn_state);
diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c
index fc29046d48ea..99d3a3c7989e 100644
--- a/drivers/gpu/drm/i915/display/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/display/intel_hotplug.c
@@ -302,7 +302,7 @@ intel_encoder_hotplug(struct intel_encoder *encoder,
 static bool intel_encoder_has_hpd_pulse(struct intel_encoder *encoder)
 {
 	return intel_encoder_is_dig_port(encoder) &&
-		enc_to_dig_port(&encoder->base)->hpd_pulse != NULL;
+		enc_to_dig_port(encoder)->hpd_pulse != NULL;
 }
 
 static void i915_digport_work_func(struct work_struct *work)
@@ -335,7 +335,7 @@ static void i915_digport_work_func(struct work_struct *work)
 		if (!long_hpd && !short_hpd)
 			continue;
 
-		dig_port = enc_to_dig_port(&encoder->base);
+		dig_port = enc_to_dig_port(encoder);
 
 		ret = dig_port->hpd_pulse(dig_port, long_hpd);
 		if (ret == IRQ_NONE) {
diff --git a/drivers/gpu/drm/i915/display/intel_lspcon.c b/drivers/gpu/drm/i915/display/intel_lspcon.c
index 5145ff8b962b..d807c5648c87 100644
--- a/drivers/gpu/drm/i915/display/intel_lspcon.c
+++ b/drivers/gpu/drm/i915/display/intel_lspcon.c
@@ -434,8 +434,8 @@ void lspcon_write_infoframe(struct intel_encoder *encoder,
 			    const void *frame, ssize_t len)
 {
 	bool ret;
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
-	struct intel_lspcon *lspcon = enc_to_intel_lspcon(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+	struct intel_lspcon *lspcon = enc_to_intel_lspcon(encoder);
 
 	/* LSPCON only needs AVI IF */
 	if (type != HDMI_INFOFRAME_TYPE_AVI)
@@ -472,7 +472,7 @@ void lspcon_set_infoframes(struct intel_encoder *encoder,
 	ssize_t ret;
 	union hdmi_infoframe frame;
 	u8 buf[VIDEO_DIP_DATA_SIZE];
-	struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
+	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
 	struct intel_lspcon *lspcon = &dig_port->lspcon;
 	const struct drm_display_mode *adjusted_mode =
 		&crtc_state->hw.adjusted_mode;
@@ -522,7 +522,7 @@ u32 lspcon_infoframes_enabled(struct intel_encoder *encoder,
 			      const struct intel_crtc_state *pipe_config)
 {
 	/* FIXME actually read this from the hw */
-	return enc_to_intel_lspcon(&encoder->base)->active;
+	return enc_to_intel_lspcon(encoder)->active;
 }
 
 void lspcon_resume(struct intel_lspcon *lspcon)
diff --git a/drivers/gpu/drm/i915/display/intel_pipe_crc.c b/drivers/gpu/drm/i915/display/intel_pipe_crc.c
index 2746512f4466..520408e83681 100644
--- a/drivers/gpu/drm/i915/display/intel_pipe_crc.c
+++ b/drivers/gpu/drm/i915/display/intel_pipe_crc.c
@@ -98,7 +98,7 @@ static int i9xx_pipe_crc_auto_source(struct drm_i915_private *dev_priv,
 			break;
 		case INTEL_OUTPUT_DP:
 		case INTEL_OUTPUT_EDP:
-			dig_port = enc_to_dig_port(&encoder->base);
+			dig_port = enc_to_dig_port(encoder);
 			switch (dig_port->base.port) {
 			case PORT_B:
 				*source = INTEL_PIPE_CRC_SOURCE_DP_B;
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 16e9ff47d519..89c9cf5f38d2 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -1523,3 +1523,27 @@ bool intel_psr_enabled(struct intel_dp *intel_dp)
 
 	return ret;
 }
+
+void intel_psr_atomic_check(struct drm_connector *connector,
+			    struct drm_connector_state *old_state,
+			    struct drm_connector_state *new_state)
+{
+	struct drm_i915_private *dev_priv = to_i915(connector->dev);
+	struct intel_digital_port *port;
+	struct drm_crtc_state *new_crtc_state;
+
+	/* One-shot: bail without PSR, without a CRTC, or once already done. */
+	if (!CAN_PSR(dev_priv) || !new_state->crtc ||
+	    dev_priv->psr.initially_probed)
+		return;
+
+	port = enc_to_dig_port(to_intel_connector(connector)->encoder);
+	if (dev_priv->psr.dp != &port->dp)
+		return;
+
+	/* Flag a mode change on this connector's new CRTC state, then latch. */
+	new_crtc_state = drm_atomic_get_new_crtc_state(new_state->state,
+						       new_state->crtc);
+	new_crtc_state->mode_changed = true;
+	dev_priv->psr.initially_probed = true;
+}
diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h
index 46e4de8b8cd5..c58a1d438808 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.h
+++ b/drivers/gpu/drm/i915/display/intel_psr.h
@@ -8,6 +8,8 @@
 
 #include "intel_frontbuffer.h"
 
+struct drm_connector;
+struct drm_connector_state;
 struct drm_i915_private;
 struct intel_crtc_state;
 struct intel_dp;
@@ -35,5 +37,8 @@ void intel_psr_short_pulse(struct intel_dp *intel_dp);
 int intel_psr_wait_for_idle(const struct intel_crtc_state *new_crtc_state,
 			    u32 *out_value);
 bool intel_psr_enabled(struct intel_dp *intel_dp);
+void intel_psr_atomic_check(struct drm_connector *connector,
+			    struct drm_connector_state *old_state,
+			    struct drm_connector_state *new_state);
 
 #endif /* __INTEL_PSR_H__ */
diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c
index 8758ee2a4442..e8819fd21e03 100644
--- a/drivers/gpu/drm/i915/display/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/display/intel_sdvo.c
@@ -180,7 +180,7 @@ static struct intel_sdvo *to_sdvo(struct intel_encoder *encoder)
 	return container_of(encoder, struct intel_sdvo, base);
 }
 
-static struct intel_sdvo *intel_attached_sdvo(struct drm_connector *connector)
+static struct intel_sdvo *intel_attached_sdvo(struct intel_connector *connector)
 {
 	return to_sdvo(intel_attached_encoder(connector));
 }
@@ -1551,7 +1551,7 @@ static bool intel_sdvo_connector_get_hw_state(struct intel_connector *connector)
 {
 	struct intel_sdvo_connector *intel_sdvo_connector =
 		to_intel_sdvo_connector(&connector->base);
-	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(&connector->base);
+	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector);
 	u16 active_outputs = 0;
 
 	intel_sdvo_get_active_outputs(intel_sdvo, &active_outputs);
@@ -1823,7 +1823,7 @@ static enum drm_mode_status
 intel_sdvo_mode_valid(struct drm_connector *connector,
 		      struct drm_display_mode *mode)
 {
-	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector);
+	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector));
 	struct intel_sdvo_connector *intel_sdvo_connector =
 		to_intel_sdvo_connector(connector);
 	int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
@@ -1941,7 +1941,7 @@ intel_sdvo_multifunc_encoder(struct intel_sdvo *intel_sdvo)
 static struct edid *
 intel_sdvo_get_edid(struct drm_connector *connector)
 {
-	struct intel_sdvo *sdvo = intel_attached_sdvo(connector);
+	struct intel_sdvo *sdvo = intel_attached_sdvo(to_intel_connector(connector));
 	return drm_get_edid(connector, &sdvo->ddc);
 }
 
@@ -1959,7 +1959,7 @@ intel_sdvo_get_analog_edid(struct drm_connector *connector)
 static enum drm_connector_status
 intel_sdvo_tmds_sink_detect(struct drm_connector *connector)
 {
-	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector);
+	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector));
 	struct intel_sdvo_connector *intel_sdvo_connector =
 		to_intel_sdvo_connector(connector);
 	enum drm_connector_status status;
@@ -2028,7 +2028,7 @@ static enum drm_connector_status
 intel_sdvo_detect(struct drm_connector *connector, bool force)
 {
 	u16 response;
-	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector);
+	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector));
 	struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(connector);
 	enum drm_connector_status ret;
 
@@ -2175,7 +2175,7 @@ static const struct drm_display_mode sdvo_tv_modes[] = {
 
 static void intel_sdvo_get_tv_modes(struct drm_connector *connector)
 {
-	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector);
+	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector));
 	const struct drm_connector_state *conn_state = connector->state;
 	struct intel_sdvo_sdtv_resolution_request tv_res;
 	u32 reply = 0, format_map = 0;
@@ -2215,7 +2215,7 @@ static void intel_sdvo_get_tv_modes(struct drm_connector *connector)
 
 static void intel_sdvo_get_lvds_modes(struct drm_connector *connector)
 {
-	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector);
+	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector));
 	struct drm_i915_private *dev_priv = to_i915(connector->dev);
 	struct drm_display_mode *newmode;
 
@@ -2379,7 +2379,7 @@ intel_sdvo_connector_atomic_set_property(struct drm_connector *connector,
 static int
 intel_sdvo_connector_register(struct drm_connector *connector)
 {
-	struct intel_sdvo *sdvo = intel_attached_sdvo(connector);
+	struct intel_sdvo *sdvo = intel_attached_sdvo(to_intel_connector(connector));
 	int ret;
 
 	ret = intel_connector_register(connector);
@@ -2394,7 +2394,7 @@ intel_sdvo_connector_register(struct drm_connector *connector)
 static void
 intel_sdvo_connector_unregister(struct drm_connector *connector)
 {
-	struct intel_sdvo *sdvo = intel_attached_sdvo(connector);
+	struct intel_sdvo *sdvo = intel_attached_sdvo(to_intel_connector(connector));
 
 	sysfs_remove_link(&connector->kdev->kobj,
 			  sdvo->ddc.dev.kobj.name);
@@ -2932,7 +2932,7 @@ static void intel_sdvo_output_cleanup(struct intel_sdvo *intel_sdvo)
 
 	list_for_each_entry_safe(connector, tmp,
 				 &dev->mode_config.connector_list, head) {
-		if (intel_attached_encoder(connector) == &intel_sdvo->base) {
+		if (intel_attached_encoder(to_intel_connector(connector)) == &intel_sdvo->base) {
 			drm_connector_unregister(connector);
 			intel_connector_destroy(connector);
 		}
diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c
index 3f7b8f2ff671..fca77ec1e0dd 100644
--- a/drivers/gpu/drm/i915/display/intel_sprite.c
+++ b/drivers/gpu/drm/i915/display/intel_sprite.c
@@ -583,15 +583,16 @@ skl_program_plane(struct intel_plane *plane,
 	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
 	u32 surf_addr = plane_state->color_plane[color_plane].offset;
 	u32 stride = skl_plane_stride(plane_state, color_plane);
-	u32 aux_dist = plane_state->color_plane[1].offset - surf_addr;
-	u32 aux_stride = skl_plane_stride(plane_state, 1);
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	int aux_plane = intel_main_to_aux_plane(fb, color_plane);
+	u32 aux_dist = plane_state->color_plane[aux_plane].offset - surf_addr;
+	u32 aux_stride = skl_plane_stride(plane_state, aux_plane);
 	int crtc_x = plane_state->uapi.dst.x1;
 	int crtc_y = plane_state->uapi.dst.y1;
 	u32 x = plane_state->color_plane[color_plane].x;
 	u32 y = plane_state->color_plane[color_plane].y;
 	u32 src_w = drm_rect_width(&plane_state->uapi.src) >> 16;
 	u32 src_h = drm_rect_height(&plane_state->uapi.src) >> 16;
-	const struct drm_framebuffer *fb = plane_state->hw.fb;
 	u8 alpha = plane_state->hw.alpha >> 8;
 	u32 plane_color_ctl = 0;
 	unsigned long irqflags;
@@ -2106,7 +2107,8 @@ static int skl_plane_check_fb(const struct intel_crtc_state *crtc_state,
 	     fb->modifier == I915_FORMAT_MOD_Yf_TILED ||
 	     fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
 	     fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS ||
-	     fb->modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS)) {
+	     fb->modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS ||
+	     fb->modifier == I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS)) {
 		DRM_DEBUG_KMS("Y/Yf tiling not supported in IF-ID mode\n");
 		return -EINVAL;
 	}
@@ -2578,7 +2580,16 @@ static const u64 skl_plane_format_modifiers_ccs[] = {
 	DRM_FORMAT_MOD_INVALID
 };
 
-static const u64 gen12_plane_format_modifiers_ccs[] = {
+static const u64 gen12_plane_format_modifiers_mc_ccs[] = {
+	I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS,
+	I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS,
+	I915_FORMAT_MOD_Y_TILED,
+	I915_FORMAT_MOD_X_TILED,
+	DRM_FORMAT_MOD_LINEAR,
+	DRM_FORMAT_MOD_INVALID
+};
+
+static const u64 gen12_plane_format_modifiers_rc_ccs[] = {
 	I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS,
 	I915_FORMAT_MOD_Y_TILED,
 	I915_FORMAT_MOD_X_TILED,
@@ -2743,10 +2754,21 @@ static bool skl_plane_format_mod_supported(struct drm_plane *_plane,
 	}
 }
 
+static bool gen12_plane_supports_mc_ccs(enum plane_id plane_id)
+{
+	return plane_id < PLANE_SPRITE4; /* only ids below PLANE_SPRITE4 */
+}
+
 static bool gen12_plane_format_mod_supported(struct drm_plane *_plane,
 					     u32 format, u64 modifier)
 {
+	struct intel_plane *plane = to_intel_plane(_plane);
+
 	switch (modifier) {
+	case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS:
+		if (!gen12_plane_supports_mc_ccs(plane->id))
+			return false;
+		/* fall through */
 	case DRM_FORMAT_MOD_LINEAR:
 	case I915_FORMAT_MOD_X_TILED:
 	case I915_FORMAT_MOD_Y_TILED:
@@ -2764,11 +2786,6 @@ static bool gen12_plane_format_mod_supported(struct drm_plane *_plane,
 		if (is_ccs_modifier(modifier))
 			return true;
 		/* fall through */
-	case DRM_FORMAT_RGB565:
-	case DRM_FORMAT_XRGB2101010:
-	case DRM_FORMAT_XBGR2101010:
-	case DRM_FORMAT_ARGB2101010:
-	case DRM_FORMAT_ABGR2101010:
 	case DRM_FORMAT_YUYV:
 	case DRM_FORMAT_YVYU:
 	case DRM_FORMAT_UYVY:
@@ -2777,6 +2794,14 @@ static bool gen12_plane_format_mod_supported(struct drm_plane *_plane,
 	case DRM_FORMAT_P010:
 	case DRM_FORMAT_P012:
 	case DRM_FORMAT_P016:
+		if (modifier == I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS)
+			return true;
+		/* fall through */
+	case DRM_FORMAT_RGB565:
+	case DRM_FORMAT_XRGB2101010:
+	case DRM_FORMAT_XBGR2101010:
+	case DRM_FORMAT_ARGB2101010:
+	case DRM_FORMAT_ABGR2101010:
 	case DRM_FORMAT_XVYU2101010:
 	case DRM_FORMAT_C8:
 	case DRM_FORMAT_XBGR16161616F:
@@ -2910,6 +2935,14 @@ static const u32 *icl_get_plane_formats(struct drm_i915_private *dev_priv,
 	}
 }
 
+static const u64 *gen12_get_plane_modifiers(enum plane_id plane_id)
+{
+	/* Planes without MC CCS support only get the RC CCS modifier list. */
+	if (!gen12_plane_supports_mc_ccs(plane_id))
+		return gen12_plane_format_modifiers_rc_ccs;
+	return gen12_plane_format_modifiers_mc_ccs;
+}
+
 static bool skl_plane_has_ccs(struct drm_i915_private *dev_priv,
 			      enum pipe pipe, enum plane_id plane_id)
 {
@@ -2975,7 +3008,7 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv,
 
 	plane->has_ccs = skl_plane_has_ccs(dev_priv, pipe, plane_id);
 	if (INTEL_GEN(dev_priv) >= 12) {
-		modifiers = gen12_plane_format_modifiers_ccs;
+		modifiers = gen12_get_plane_modifiers(plane_id);
 		plane_funcs = &gen12_plane_funcs;
 	} else {
 		if (plane->has_ccs)
diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c
index 50703536436c..c75e0ceecee6 100644
--- a/drivers/gpu/drm/i915/display/intel_tv.c
+++ b/drivers/gpu/drm/i915/display/intel_tv.c
@@ -898,7 +898,7 @@ static struct intel_tv *enc_to_tv(struct intel_encoder *encoder)
 	return container_of(encoder, struct intel_tv, base);
 }
 
-static struct intel_tv *intel_attached_tv(struct drm_connector *connector)
+static struct intel_tv *intel_attached_tv(struct intel_connector *connector)
 {
 	return enc_to_tv(intel_attached_encoder(connector));
 }
@@ -1527,7 +1527,7 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder,
 			   ((video_levels->black << TV_BLACK_LEVEL_SHIFT) |
 			    (video_levels->blank << TV_BLANK_LEVEL_SHIFT)));
 
-	assert_pipe_disabled(dev_priv, intel_crtc->pipe);
+	assert_pipe_disabled(dev_priv, pipe_config->cpu_transcoder);
 
 	/* Filter ctl must be set before TV_WIN_SIZE */
 	tv_filter_ctl = TV_AUTO_SCALE;
@@ -1662,7 +1662,7 @@ intel_tv_detect_type(struct intel_tv *intel_tv,
  */
 static void intel_tv_find_better_format(struct drm_connector *connector)
 {
-	struct intel_tv *intel_tv = intel_attached_tv(connector);
+	struct intel_tv *intel_tv = intel_attached_tv(to_intel_connector(connector));
 	const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state);
 	int i;
 
@@ -1689,7 +1689,7 @@ intel_tv_detect(struct drm_connector *connector,
 		struct drm_modeset_acquire_ctx *ctx,
 		bool force)
 {
-	struct intel_tv *intel_tv = intel_attached_tv(connector);
+	struct intel_tv *intel_tv = intel_attached_tv(to_intel_connector(connector));
 	enum drm_connector_status status;
 	int type;
 
diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c
index 6bab08db5d75..9e6aaa302e40 100644
--- a/drivers/gpu/drm/i915/display/intel_vdsc.c
+++ b/drivers/gpu/drm/i915/display/intel_vdsc.c
@@ -943,7 +943,7 @@ static void intel_dsc_dsi_pps_write(struct intel_encoder *encoder,
 				    const struct intel_crtc_state *crtc_state)
 {
 	const struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config;
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	struct mipi_dsi_device *dsi;
 	struct drm_dsc_picture_parameter_set pps;
 	enum port port;
@@ -961,7 +961,7 @@ static void intel_dsc_dsi_pps_write(struct intel_encoder *encoder,
 static void intel_dsc_dp_pps_write(struct intel_encoder *encoder,
 				   const struct intel_crtc_state *crtc_state)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
 	const struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config;
 	struct drm_dsc_pps_infoframe dp_dsc_pps_sdp;
diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c
index 21e820299107..daf4fc3dab6f 100644
--- a/drivers/gpu/drm/i915/display/vlv_dsi.c
+++ b/drivers/gpu/drm/i915/display/vlv_dsi.c
@@ -23,7 +23,6 @@
  * Author: Jani Nikula <jani.nikula@intel.com>
  */
 
-#include <linux/gpio/consumer.h>
 #include <linux/slab.h>
 
 #include <drm/drm_atomic_helper.h>
@@ -319,7 +318,7 @@ static int intel_dsi_compute_config(struct intel_encoder *encoder,
 static bool glk_dsi_enable_io(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	u32 tmp;
 	bool cold_boot = false;
@@ -367,7 +366,7 @@ static bool glk_dsi_enable_io(struct intel_encoder *encoder)
 static void glk_dsi_device_ready(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	u32 val;
 
@@ -438,7 +437,7 @@ static void glk_dsi_device_ready(struct intel_encoder *encoder)
 static void bxt_dsi_device_ready(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	u32 val;
 
@@ -465,7 +464,7 @@ static void bxt_dsi_device_ready(struct intel_encoder *encoder)
 static void vlv_dsi_device_ready(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	u32 val;
 
@@ -516,7 +515,7 @@ static void intel_dsi_device_ready(struct intel_encoder *encoder)
 static void glk_dsi_enter_low_power_mode(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	u32 val;
 
@@ -546,7 +545,7 @@ static void glk_dsi_enter_low_power_mode(struct intel_encoder *encoder)
 static void glk_dsi_disable_mipi_io(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	u32 tmp;
 
@@ -579,7 +578,7 @@ static void glk_dsi_clear_device_ready(struct intel_encoder *encoder)
 static void vlv_dsi_clear_device_ready(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 
 	DRM_DEBUG_KMS("\n");
@@ -625,7 +624,7 @@ static void intel_dsi_port_enable(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 
 	if (intel_dsi->dual_link == DSI_DUAL_LINK_FRONT_BACK) {
@@ -681,7 +680,7 @@ static void intel_dsi_port_disable(struct intel_encoder *encoder)
 {
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 
 	for_each_dsi_port(port, intel_dsi->ports) {
@@ -745,7 +744,7 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder,
 				 const struct intel_crtc_state *pipe_config,
 				 const struct drm_connector_state *conn_state)
 {
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	struct drm_crtc *crtc = pipe_config->uapi.crtc;
 	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -793,9 +792,6 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder,
 	if (!IS_GEMINILAKE(dev_priv))
 		intel_dsi_prepare(encoder, pipe_config);
 
-	/* Power on, try both CRC pmic gpio and VBT */
-	if (intel_dsi->gpio_panel)
-		gpiod_set_value_cansleep(intel_dsi->gpio_panel, 1);
 	intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_POWER_ON);
 	intel_dsi_msleep(intel_dsi, intel_dsi->panel_on_delay);
 
@@ -850,7 +846,7 @@ static void intel_dsi_disable(struct intel_encoder *encoder,
 			      const struct intel_crtc_state *old_crtc_state,
 			      const struct drm_connector_state *old_conn_state)
 {
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 
 	DRM_DEBUG_KMS("\n");
@@ -886,7 +882,7 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder,
 				   const struct drm_connector_state *old_conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	u32 val;
 
@@ -895,7 +891,7 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder,
 	if (IS_GEN9_LP(dev_priv)) {
 		intel_crtc_vblank_off(old_crtc_state);
 
-		skylake_scaler_disable(old_crtc_state);
+		skl_scaler_disable(old_crtc_state);
 	}
 
 	if (is_vid_mode(intel_dsi)) {
@@ -945,11 +941,8 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder,
 	/* Assert reset */
 	intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_ASSERT_RESET);
 
-	/* Power off, try both CRC pmic gpio and VBT */
 	intel_dsi_msleep(intel_dsi, intel_dsi->panel_off_delay);
 	intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_POWER_OFF);
-	if (intel_dsi->gpio_panel)
-		gpiod_set_value_cansleep(intel_dsi->gpio_panel, 0);
 
 	/*
 	 * FIXME As we do with eDP, just make a note of the time here
@@ -962,7 +955,7 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder,
 				   enum pipe *pipe)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	intel_wakeref_t wakeref;
 	enum port port;
 	bool active = false;
@@ -1041,7 +1034,7 @@ static void bxt_dsi_get_pipe_config(struct intel_encoder *encoder,
 					&pipe_config->hw.adjusted_mode;
 	struct drm_display_mode *adjusted_mode_sw;
 	struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	unsigned int lane_count = intel_dsi->lane_count;
 	unsigned int bpp, fmt;
 	enum port port;
@@ -1234,7 +1227,7 @@ static void set_dsi_timings(struct drm_encoder *encoder,
 {
 	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(encoder));
 	enum port port;
 	unsigned int bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format);
 	unsigned int lane_count = intel_dsi->lane_count;
@@ -1322,7 +1315,7 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder,
 	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(encoder));
 	const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
 	enum port port;
 	unsigned int bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format);
@@ -1512,7 +1505,7 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder,
 static void intel_dsi_unprepare(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	u32 val;
 
@@ -1539,12 +1532,9 @@ static void intel_dsi_unprepare(struct intel_encoder *encoder)
 
 static void intel_dsi_encoder_destroy(struct drm_encoder *encoder)
 {
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
-
-	/* dispose of the gpios */
-	if (intel_dsi->gpio_panel)
-		gpiod_put(intel_dsi->gpio_panel);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(encoder));
 
+	intel_dsi_vbt_gpio_cleanup(intel_dsi);
 	intel_encoder_destroy(encoder);
 }
 
@@ -1825,6 +1815,7 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv)
 	struct drm_connector *connector;
 	struct drm_display_mode *current_mode, *fixed_mode;
 	enum port port;
+	enum pipe pipe;
 
 	DRM_DEBUG_KMS("\n");
 
@@ -1923,20 +1914,8 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv)
 
 	vlv_dphy_param_init(intel_dsi);
 
-	/*
-	 * In case of BYT with CRC PMIC, we need to use GPIO for
-	 * Panel control.
-	 */
-	if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) &&
-	    (dev_priv->vbt.dsi.config->pwm_blc == PPS_BLC_PMIC)) {
-		intel_dsi->gpio_panel =
-			gpiod_get(dev->dev, "panel", GPIOD_OUT_HIGH);
-
-		if (IS_ERR(intel_dsi->gpio_panel)) {
-			DRM_ERROR("Failed to own gpio for panel control\n");
-			intel_dsi->gpio_panel = NULL;
-		}
-	}
+	intel_dsi_vbt_gpio_init(intel_dsi,
+				intel_dsi_get_hw_state(intel_encoder, &pipe));
 
 	drm_connector_init(dev, connector, &intel_dsi_connector_funcs,
 			   DRM_MODE_CONNECTOR_DSI);
diff --git a/drivers/gpu/drm/i915/display/vlv_dsi_pll.c b/drivers/gpu/drm/i915/display/vlv_dsi_pll.c
index 95f39cd0ce02..6b89e67b120f 100644
--- a/drivers/gpu/drm/i915/display/vlv_dsi_pll.c
+++ b/drivers/gpu/drm/i915/display/vlv_dsi_pll.c
@@ -117,7 +117,7 @@ int vlv_dsi_pll_compute(struct intel_encoder *encoder,
 			struct intel_crtc_state *config)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	int ret;
 	u32 dsi_clk;
 
@@ -255,7 +255,7 @@ u32 vlv_dsi_get_pclk(struct intel_encoder *encoder,
 		     struct intel_crtc_state *config)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	int bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format);
 	u32 dsi_clock, pclk;
 	u32 pll_ctl, pll_div;
@@ -321,7 +321,7 @@ u32 bxt_dsi_get_pclk(struct intel_encoder *encoder,
 	u32 pclk;
 	u32 dsi_clk;
 	u32 dsi_ratio;
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	int bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format);
 
@@ -341,7 +341,7 @@ void vlv_dsi_reset_clocks(struct intel_encoder *encoder, enum port port)
 {
 	u32 temp;
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 
 	temp = I915_READ(MIPI_CTRL(port));
 	temp &= ~ESCAPE_CLOCK_DIVIDER_MASK;
@@ -455,7 +455,7 @@ int bxt_dsi_pll_compute(struct intel_encoder *encoder,
 			struct intel_crtc_state *config)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	u8 dsi_ratio, dsi_ratio_min, dsi_ratio_max;
 	u32 dsi_clk;
 
@@ -503,7 +503,7 @@ void bxt_dsi_pll_enable(struct intel_encoder *encoder,
 			const struct intel_crtc_state *config)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
 	u32 val;
 
diff --git a/drivers/gpu/drm/i915/gem/Makefile b/drivers/gpu/drm/i915/gem/Makefile
deleted file mode 100644
index 7e73aa587967..000000000000
--- a/drivers/gpu/drm/i915/gem/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-# For building individual subdir files on the command line
-subdir-ccflags-y += -I$(srctree)/$(src)/..
-
-# Extra header tests
-header-test-pattern-$(CONFIG_DRM_I915_WERROR) := *.h
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index dc90b044a217..a2e57e62af30 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -69,6 +69,7 @@
 
 #include <drm/i915_drm.h>
 
+#include "gt/gen6_ppgtt.h"
 #include "gt/intel_context.h"
 #include "gt/intel_engine_heartbeat.h"
 #include "gt/intel_engine_pm.h"
@@ -705,7 +706,7 @@ i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags)
 	if (HAS_FULL_PPGTT(i915)) {
 		struct i915_ppgtt *ppgtt;
 
-		ppgtt = i915_ppgtt_create(i915);
+		ppgtt = i915_ppgtt_create(&i915->gt);
 		if (IS_ERR(ppgtt)) {
 			DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
 					 PTR_ERR(ppgtt));
@@ -760,12 +761,6 @@ void i915_gem_driver_release__contexts(struct drm_i915_private *i915)
 	flush_work(&i915->gem.contexts.free_work);
 }
 
-static int context_idr_cleanup(int id, void *p, void *data)
-{
-	context_close(p);
-	return 0;
-}
-
 static int vm_idr_cleanup(int id, void *p, void *data)
 {
 	i915_vm_put(p);
@@ -773,7 +768,8 @@ static int vm_idr_cleanup(int id, void *p, void *data)
 }
 
 static int gem_context_register(struct i915_gem_context *ctx,
-				struct drm_i915_file_private *fpriv)
+				struct drm_i915_file_private *fpriv,
+				u32 *id)
 {
 	struct i915_address_space *vm;
 	int ret;
@@ -791,14 +787,10 @@ static int gem_context_register(struct i915_gem_context *ctx,
 		 current->comm, pid_nr(ctx->pid));
 
 	/* And finally expose ourselves to userspace via the idr */
-	mutex_lock(&fpriv->context_idr_lock);
-	ret = idr_alloc(&fpriv->context_idr, ctx, 0, 0, GFP_KERNEL);
-	mutex_unlock(&fpriv->context_idr_lock);
-	if (ret >= 0)
-		goto out;
+	ret = xa_alloc(&fpriv->context_xa, id, ctx, xa_limit_32b, GFP_KERNEL);
+	if (ret)
+		put_pid(fetch_and_zero(&ctx->pid));
 
-	put_pid(fetch_and_zero(&ctx->pid));
-out:
 	return ret;
 }
 
@@ -808,11 +800,11 @@ int i915_gem_context_open(struct drm_i915_private *i915,
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct i915_gem_context *ctx;
 	int err;
+	u32 id;
 
-	mutex_init(&file_priv->context_idr_lock);
-	mutex_init(&file_priv->vm_idr_lock);
+	xa_init_flags(&file_priv->context_xa, XA_FLAGS_ALLOC);
 
-	idr_init(&file_priv->context_idr);
+	mutex_init(&file_priv->vm_idr_lock);
 	idr_init_base(&file_priv->vm_idr, 1);
 
 	ctx = i915_gem_create_context(i915, 0);
@@ -821,21 +813,19 @@ int i915_gem_context_open(struct drm_i915_private *i915,
 		goto err;
 	}
 
-	err = gem_context_register(ctx, file_priv);
+	err = gem_context_register(ctx, file_priv, &id);
 	if (err < 0)
 		goto err_ctx;
 
-	GEM_BUG_ON(err > 0);
-
+	GEM_BUG_ON(id);
 	return 0;
 
 err_ctx:
 	context_close(ctx);
 err:
 	idr_destroy(&file_priv->vm_idr);
-	idr_destroy(&file_priv->context_idr);
+	xa_destroy(&file_priv->context_xa);
 	mutex_destroy(&file_priv->vm_idr_lock);
-	mutex_destroy(&file_priv->context_idr_lock);
 	return err;
 }
 
@@ -843,10 +833,12 @@ void i915_gem_context_close(struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct drm_i915_private *i915 = file_priv->dev_priv;
+	struct i915_gem_context *ctx;
+	unsigned long idx;
 
-	idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
-	idr_destroy(&file_priv->context_idr);
-	mutex_destroy(&file_priv->context_idr_lock);
+	xa_for_each(&file_priv->context_xa, idx, ctx)
+		context_close(ctx);
+	xa_destroy(&file_priv->context_xa);
 
 	idr_for_each(&file_priv->vm_idr, vm_idr_cleanup, NULL);
 	idr_destroy(&file_priv->vm_idr);
@@ -870,7 +862,7 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
 	if (args->flags)
 		return -EINVAL;
 
-	ppgtt = i915_ppgtt_create(i915);
+	ppgtt = i915_ppgtt_create(&i915->gt);
 	if (IS_ERR(ppgtt))
 		return PTR_ERR(ppgtt);
 
@@ -1244,12 +1236,14 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 	 * image, or into the registers directory, does not stick). Pristine
 	 * and idle contexts will be configured on pinning.
 	 */
-	if (!intel_context_is_pinned(ce))
+	if (!intel_context_pin_if_active(ce))
 		return 0;
 
 	rq = intel_engine_create_kernel_request(ce->engine);
-	if (IS_ERR(rq))
-		return PTR_ERR(rq);
+	if (IS_ERR(rq)) {
+		ret = PTR_ERR(rq);
+		goto out_unpin;
+	}
 
 	/* Serialise with the remote context */
 	ret = intel_context_prepare_remote_request(ce, rq);
@@ -1257,6 +1251,8 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 		ret = gen8_emit_rpcs_config(rq, ce, sseu);
 
 	i915_request_add(rq);
+out_unpin:
+	intel_context_unpin(ce);
 	return ret;
 }
 
@@ -2187,6 +2183,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 	struct drm_i915_gem_context_create_ext *args = data;
 	struct create_ext ext_data;
 	int ret;
+	u32 id;
 
 	if (!DRIVER_CAPS(i915)->has_logical_contexts)
 		return -ENODEV;
@@ -2218,11 +2215,11 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 			goto err_ctx;
 	}
 
-	ret = gem_context_register(ext_data.ctx, ext_data.fpriv);
+	ret = gem_context_register(ext_data.ctx, ext_data.fpriv, &id);
 	if (ret < 0)
 		goto err_ctx;
 
-	args->ctx_id = ret;
+	args->ctx_id = id;
 	DRM_DEBUG("HW context %d created\n", args->ctx_id);
 
 	return 0;
@@ -2245,11 +2242,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 	if (!args->ctx_id)
 		return -ENOENT;
 
-	if (mutex_lock_interruptible(&file_priv->context_idr_lock))
-		return -EINTR;
-
-	ctx = idr_remove(&file_priv->context_idr, args->ctx_id);
-	mutex_unlock(&file_priv->context_idr_lock);
+	ctx = xa_erase(&file_priv->context_xa, args->ctx_id);
 	if (!ctx)
 		return -ENOENT;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 14f3cc1b7583..3ae61a355d87 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -13,7 +13,6 @@
 
 #include "i915_drv.h"
 #include "i915_gem.h"
-#include "i915_gem_gtt.h"
 #include "i915_scheduler.h"
 #include "intel_device_info.h"
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index cbd2bcade3c8..d5a0f5ae4a8b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2173,7 +2173,7 @@ static int eb_submit(struct i915_execbuffer *eb)
 	}
 
 	if (intel_context_nopreempt(eb->context))
-		eb->request->flags |= I915_REQUEST_NOPREEMPT;
+		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &eb->request->fence.flags);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
index 520cc9cac471..70543c83df06 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
@@ -16,46 +16,6 @@ const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops = {
 	.release = i915_gem_object_release_memory_region,
 };
 
-/* XXX: Time to vfunc your life up? */
-void __iomem *
-i915_gem_object_lmem_io_map_page(struct drm_i915_gem_object *obj,
-				 unsigned long n)
-{
-	resource_size_t offset;
-
-	offset = i915_gem_object_get_dma_address(obj, n);
-	offset -= obj->mm.region->region.start;
-
-	return io_mapping_map_wc(&obj->mm.region->iomap, offset, PAGE_SIZE);
-}
-
-void __iomem *
-i915_gem_object_lmem_io_map_page_atomic(struct drm_i915_gem_object *obj,
-					unsigned long n)
-{
-	resource_size_t offset;
-
-	offset = i915_gem_object_get_dma_address(obj, n);
-	offset -= obj->mm.region->region.start;
-
-	return io_mapping_map_atomic_wc(&obj->mm.region->iomap, offset);
-}
-
-void __iomem *
-i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj,
-			    unsigned long n,
-			    unsigned long size)
-{
-	resource_size_t offset;
-
-	GEM_BUG_ON(!i915_gem_object_is_contiguous(obj));
-
-	offset = i915_gem_object_get_dma_address(obj, n);
-	offset -= obj->mm.region->region.start;
-
-	return io_mapping_map_wc(&obj->mm.region->iomap, offset, size);
-}
-
 bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj)
 {
 	return obj->ops == &i915_gem_lmem_obj_ops;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h
index 7c176b8b7d2f..fc3f15580fe3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h
@@ -14,14 +14,6 @@ struct intel_memory_region;
 
 extern const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops;
 
-void __iomem *i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj,
-					  unsigned long n, unsigned long size);
-void __iomem *i915_gem_object_lmem_io_map_page(struct drm_i915_gem_object *obj,
-					       unsigned long n);
-void __iomem *
-i915_gem_object_lmem_io_map_page_atomic(struct drm_i915_gem_object *obj,
-					unsigned long n);
-
 bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj);
 
 struct drm_i915_gem_object *
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 879fff8adc48..b9fdac2f9003 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -4,6 +4,7 @@
  * Copyright © 2014-2016 Intel Corporation
  */
 
+#include <linux/anon_inodes.h>
 #include <linux/mman.h>
 #include <linux/pfn_t.h>
 #include <linux/sizes.h>
@@ -212,6 +213,7 @@ static vm_fault_t i915_error_to_vmf_fault(int err)
 	case -EIO: /* shmemfs failure from swap device */
 	case -EFAULT: /* purged object */
 	case -ENODEV: /* bad object, how did you get here! */
+	case -ENXIO: /* unable to access backing store (on device) */
 		return VM_FAULT_SIGBUS;
 
 	case -ENOSPC: /* shmemfs allocation failure */
@@ -236,42 +238,38 @@ static vm_fault_t vm_fault_cpu(struct vm_fault *vmf)
 	struct vm_area_struct *area = vmf->vma;
 	struct i915_mmap_offset *mmo = area->vm_private_data;
 	struct drm_i915_gem_object *obj = mmo->obj;
-	unsigned long i, size = area->vm_end - area->vm_start;
-	bool write = area->vm_flags & VM_WRITE;
-	vm_fault_t ret = VM_FAULT_SIGBUS;
+	resource_size_t iomap;
 	int err;
 
-	if (!i915_gem_object_has_struct_page(obj))
-		return ret;
-
 	/* Sanity check that we allow writing into this object */
-	if (i915_gem_object_is_readonly(obj) && write)
-		return ret;
+	if (unlikely(i915_gem_object_is_readonly(obj) &&
+		     area->vm_flags & VM_WRITE))
+		return VM_FAULT_SIGBUS;
 
 	err = i915_gem_object_pin_pages(obj);
 	if (err)
-		return i915_error_to_vmf_fault(err);
+		goto out;
 
-	/* PTEs are revoked in obj->ops->put_pages() */
-	for (i = 0; i < size >> PAGE_SHIFT; i++) {
-		struct page *page = i915_gem_object_get_page(obj, i);
-
-		ret = vmf_insert_pfn(area,
-				     (unsigned long)area->vm_start + i * PAGE_SIZE,
-				     page_to_pfn(page));
-		if (ret != VM_FAULT_NOPAGE)
-			break;
+	iomap = -1;
+	if (!i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_STRUCT_PAGE)) {
+		iomap = obj->mm.region->iomap.base;
+		iomap -= obj->mm.region->region.start;
 	}
 
-	if (write) {
+	/* PTEs are revoked in obj->ops->put_pages() */
+	err = remap_io_sg(area,
+			  area->vm_start, area->vm_end - area->vm_start,
+			  obj->mm.pages->sgl, iomap);
+
+	if (area->vm_flags & VM_WRITE) {
 		GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
-		obj->cache_dirty = true; /* XXX flush after PAT update? */
 		obj->mm.dirty = true;
 	}
 
 	i915_gem_object_unpin_pages(obj);
 
-	return ret;
+out:
+	return i915_error_to_vmf_fault(err);
 }
 
 static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
@@ -560,7 +558,9 @@ __assign_mmap_offset(struct drm_file *file,
 	}
 
 	if (mmap_type != I915_MMAP_TYPE_GTT &&
-	    !i915_gem_object_has_struct_page(obj)) {
+	    !i915_gem_object_type_has(obj,
+				      I915_GEM_OBJECT_HAS_STRUCT_PAGE |
+				      I915_GEM_OBJECT_HAS_IOMEM)) {
 		err = -ENODEV;
 		goto out;
 	}
@@ -694,6 +694,46 @@ static const struct vm_operations_struct vm_ops_cpu = {
 	.close = vm_close,
 };
 
+static int singleton_release(struct inode *inode, struct file *file)
+{
+	struct drm_i915_private *i915 = file->private_data;
+
+	cmpxchg(&i915->gem.mmap_singleton, file, NULL);
+	drm_dev_put(&i915->drm);
+
+	return 0;
+}
+
+static const struct file_operations singleton_fops = {
+	.owner = THIS_MODULE,
+	.release = singleton_release,
+};
+
+static struct file *mmap_singleton(struct drm_i915_private *i915)
+{
+	struct file *file;
+
+	rcu_read_lock();
+	file = i915->gem.mmap_singleton;
+	if (file && !get_file_rcu(file))
+		file = NULL;
+	rcu_read_unlock();
+	if (file)
+		return file;
+
+	file = anon_inode_getfile("i915.gem", &singleton_fops, i915, O_RDWR);
+	if (IS_ERR(file))
+		return file;
+
+	/* Everyone shares a single global address space */
+	file->f_mapping = i915->drm.anon_inode->i_mapping;
+
+	smp_store_mb(i915->gem.mmap_singleton, file);
+	drm_dev_get(&i915->drm);
+
+	return file;
+}
+
 /*
  * This overcomes the limitation in drm_gem_mmap's assignment of a
  * drm_gem_object as the vma->vm_private_data. Since we need to
@@ -707,6 +747,7 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
 	struct drm_device *dev = priv->minor->dev;
 	struct i915_mmap_offset *mmo = NULL;
 	struct drm_gem_object *obj = NULL;
+	struct file *anon;
 
 	if (drm_dev_is_unplugged(dev))
 		return -ENODEV;
@@ -755,9 +796,26 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
 		vma->vm_flags &= ~VM_MAYWRITE;
 	}
 
+	anon = mmap_singleton(to_i915(obj->dev));
+	if (IS_ERR(anon)) {
+		drm_gem_object_put_unlocked(obj);
+		return PTR_ERR(anon);
+	}
+
 	vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_private_data = mmo;
 
+	/*
+	 * We keep the ref on mmo->obj, not vm_file, but we require
+	 * vma->vm_file->f_mapping, see vma_link(), for later revocation.
+	 * Our userspace is accustomed to having per-file resource cleanup
+	 * (i.e. contexts, objects and requests) on their close(fd), which
+	 * requires avoiding extraneous references to their filp, hence why
+	 * we prefer to use an anonymous file for their mmaps.
+	 */
+	fput(vma->vm_file);
+	vma->vm_file = anon;
+
 	switch (mmo->mmap_type) {
 	case I915_MMAP_TYPE_WC:
 		vma->vm_page_prot =
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 858f8bf49a04..db70a3306e59 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -16,6 +16,7 @@
 #include "display/intel_frontbuffer.h"
 #include "i915_gem_object_types.h"
 #include "i915_gem_gtt.h"
+#include "i915_vma_types.h"
 
 void i915_gem_init__objects(struct drm_i915_private *i915);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 75197ca696a8..54aca5c9101e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -158,9 +158,7 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
 
 static void unmap_object(struct drm_i915_gem_object *obj, void *ptr)
 {
-	if (i915_gem_object_is_lmem(obj))
-		io_mapping_unmap((void __force __iomem *)ptr);
-	else if (is_vmalloc_addr(ptr))
+	if (is_vmalloc_addr(ptr))
 		vunmap(ptr);
 	else
 		kunmap(kmap_to_page(ptr));
@@ -236,46 +234,44 @@ unlock:
 	return err;
 }
 
+static inline pte_t iomap_pte(resource_size_t base,
+			      dma_addr_t offset,
+			      pgprot_t prot)
+{
+	return pte_mkspecial(pfn_pte((base + offset) >> PAGE_SHIFT, prot));
+}
+
 /* The 'mapping' part of i915_gem_object_pin_map() below */
 static void *i915_gem_object_map(struct drm_i915_gem_object *obj,
 				 enum i915_map_type type)
 {
-	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
+	unsigned long n_pte = obj->base.size >> PAGE_SHIFT;
 	struct sg_table *sgt = obj->mm.pages;
-	struct sgt_iter sgt_iter;
-	struct page *page;
-	struct page *stack_pages[32];
-	struct page **pages = stack_pages;
-	unsigned long i = 0;
+	pte_t *stack[32], **mem;
+	struct vm_struct *area;
 	pgprot_t pgprot;
-	void *addr;
 
-	if (i915_gem_object_is_lmem(obj)) {
-		void __iomem *io;
-
-		if (type != I915_MAP_WC)
-			return NULL;
-
-		io = i915_gem_object_lmem_io_map(obj, 0, obj->base.size);
-		return (void __force *)io;
-	}
+	if (!i915_gem_object_has_struct_page(obj) && type != I915_MAP_WC)
+		return NULL;
 
 	/* A single page can always be kmapped */
-	if (n_pages == 1 && type == I915_MAP_WB)
+	if (n_pte == 1 && type == I915_MAP_WB)
 		return kmap(sg_page(sgt->sgl));
 
-	if (n_pages > ARRAY_SIZE(stack_pages)) {
+	mem = stack;
+	if (n_pte > ARRAY_SIZE(stack)) {
 		/* Too big for stack -- allocate temporary array instead */
-		pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
-		if (!pages)
+		mem = kvmalloc_array(n_pte, sizeof(*mem), GFP_KERNEL);
+		if (!mem)
 			return NULL;
 	}
 
-	for_each_sgt_page(page, sgt_iter, sgt)
-		pages[i++] = page;
-
-	/* Check that we have the expected number of pages */
-	GEM_BUG_ON(i != n_pages);
+	area = alloc_vm_area(obj->base.size, mem);
+	if (!area) {
+		if (mem != stack)
+			kvfree(mem);
+		return NULL;
+	}
 
 	switch (type) {
 	default:
@@ -288,12 +284,31 @@ static void *i915_gem_object_map(struct drm_i915_gem_object *obj,
 		pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
 		break;
 	}
-	addr = vmap(pages, n_pages, 0, pgprot);
 
-	if (pages != stack_pages)
-		kvfree(pages);
+	if (i915_gem_object_has_struct_page(obj)) {
+		struct sgt_iter iter;
+		struct page *page;
+		pte_t **ptes = mem;
+
+		for_each_sgt_page(page, iter, sgt)
+			**ptes++ = mk_pte(page, pgprot);
+	} else {
+		resource_size_t iomap;
+		struct sgt_iter iter;
+		pte_t **ptes = mem;
+		dma_addr_t addr;
+
+		iomap = obj->mm.region->iomap.base;
+		iomap -= obj->mm.region->region.start;
+
+		for_each_sgt_daddr(addr, iter, sgt)
+			**ptes++ = iomap_pte(iomap, addr, pgprot);
+	}
+
+	if (mem != stack)
+		kvfree(mem);
 
-	return addr;
+	return area->addr;
 }
 
 /* get, pin, and map the pages of the object into kernel space */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c
index d50adac12249..1515384d7e0e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_region.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c
@@ -107,7 +107,10 @@ void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj,
 {
 	INIT_LIST_HEAD(&obj->mm.blocks);
 	obj->mm.region = intel_memory_region_get(mem);
+
 	obj->flags |= flags;
+	if (obj->base.size <= mem->min_page_size)
+		obj->flags |= I915_BO_ALLOC_CONTIGUOUS;
 
 	mutex_lock(&mem->objects.lock);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 4d69c3fc3439..a2a980d9d241 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -594,6 +594,8 @@ static int init_shmem(struct intel_memory_region *mem)
 			 err);
 	}
 
+	intel_memory_region_set_name(mem, "system");
+
 	return 0; /* Don't error, we can simply fallback to the kernel mnt */
 }
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index afb08a1704a2..451f3078d60d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -645,6 +645,8 @@ i915_gem_object_create_stolen(struct drm_i915_private *i915,
 
 static int init_stolen(struct intel_memory_region *mem)
 {
+	intel_memory_region_set_name(mem, "stolen");
+
 	/*
 	 * Initialise stolen early so that we may reserve preallocated
 	 * objects for the BIOS to KMS transition.
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h
index 549c1394bcdc..b8cf31b7bf14 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h
@@ -7,6 +7,12 @@
 #ifndef __HUGE_GEM_OBJECT_H
 #define __HUGE_GEM_OBJECT_H
 
+#include <linux/types.h>
+
+#include "gem/i915_gem_object_types.h"
+
+struct drm_i915_private;
+
 struct drm_i915_gem_object *
 huge_gem_object(struct drm_i915_private *i915,
 		phys_addr_t phys_size,
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index 2479395c1873..9311250d7d6f 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -1017,38 +1017,33 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val)
 	return err;
 }
 
-static int __cpu_check_lmem(struct drm_i915_gem_object *obj, u32 dword, u32 val)
+static int __cpu_check_vmap(struct drm_i915_gem_object *obj, u32 dword, u32 val)
 {
-	unsigned long n;
+	unsigned long n = obj->base.size >> PAGE_SHIFT;
+	u32 *ptr;
 	int err;
 
-	i915_gem_object_lock(obj);
-	err = i915_gem_object_set_to_wc_domain(obj, false);
-	i915_gem_object_unlock(obj);
-	if (err)
-		return err;
-
-	err = i915_gem_object_pin_pages(obj);
+	err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
 	if (err)
 		return err;
 
-	for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) {
-		u32 __iomem *base;
-		u32 read_val;
-
-		base = i915_gem_object_lmem_io_map_page_atomic(obj, n);
+	ptr = i915_gem_object_pin_map(obj, I915_MAP_WC);
+	if (IS_ERR(ptr))
+		return PTR_ERR(ptr);
 
-		read_val = ioread32(base + dword);
-		io_mapping_unmap_atomic(base);
-		if (read_val != val) {
-			pr_err("n=%lu base[%u]=%u, val=%u\n",
-			       n, dword, read_val, val);
+	ptr += dword;
+	while (n--) {
+		if (*ptr != val) {
+			pr_err("base[%u]=%08x, val=%08x\n",
+			       dword, *ptr, val);
 			err = -EINVAL;
 			break;
 		}
+
+		ptr += PAGE_SIZE / sizeof(*ptr);
 	}
 
-	i915_gem_object_unpin_pages(obj);
+	i915_gem_object_unpin_map(obj);
 	return err;
 }
 
@@ -1056,10 +1051,8 @@ static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
 {
 	if (i915_gem_object_has_struct_page(obj))
 		return __cpu_check_shmem(obj, dword, val);
-	else if (i915_gem_object_is_lmem(obj))
-		return __cpu_check_lmem(obj, dword, val);
-
-	return -ENODEV;
+	else
+		return __cpu_check_vmap(obj, dword, val);
 }
 
 static int __igt_write_huge(struct intel_context *ce,
@@ -1872,7 +1865,7 @@ int i915_gem_huge_page_mock_selftests(void)
 	mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL;
 	mkwrite_device_info(dev_priv)->ppgtt_size = 48;
 
-	ppgtt = i915_ppgtt_create(dev_priv);
+	ppgtt = i915_ppgtt_create(&dev_priv->gt);
 	if (IS_ERR(ppgtt)) {
 		err = PTR_ERR(ppgtt);
 		goto out_unlock;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index 49edc51111d5..3f6079e1dfb6 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -325,7 +325,10 @@ static int igt_gem_coherency(void *arg)
 	values = offsets + ncachelines;
 
 	ctx.engine = random_engine(i915, &prng);
-	GEM_BUG_ON(!ctx.engine);
+	if (!ctx.engine) {
+		err = -ENODEV;
+		goto out_free;
+	}
 	pr_info("%s: using %s\n", __func__, ctx.engine->name);
 	intel_engine_pm_get(ctx.engine);
 
@@ -354,7 +357,7 @@ static int igt_gem_coherency(void *arg)
 					ctx.obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
 					if (IS_ERR(ctx.obj)) {
 						err = PTR_ERR(ctx.obj);
-						goto free;
+						goto out_pm;
 					}
 
 					i915_random_reorder(offsets, ncachelines, &prng);
@@ -405,14 +408,15 @@ static int igt_gem_coherency(void *arg)
 			}
 		}
 	}
-free:
+out_pm:
 	intel_engine_pm_put(ctx.engine);
+out_free:
 	kfree(offsets);
 	return err;
 
 put_object:
 	i915_gem_object_put(ctx.obj);
-	goto free;
+	goto out_pm;
 }
 
 int i915_gem_coherency_live_selftests(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index cbf796da64e3..ef7c74cff28a 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -9,6 +9,7 @@
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
+#include "gem/i915_gem_region.h"
 #include "huge_gem_object.h"
 #include "i915_selftest.h"
 #include "selftests/i915_random.h"
@@ -725,114 +726,359 @@ err_obj:
 	goto out;
 }
 
-#define expand32(x) (((x) << 0) | ((x) << 8) | ((x) << 16) | ((x) << 24))
-static int igt_mmap(void *arg, enum i915_mmap_type type)
+static int gtt_set(struct drm_i915_gem_object *obj)
 {
-	struct drm_i915_private *i915 = arg;
-	struct drm_i915_gem_object *obj;
-	struct i915_mmap_offset *mmo;
-	struct vm_area_struct *area;
-	unsigned long addr;
-	void *vaddr;
-	int err = 0, i;
+	struct i915_vma *vma;
+	void __iomem *map;
+	int err = 0;
 
-	if (!i915_ggtt_has_aperture(&i915->ggtt))
-		return 0;
+	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
 
-	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
-	if (IS_ERR(obj))
-		return PTR_ERR(obj);
+	intel_gt_pm_get(vma->vm->gt);
+	map = i915_vma_pin_iomap(vma);
+	i915_vma_unpin(vma);
+	if (IS_ERR(map)) {
+		err = PTR_ERR(map);
+		goto out;
+	}
+
+	memset_io(map, POISON_INUSE, obj->base.size);
+	i915_vma_unpin_iomap(vma);
+
+out:
+	intel_gt_pm_put(vma->vm->gt);
+	return err;
+}
+
+static int gtt_check(struct drm_i915_gem_object *obj)
+{
+	struct i915_vma *vma;
+	void __iomem *map;
+	int err = 0;
 
-	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
-	if (IS_ERR(vaddr)) {
-		err = PTR_ERR(vaddr);
+	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	intel_gt_pm_get(vma->vm->gt);
+	map = i915_vma_pin_iomap(vma);
+	i915_vma_unpin(vma);
+	if (IS_ERR(map)) {
+		err = PTR_ERR(map);
 		goto out;
 	}
-	memset(vaddr, POISON_INUSE, PAGE_SIZE);
+
+	if (memchr_inv((void __force *)map, POISON_FREE, obj->base.size)) {
+		pr_err("%s: Write via mmap did not land in backing store (GTT)\n",
+		       obj->mm.region->name);
+		err = -EINVAL;
+	}
+	i915_vma_unpin_iomap(vma);
+
+out:
+	intel_gt_pm_put(vma->vm->gt);
+	return err;
+}
+
+static int wc_set(struct drm_i915_gem_object *obj)
+{
+	void *vaddr;
+
+	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
+	if (IS_ERR(vaddr))
+		return PTR_ERR(vaddr);
+
+	memset(vaddr, POISON_INUSE, obj->base.size);
 	i915_gem_object_flush_map(obj);
 	i915_gem_object_unpin_map(obj);
 
-	mmo = mmap_offset_attach(obj, type, NULL);
-	if (IS_ERR(mmo)) {
-		err = PTR_ERR(mmo);
-		goto out;
+	return 0;
+}
+
+static int wc_check(struct drm_i915_gem_object *obj)
+{
+	void *vaddr;
+	int err = 0;
+
+	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
+	if (IS_ERR(vaddr))
+		return PTR_ERR(vaddr);
+
+	if (memchr_inv(vaddr, POISON_FREE, obj->base.size)) {
+		pr_err("%s: Write via mmap did not land in backing store (WC)\n",
+		       obj->mm.region->name);
+		err = -EINVAL;
 	}
+	i915_gem_object_unpin_map(obj);
+
+	return err;
+}
+
+static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type)
+{
+	if (type == I915_MMAP_TYPE_GTT &&
+	    !i915_ggtt_has_aperture(&to_i915(obj->base.dev)->ggtt))
+		return false;
+
+	if (type != I915_MMAP_TYPE_GTT &&
+	    !i915_gem_object_type_has(obj,
+				      I915_GEM_OBJECT_HAS_STRUCT_PAGE |
+				      I915_GEM_OBJECT_HAS_IOMEM))
+		return false;
+
+	return true;
+}
+
+#define expand32(x) (((x) << 0) | ((x) << 8) | ((x) << 16) | ((x) << 24))
+static int __igt_mmap(struct drm_i915_private *i915,
+		      struct drm_i915_gem_object *obj,
+		      enum i915_mmap_type type)
+{
+	struct i915_mmap_offset *mmo;
+	struct vm_area_struct *area;
+	unsigned long addr;
+	int err, i;
+
+	if (!can_mmap(obj, type))
+		return 0;
+
+	err = wc_set(obj);
+	if (err == -ENXIO)
+		err = gtt_set(obj);
+	if (err)
+		return err;
+
+	mmo = mmap_offset_attach(obj, type, NULL);
+	if (IS_ERR(mmo))
+		return PTR_ERR(mmo);
 
 	addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED);
-	if (IS_ERR_VALUE(addr)) {
-		err = addr;
-		goto out;
-	}
+	if (IS_ERR_VALUE(addr))
+		return addr;
 
-	pr_debug("igt_mmap() @ %lx\n", addr);
+	pr_debug("igt_mmap(%s, %d) @ %lx\n", obj->mm.region->name, type, addr);
 
 	area = find_vma(current->mm, addr);
 	if (!area) {
-		pr_err("Did not create a vm_area_struct for the mmap\n");
+		pr_err("%s: Did not create a vm_area_struct for the mmap\n",
+		       obj->mm.region->name);
 		err = -EINVAL;
 		goto out_unmap;
 	}
 
 	if (area->vm_private_data != mmo) {
-		pr_err("vm_area_struct did not point back to our mmap_offset object!\n");
+		pr_err("%s: vm_area_struct did not point back to our mmap_offset object!\n",
+		       obj->mm.region->name);
 		err = -EINVAL;
 		goto out_unmap;
 	}
 
-	for (i = 0; i < PAGE_SIZE / sizeof(u32); i++) {
+	for (i = 0; i < obj->base.size / sizeof(u32); i++) {
 		u32 __user *ux = u64_to_user_ptr((u64)(addr + i * sizeof(*ux)));
 		u32 x;
 
 		if (get_user(x, ux)) {
-			pr_err("Unable to read from mmap, offset:%zd\n",
-			       i * sizeof(x));
+			pr_err("%s: Unable to read from mmap, offset:%zd\n",
+			       obj->mm.region->name, i * sizeof(x));
 			err = -EFAULT;
-			break;
+			goto out_unmap;
 		}
 
 		if (x != expand32(POISON_INUSE)) {
-			pr_err("Read incorrect value from mmap, offset:%zd, found:%x, expected:%x\n",
+			pr_err("%s: Read incorrect value from mmap, offset:%zd, found:%x, expected:%x\n",
+			       obj->mm.region->name,
 			       i * sizeof(x), x, expand32(POISON_INUSE));
 			err = -EINVAL;
-			break;
+			goto out_unmap;
 		}
 
 		x = expand32(POISON_FREE);
 		if (put_user(x, ux)) {
-			pr_err("Unable to write to mmap, offset:%zd\n",
-			       i * sizeof(x));
+			pr_err("%s: Unable to write to mmap, offset:%zd\n",
+			       obj->mm.region->name, i * sizeof(x));
 			err = -EFAULT;
-			break;
+			goto out_unmap;
 		}
 	}
 
+	if (type == I915_MMAP_TYPE_GTT)
+		intel_gt_flush_ggtt_writes(&i915->gt);
+
+	err = wc_check(obj);
+	if (err == -ENXIO)
+		err = gtt_check(obj);
 out_unmap:
-	vm_munmap(addr, PAGE_SIZE);
+	vm_munmap(addr, obj->base.size);
+	return err;
+}
 
-	vaddr = i915_gem_object_pin_map(obj, I915_MAP_FORCE_WC);
-	if (IS_ERR(vaddr)) {
-		err = PTR_ERR(vaddr);
-		goto out;
-	}
-	if (err == 0 && memchr_inv(vaddr, POISON_FREE, PAGE_SIZE)) {
-		pr_err("Write via mmap did not land in backing store\n");
-		err = -EINVAL;
+static int igt_mmap(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_memory_region *mr;
+	enum intel_region_id id;
+
+	for_each_memory_region(mr, i915, id) {
+		unsigned long sizes[] = {
+			PAGE_SIZE,
+			mr->min_page_size,
+			SZ_4M,
+		};
+		int i;
+
+		for (i = 0; i < ARRAY_SIZE(sizes); i++) {
+			struct drm_i915_gem_object *obj;
+			int err;
+
+			obj = i915_gem_object_create_region(mr, sizes[i], 0);
+			if (obj == ERR_PTR(-ENODEV))
+				continue;
+
+			if (IS_ERR(obj))
+				return PTR_ERR(obj);
+
+			err = __igt_mmap(i915, obj, I915_MMAP_TYPE_GTT);
+			if (err == 0)
+				err = __igt_mmap(i915, obj, I915_MMAP_TYPE_WC);
+
+			i915_gem_object_put(obj);
+			if (err)
+				return err;
+		}
 	}
-	i915_gem_object_unpin_map(obj);
 
-out:
-	i915_gem_object_put(obj);
-	return err;
+	return 0;
 }
 
-static int igt_mmap_gtt(void *arg)
+static int __igt_mmap_gpu(struct drm_i915_private *i915,
+			  struct drm_i915_gem_object *obj,
+			  enum i915_mmap_type type)
 {
-	return igt_mmap(arg, I915_MMAP_TYPE_GTT);
+	struct intel_engine_cs *engine;
+	struct i915_mmap_offset *mmo;
+	unsigned long addr;
+	u32 __user *ux;
+	u32 bbe;
+	int err;
+
+	/*
+	 * Verify that the mmap access into the backing store aligns with
+	 * that of the GPU, i.e. that mmap is indeed writing into the same
+	 * page as being read by the GPU.
+	 */
+
+	if (!can_mmap(obj, type))
+		return 0;
+
+	err = wc_set(obj);
+	if (err == -ENXIO)
+		err = gtt_set(obj);
+	if (err)
+		return err;
+
+	mmo = mmap_offset_attach(obj, type, NULL);
+	if (IS_ERR(mmo))
+		return PTR_ERR(mmo);
+
+	addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED);
+	if (IS_ERR_VALUE(addr))
+		return addr;
+
+	ux = u64_to_user_ptr((u64)addr);
+	bbe = MI_BATCH_BUFFER_END;
+	if (put_user(bbe, ux)) {
+		pr_err("%s: Unable to write to mmap\n", obj->mm.region->name);
+		err = -EFAULT;
+		goto out_unmap;
+	}
+
+	if (type == I915_MMAP_TYPE_GTT)
+		intel_gt_flush_ggtt_writes(&i915->gt);
+
+	for_each_uabi_engine(engine, i915) {
+		struct i915_request *rq;
+		struct i915_vma *vma;
+
+		vma = i915_vma_instance(obj, engine->kernel_context->vm, NULL);
+		if (IS_ERR(vma)) {
+			err = PTR_ERR(vma);
+			goto out_unmap;
+		}
+
+		err = i915_vma_pin(vma, 0, 0, PIN_USER);
+		if (err)
+			goto out_unmap;
+
+		rq = i915_request_create(engine->kernel_context);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto out_unpin;
+		}
+
+		i915_vma_lock(vma);
+		err = i915_request_await_object(rq, vma->obj, false);
+		if (err == 0)
+			err = i915_vma_move_to_active(vma, rq, 0);
+		i915_vma_unlock(vma);
+
+		err = engine->emit_bb_start(rq, vma->node.start, 0, 0);
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+			struct drm_printer p =
+				drm_info_printer(engine->i915->drm.dev);
+
+			pr_err("%s(%s, %s): Failed to execute batch\n",
+			       __func__, engine->name, obj->mm.region->name);
+			intel_engine_dump(engine, &p,
+					  "%s\n", engine->name);
+
+			intel_gt_set_wedged(engine->gt);
+			err = -EIO;
+		}
+		i915_request_put(rq);
+
+out_unpin:
+		i915_vma_unpin(vma);
+		if (err)
+			goto out_unmap;
+	}
+
+out_unmap:
+	vm_munmap(addr, obj->base.size);
+	return err;
 }
 
-static int igt_mmap_cpu(void *arg)
+static int igt_mmap_gpu(void *arg)
 {
-	return igt_mmap(arg, I915_MMAP_TYPE_WC);
+	struct drm_i915_private *i915 = arg;
+	struct intel_memory_region *mr;
+	enum intel_region_id id;
+
+	for_each_memory_region(mr, i915, id) {
+		struct drm_i915_gem_object *obj;
+		int err;
+
+		obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0);
+		if (obj == ERR_PTR(-ENODEV))
+			continue;
+
+		if (IS_ERR(obj))
+			return PTR_ERR(obj);
+
+		err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_GTT);
+		if (err == 0)
+			err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_WC);
+
+		i915_gem_object_put(obj);
+		if (err)
+			return err;
+	}
+
+	return 0;
 }
 
 static int check_present_pte(pte_t *pte, unsigned long addr, void *data)
@@ -887,32 +1133,24 @@ static int prefault_range(u64 start, u64 len)
 	return __get_user(c, end - 1);
 }
 
-static int igt_mmap_revoke(void *arg, enum i915_mmap_type type)
+static int __igt_mmap_revoke(struct drm_i915_private *i915,
+			     struct drm_i915_gem_object *obj,
+			     enum i915_mmap_type type)
 {
-	struct drm_i915_private *i915 = arg;
-	struct drm_i915_gem_object *obj;
 	struct i915_mmap_offset *mmo;
 	unsigned long addr;
 	int err;
 
-	if (!i915_ggtt_has_aperture(&i915->ggtt))
+	if (!can_mmap(obj, type))
 		return 0;
 
-	obj = i915_gem_object_create_internal(i915, SZ_4M);
-	if (IS_ERR(obj))
-		return PTR_ERR(obj);
-
 	mmo = mmap_offset_attach(obj, type, NULL);
-	if (IS_ERR(mmo)) {
-		err = PTR_ERR(mmo);
-		goto out;
-	}
+	if (IS_ERR(mmo))
+		return PTR_ERR(mmo);
 
 	addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED);
-	if (IS_ERR_VALUE(addr)) {
-		err = addr;
-		goto out;
-	}
+	if (IS_ERR_VALUE(addr))
+		return addr;
 
 	err = prefault_range(addr, obj->base.size);
 	if (err)
@@ -922,8 +1160,10 @@ static int igt_mmap_revoke(void *arg, enum i915_mmap_type type)
 		   !atomic_read(&obj->bind_count));
 
 	err = check_present(addr, obj->base.size);
-	if (err)
+	if (err) {
+		pr_err("%s: was not present\n", obj->mm.region->name);
 		goto out_unmap;
+	}
 
 	/*
 	 * After unbinding the object from the GGTT, its address may be reused
@@ -947,24 +1187,43 @@ static int igt_mmap_revoke(void *arg, enum i915_mmap_type type)
 	}
 
 	err = check_absent(addr, obj->base.size);
-	if (err)
+	if (err) {
+		pr_err("%s: was not absent\n", obj->mm.region->name);
 		goto out_unmap;
+	}
 
 out_unmap:
 	vm_munmap(addr, obj->base.size);
-out:
-	i915_gem_object_put(obj);
 	return err;
 }
 
-static int igt_mmap_gtt_revoke(void *arg)
+static int igt_mmap_revoke(void *arg)
 {
-	return igt_mmap_revoke(arg, I915_MMAP_TYPE_GTT);
-}
+	struct drm_i915_private *i915 = arg;
+	struct intel_memory_region *mr;
+	enum intel_region_id id;
 
-static int igt_mmap_cpu_revoke(void *arg)
-{
-	return igt_mmap_revoke(arg, I915_MMAP_TYPE_WC);
+	for_each_memory_region(mr, i915, id) {
+		struct drm_i915_gem_object *obj;
+		int err;
+
+		obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0);
+		if (obj == ERR_PTR(-ENODEV))
+			continue;
+
+		if (IS_ERR(obj))
+			return PTR_ERR(obj);
+
+		err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_GTT);
+		if (err == 0)
+			err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_WC);
+
+		i915_gem_object_put(obj);
+		if (err)
+			return err;
+	}
+
+	return 0;
 }
 
 int i915_gem_mman_live_selftests(struct drm_i915_private *i915)
@@ -973,10 +1232,9 @@ int i915_gem_mman_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(igt_partial_tiling),
 		SUBTEST(igt_smoke_tiling),
 		SUBTEST(igt_mmap_offset_exhaustion),
-		SUBTEST(igt_mmap_gtt),
-		SUBTEST(igt_mmap_cpu),
-		SUBTEST(igt_mmap_gtt_revoke),
-		SUBTEST(igt_mmap_cpu_revoke),
+		SUBTEST(igt_mmap),
+		SUBTEST(igt_mmap_revoke),
+		SUBTEST(igt_mmap_gpu),
 	};
 
 	return i915_subtests(tests, i915);
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index 7d7e13dc2fdf..384143aa7776 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -77,12 +77,13 @@ live_context(struct drm_i915_private *i915, struct file *file)
 {
 	struct i915_gem_context *ctx;
 	int err;
+	u32 id;
 
 	ctx = i915_gem_create_context(i915, 0);
 	if (IS_ERR(ctx))
 		return ctx;
 
-	err = gem_context_register(ctx, to_drm_file(file)->driver_priv);
+	err = gem_context_register(ctx, to_drm_file(file)->driver_priv, &id);
 	if (err < 0)
 		goto err_ctx;
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h b/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h
index 370360b4a148..688511afa883 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h
@@ -7,6 +7,8 @@
 #ifndef __MOCK_GEM_OBJECT_H__
 #define __MOCK_GEM_OBJECT_H__
 
+#include "gem/i915_gem_object_types.h"
+
 struct mock_object {
 	struct drm_i915_gem_object base;
 };
diff --git a/drivers/gpu/drm/i915/gt/Makefile b/drivers/gpu/drm/i915/gt/Makefile
deleted file mode 100644
index 7e73aa587967..000000000000
--- a/drivers/gpu/drm/i915/gt/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-# For building individual subdir files on the command line
-subdir-ccflags-y += -I$(srctree)/$(src)/..
-
-# Extra header tests
-header-test-pattern-$(CONFIG_DRM_I915_WERROR) := *.h
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
new file mode 100644
index 000000000000..f10b2c41571c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -0,0 +1,482 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/log2.h>
+
+#include "gen6_ppgtt.h"
+#include "i915_scatterlist.h"
+#include "i915_trace.h"
+#include "i915_vgpu.h"
+#include "intel_gt.h"
+
+/* Write pde (index) from the page directory @pd to the page table @pt */
+static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt,
+				  const unsigned int pde,
+				  const struct i915_page_table *pt)
+{
+	/* Caller needs to make sure the write completes if necessary */
+	iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
+		  ppgtt->pd_addr + pde);
+}
+
+void gen7_ppgtt_enable(struct intel_gt *gt)
+{
+	struct drm_i915_private *i915 = gt->i915;
+	struct intel_uncore *uncore = gt->uncore;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	u32 ecochk;
+
+	intel_uncore_rmw(uncore, GAC_ECO_BITS, 0, ECOBITS_PPGTT_CACHE64B);
+
+	ecochk = intel_uncore_read(uncore, GAM_ECOCHK);
+	if (IS_HASWELL(i915)) {
+		ecochk |= ECOCHK_PPGTT_WB_HSW;
+	} else {
+		ecochk |= ECOCHK_PPGTT_LLC_IVB;
+		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
+	}
+	intel_uncore_write(uncore, GAM_ECOCHK, ecochk);
+
+	for_each_engine(engine, gt, id) {
+		/* GFX_MODE is per-ring on gen7+ */
+		ENGINE_WRITE(engine,
+			     RING_MODE_GEN7,
+			     _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+	}
+}
+
+void gen6_ppgtt_enable(struct intel_gt *gt)
+{
+	struct intel_uncore *uncore = gt->uncore;
+
+	intel_uncore_rmw(uncore,
+			 GAC_ECO_BITS,
+			 0,
+			 ECOBITS_SNB_BIT | ECOBITS_PPGTT_CACHE64B);
+
+	intel_uncore_rmw(uncore,
+			 GAB_CTL,
+			 0,
+			 GAB_CTL_CONT_AFTER_PAGEFAULT);
+
+	intel_uncore_rmw(uncore,
+			 GAM_ECOCHK,
+			 0,
+			 ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
+
+	if (HAS_PPGTT(uncore->i915)) /* may be disabled for VT-d */
+		intel_uncore_write(uncore,
+				   GFX_MODE,
+				   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+}
+
+/* PPGTT support for Sandybridge/Gen6 and later */
+static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
+				   u64 start, u64 length)
+{
+	struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
+	const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
+	const gen6_pte_t scratch_pte = vm->scratch[0].encode;
+	unsigned int pde = first_entry / GEN6_PTES;
+	unsigned int pte = first_entry % GEN6_PTES;
+	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
+
+	while (num_entries) {
+		struct i915_page_table * const pt =
+			i915_pt_entry(ppgtt->base.pd, pde++);
+		const unsigned int count = min(num_entries, GEN6_PTES - pte);
+		gen6_pte_t *vaddr;
+
+		GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1]));
+
+		num_entries -= count;
+
+		GEM_BUG_ON(count > atomic_read(&pt->used));
+		if (!atomic_sub_return(count, &pt->used))
+			ppgtt->scan_for_unused_pt = true;
+
+		/*
+		 * Note that the hw doesn't support removing PDE on the fly
+		 * (they are cached inside the context with no means to
+		 * invalidate the cache), so we can only reset the PTE
+		 * entries back to scratch.
+		 */
+
+		vaddr = kmap_atomic_px(pt);
+		memset32(vaddr + pte, scratch_pte, count);
+		kunmap_atomic(vaddr);
+
+		pte = 0;
+	}
+}
+
+static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
+				      struct i915_vma *vma,
+				      enum i915_cache_level cache_level,
+				      u32 flags)
+{
+	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+	struct i915_page_directory * const pd = ppgtt->pd;
+	unsigned int first_entry = vma->node.start / I915_GTT_PAGE_SIZE;
+	unsigned int act_pt = first_entry / GEN6_PTES;
+	unsigned int act_pte = first_entry % GEN6_PTES;
+	const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
+	struct sgt_dma iter = sgt_dma(vma);
+	gen6_pte_t *vaddr;
+
+	GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch[1]);
+
+	vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
+	do {
+		vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
+
+		iter.dma += I915_GTT_PAGE_SIZE;
+		if (iter.dma == iter.max) {
+			iter.sg = __sg_next(iter.sg);
+			if (!iter.sg)
+				break;
+
+			iter.dma = sg_dma_address(iter.sg);
+			iter.max = iter.dma + iter.sg->length;
+		}
+
+		if (++act_pte == GEN6_PTES) {
+			kunmap_atomic(vaddr);
+			vaddr = kmap_atomic_px(i915_pt_entry(pd, ++act_pt));
+			act_pte = 0;
+		}
+	} while (1);
+	kunmap_atomic(vaddr);
+
+	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
+}
+
+static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end)
+{
+	struct i915_page_directory * const pd = ppgtt->base.pd;
+	struct i915_page_table *pt;
+	unsigned int pde;
+
+	start = round_down(start, SZ_64K);
+	end = round_up(end, SZ_64K) - start;
+
+	mutex_lock(&ppgtt->flush);
+
+	gen6_for_each_pde(pt, pd, start, end, pde)
+		gen6_write_pde(ppgtt, pde, pt);
+
+	mb();
+	ioread32(ppgtt->pd_addr + pde - 1);
+	gen6_ggtt_invalidate(ppgtt->base.vm.gt->ggtt);
+	mb();
+
+	mutex_unlock(&ppgtt->flush);
+}
+
+static int gen6_alloc_va_range(struct i915_address_space *vm,
+			       u64 start, u64 length)
+{
+	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
+	struct i915_page_directory * const pd = ppgtt->base.pd;
+	struct i915_page_table *pt, *alloc = NULL;
+	intel_wakeref_t wakeref;
+	u64 from = start;
+	unsigned int pde;
+	int ret = 0;
+
+	wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
+
+	spin_lock(&pd->lock);
+	gen6_for_each_pde(pt, pd, start, length, pde) {
+		const unsigned int count = gen6_pte_count(start, length);
+
+		if (px_base(pt) == px_base(&vm->scratch[1])) {
+			spin_unlock(&pd->lock);
+
+			pt = fetch_and_zero(&alloc);
+			if (!pt)
+				pt = alloc_pt(vm);
+			if (IS_ERR(pt)) {
+				ret = PTR_ERR(pt);
+				goto unwind_out;
+			}
+
+			fill32_px(pt, vm->scratch[0].encode);
+
+			spin_lock(&pd->lock);
+			if (pd->entry[pde] == &vm->scratch[1]) {
+				pd->entry[pde] = pt;
+			} else {
+				alloc = pt;
+				pt = pd->entry[pde];
+			}
+		}
+
+		atomic_add(count, &pt->used);
+	}
+	spin_unlock(&pd->lock);
+
+	if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND))
+		gen6_flush_pd(ppgtt, from, start);
+
+	goto out;
+
+unwind_out:
+	gen6_ppgtt_clear_range(vm, from, start - from);
+out:
+	if (alloc)
+		free_px(vm, alloc);
+	intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
+	return ret;
+}
+
+static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
+{
+	struct i915_address_space * const vm = &ppgtt->base.vm;
+	struct i915_page_directory * const pd = ppgtt->base.pd;
+	int ret;
+
+	ret = setup_scratch_page(vm, __GFP_HIGHMEM);
+	if (ret)
+		return ret;
+
+	vm->scratch[0].encode =
+		vm->pte_encode(px_dma(&vm->scratch[0]),
+			       I915_CACHE_NONE, PTE_READ_ONLY);
+
+	if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[1])))) {
+		cleanup_scratch_page(vm);
+		return -ENOMEM;
+	}
+
+	fill32_px(&vm->scratch[1], vm->scratch[0].encode);
+	memset_p(pd->entry, &vm->scratch[1], I915_PDES);
+
+	return 0;
+}
+
+static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
+{
+	struct i915_page_directory * const pd = ppgtt->base.pd;
+	struct i915_page_dma * const scratch =
+		px_base(&ppgtt->base.vm.scratch[1]);
+	struct i915_page_table *pt;
+	u32 pde;
+
+	gen6_for_all_pdes(pt, pd, pde)
+		if (px_base(pt) != scratch)
+			free_px(&ppgtt->base.vm, pt);
+}
+
+static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
+{
+	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
+
+	__i915_vma_put(ppgtt->vma);
+
+	gen6_ppgtt_free_pd(ppgtt);
+	free_scratch(vm);
+
+	mutex_destroy(&ppgtt->flush);
+	mutex_destroy(&ppgtt->pin_mutex);
+	kfree(ppgtt->base.pd);
+}
+
+static int pd_vma_set_pages(struct i915_vma *vma)
+{
+	vma->pages = ERR_PTR(-ENODEV);
+	return 0;
+}
+
+static void pd_vma_clear_pages(struct i915_vma *vma)
+{
+	GEM_BUG_ON(!vma->pages);
+
+	vma->pages = NULL;
+}
+
+static int pd_vma_bind(struct i915_vma *vma,
+		       enum i915_cache_level cache_level,
+		       u32 unused)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
+	struct gen6_ppgtt *ppgtt = vma->private;
+	u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;
+
+	px_base(ppgtt->base.pd)->ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
+	ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
+
+	gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total);
+	return 0;
+}
+
+static void pd_vma_unbind(struct i915_vma *vma)
+{
+	struct gen6_ppgtt *ppgtt = vma->private;
+	struct i915_page_directory * const pd = ppgtt->base.pd;
+	struct i915_page_dma * const scratch =
+		px_base(&ppgtt->base.vm.scratch[1]);
+	struct i915_page_table *pt;
+	unsigned int pde;
+
+	if (!ppgtt->scan_for_unused_pt)
+		return;
+
+	/* Free all no longer used page tables */
+	gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
+		if (px_base(pt) == scratch || atomic_read(&pt->used))
+			continue;
+
+		free_px(&ppgtt->base.vm, pt);
+		pd->entry[pde] = scratch;
+	}
+
+	ppgtt->scan_for_unused_pt = false;
+}
+
+static const struct i915_vma_ops pd_vma_ops = {
+	.set_pages = pd_vma_set_pages,
+	.clear_pages = pd_vma_clear_pages,
+	.bind_vma = pd_vma_bind,
+	.unbind_vma = pd_vma_unbind,
+};
+
+static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
+{
+	struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt;
+	struct i915_vma *vma;
+
+	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
+	GEM_BUG_ON(size > ggtt->vm.total);
+
+	vma = i915_vma_alloc();
+	if (!vma)
+		return ERR_PTR(-ENOMEM);
+
+	i915_active_init(&vma->active, NULL, NULL);
+
+	kref_init(&vma->ref);
+	mutex_init(&vma->pages_mutex);
+	vma->vm = i915_vm_get(&ggtt->vm);
+	vma->ops = &pd_vma_ops;
+	vma->private = ppgtt;
+
+	vma->size = size;
+	vma->fence_size = size;
+	atomic_set(&vma->flags, I915_VMA_GGTT);
+	vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
+
+	INIT_LIST_HEAD(&vma->obj_link);
+	INIT_LIST_HEAD(&vma->closed_link);
+
+	return vma;
+}
+
+int gen6_ppgtt_pin(struct i915_ppgtt *base)
+{
+	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
+	int err;
+
+	GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open));
+
+	/*
+	 * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
+	 * which will be pinned into every active context.
+	 * (When vma->pin_count becomes atomic, I expect we will naturally
+	 * need a larger, unpacked, type and kill this redundancy.)
+	 */
+	if (atomic_add_unless(&ppgtt->pin_count, 1, 0))
+		return 0;
+
+	if (mutex_lock_interruptible(&ppgtt->pin_mutex))
+		return -EINTR;
+
+	/*
+	 * PPGTT PDEs reside in the GGTT and consist of 512 entries. The
+	 * allocator works in address space sizes, so it's multiplied by page
+	 * size. We allocate at the top of the GTT to avoid fragmentation.
+	 */
+	err = 0;
+	if (!atomic_read(&ppgtt->pin_count))
+		err = i915_ggtt_pin(ppgtt->vma, GEN6_PD_ALIGN, PIN_HIGH);
+	if (!err)
+		atomic_inc(&ppgtt->pin_count);
+	mutex_unlock(&ppgtt->pin_mutex);
+
+	return err;
+}
+
+void gen6_ppgtt_unpin(struct i915_ppgtt *base)
+{
+	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
+
+	GEM_BUG_ON(!atomic_read(&ppgtt->pin_count));
+	if (atomic_dec_and_test(&ppgtt->pin_count))
+		i915_vma_unpin(ppgtt->vma);
+}
+
+void gen6_ppgtt_unpin_all(struct i915_ppgtt *base)
+{
+	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
+
+	if (!atomic_read(&ppgtt->pin_count))
+		return;
+
+	i915_vma_unpin(ppgtt->vma);
+	atomic_set(&ppgtt->pin_count, 0);
+}
+
+struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
+{
+	struct i915_ggtt * const ggtt = gt->ggtt;
+	struct gen6_ppgtt *ppgtt;
+	int err;
+
+	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
+	if (!ppgtt)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_init(&ppgtt->flush);
+	mutex_init(&ppgtt->pin_mutex);
+
+	ppgtt_init(&ppgtt->base, gt);
+	ppgtt->base.vm.top = 1;
+
+	ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
+	ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
+	ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
+	ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
+	ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;
+
+	ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
+
+	ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd));
+	if (!ppgtt->base.pd) {
+		err = -ENOMEM;
+		goto err_free;
+	}
+
+	err = gen6_ppgtt_init_scratch(ppgtt);
+	if (err)
+		goto err_pd;
+
+	ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
+	if (IS_ERR(ppgtt->vma)) {
+		err = PTR_ERR(ppgtt->vma);
+		goto err_scratch;
+	}
+
+	return &ppgtt->base;
+
+err_scratch:
+	free_scratch(&ppgtt->base.vm);
+err_pd:
+	kfree(ppgtt->base.pd);
+err_free:
+	mutex_destroy(&ppgtt->pin_mutex);
+	kfree(ppgtt);
+	return ERR_PTR(err);
+}
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
new file mode 100644
index 000000000000..72e481806c96
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef __GEN6_PPGTT_H__
+#define __GEN6_PPGTT_H__
+
+#include "intel_gtt.h"
+
+struct gen6_ppgtt {
+	struct i915_ppgtt base;
+
+	struct mutex flush;
+	struct i915_vma *vma;
+	gen6_pte_t __iomem *pd_addr;
+
+	atomic_t pin_count;
+	struct mutex pin_mutex;
+
+	bool scan_for_unused_pt;
+};
+
+static inline u32 gen6_pte_index(u32 addr)
+{
+	return i915_pte_index(addr, GEN6_PDE_SHIFT);
+}
+
+static inline u32 gen6_pte_count(u32 addr, u32 length)
+{
+	return i915_pte_count(addr, length, GEN6_PDE_SHIFT);
+}
+
+static inline u32 gen6_pde_index(u32 addr)
+{
+	return i915_pde_index(addr, GEN6_PDE_SHIFT);
+}
+
+#define __to_gen6_ppgtt(base) container_of(base, struct gen6_ppgtt, base)
+
+static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base)
+{
+	BUILD_BUG_ON(offsetof(struct gen6_ppgtt, base));
+	return __to_gen6_ppgtt(base);
+}
+
+/*
+ * gen6_for_each_pde() iterates over every pde from start until start+length.
+ * If start and start+length are not perfectly divisible, the macro will round
+ * down and up as needed. Start=0 and length=2G effectively iterates over
+ * every PDE in the system. The macro modifies ALL its parameters except 'pd',
+ * so each of the other parameters should preferably be a simple variable, or
+ * at most an lvalue with no side-effects!
+ */
+#define gen6_for_each_pde(pt, pd, start, length, iter)			\
+	for (iter = gen6_pde_index(start);				\
+	     length > 0 && iter < I915_PDES &&				\
+		     (pt = i915_pt_entry(pd, iter), true);		\
+	     ({ u32 temp = ALIGN(start+1, 1 << GEN6_PDE_SHIFT);		\
+		    temp = min(temp - start, length);			\
+		    start += temp, length -= temp; }), ++iter)
+
+#define gen6_for_all_pdes(pt, pd, iter)					\
+	for (iter = 0;							\
+	     iter < I915_PDES &&					\
+		     (pt = i915_pt_entry(pd, iter), true);		\
+	     ++iter)
+
+int gen6_ppgtt_pin(struct i915_ppgtt *base);
+void gen6_ppgtt_unpin(struct i915_ppgtt *base);
+void gen6_ppgtt_unpin_all(struct i915_ppgtt *base);
+void gen6_ppgtt_enable(struct intel_gt *gt);
+void gen7_ppgtt_enable(struct intel_gt *gt);
+struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
new file mode 100644
index 000000000000..077b8f7cf6cb
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -0,0 +1,723 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/log2.h>
+
+#include "gen8_ppgtt.h"
+#include "i915_scatterlist.h"
+#include "i915_trace.h"
+#include "i915_vgpu.h"
+#include "intel_gt.h"
+#include "intel_gtt.h"
+
+static u64 gen8_pde_encode(const dma_addr_t addr,
+			   const enum i915_cache_level level)
+{
+	u64 pde = addr | _PAGE_PRESENT | _PAGE_RW;
+
+	if (level != I915_CACHE_NONE)
+		pde |= PPAT_CACHED_PDE;
+	else
+		pde |= PPAT_UNCACHED;
+
+	return pde;
+}
+
+static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
+{
+	struct drm_i915_private *i915 = ppgtt->vm.i915;
+	struct intel_uncore *uncore = ppgtt->vm.gt->uncore;
+	enum vgt_g2v_type msg;
+	int i;
+
+	if (create)
+		atomic_inc(px_used(ppgtt->pd)); /* never remove */
+	else
+		atomic_dec(px_used(ppgtt->pd));
+
+	mutex_lock(&i915->vgpu.lock);
+
+	if (i915_vm_is_4lvl(&ppgtt->vm)) {
+		const u64 daddr = px_dma(ppgtt->pd);
+
+		intel_uncore_write(uncore,
+				   vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
+		intel_uncore_write(uncore,
+				   vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
+
+		msg = create ?
+			VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
+			VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY;
+	} else {
+		for (i = 0; i < GEN8_3LVL_PDPES; i++) {
+			const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
+
+			intel_uncore_write(uncore,
+					   vgtif_reg(pdp[i].lo),
+					   lower_32_bits(daddr));
+			intel_uncore_write(uncore,
+					   vgtif_reg(pdp[i].hi),
+					   upper_32_bits(daddr));
+		}
+
+		msg = create ?
+			VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
+			VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY;
+	}
+
+	/* g2v_notify atomically (via hv trap) consumes the message packet. */
+	intel_uncore_write(uncore, vgtif_reg(g2v_notify), msg);
+
+	mutex_unlock(&i915->vgpu.lock);
+}
+
+/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */
+#define GEN8_PAGE_SIZE (SZ_4K) /* page and page-directory sizes are the same */
+#define GEN8_PTE_SHIFT (ilog2(GEN8_PAGE_SIZE))
+#define GEN8_PDES (GEN8_PAGE_SIZE / sizeof(u64))
+#define gen8_pd_shift(lvl) ((lvl) * ilog2(GEN8_PDES))
+#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl))
+#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl))
+#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl))
+
+#define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt)
+
+static inline unsigned int
+gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx)
+{
+	const int shift = gen8_pd_shift(lvl);
+	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);
+
+	GEM_BUG_ON(start >= end);
+	end += ~mask >> gen8_pd_shift(1);
+
+	*idx = i915_pde_index(start, shift);
+	if ((start ^ end) & mask)
+		return GEN8_PDES - *idx;
+	else
+		return i915_pde_index(end, shift) - *idx;
+}
+
+static inline bool gen8_pd_contains(u64 start, u64 end, int lvl)
+{
+	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);
+
+	GEM_BUG_ON(start >= end);
+	return (start ^ end) & mask && (start & ~mask) == 0;
+}
+
+static inline unsigned int gen8_pt_count(u64 start, u64 end)
+{
+	GEM_BUG_ON(start >= end);
+	if ((start ^ end) >> gen8_pd_shift(1))
+		return GEN8_PDES - (start & (GEN8_PDES - 1));
+	else
+		return end - start;
+}
+
+static inline unsigned int
+gen8_pd_top_count(const struct i915_address_space *vm)
+{
+	unsigned int shift = __gen8_pte_shift(vm->top);
+	return (vm->total + (1ull << shift) - 1) >> shift;
+}
+
+static inline struct i915_page_directory *
+gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx)
+{
+	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
+
+	if (vm->top == 2)
+		return ppgtt->pd;
+	else
+		return i915_pd_entry(ppgtt->pd, gen8_pd_index(idx, vm->top));
+}
+
+static inline struct i915_page_directory *
+gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr)
+{
+	return gen8_pdp_for_page_index(vm, addr >> GEN8_PTE_SHIFT);
+}
+
+static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
+				 struct i915_page_directory *pd,
+				 int count, int lvl)
+{
+	if (lvl) {
+		void **pde = pd->entry;
+
+		do {
+			if (!*pde)
+				continue;
+
+			__gen8_ppgtt_cleanup(vm, *pde, GEN8_PDES, lvl - 1);
+		} while (pde++, --count);
+	}
+
+	free_px(vm, pd);
+}
+
+static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
+{
+	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+
+	if (intel_vgpu_active(vm->i915))
+		gen8_ppgtt_notify_vgt(ppgtt, false);
+
+	__gen8_ppgtt_cleanup(vm, ppgtt->pd, gen8_pd_top_count(vm), vm->top);
+	free_scratch(vm);
+}
+
+static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
+			      struct i915_page_directory * const pd,
+			      u64 start, const u64 end, int lvl)
+{
+	const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
+	unsigned int idx, len;
+
+	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
+
+	len = gen8_pd_range(start, end, lvl--, &idx);
+	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
+	    __func__, vm, lvl + 1, start, end,
+	    idx, len, atomic_read(px_used(pd)));
+	GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));
+
+	do {
+		struct i915_page_table *pt = pd->entry[idx];
+
+		if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) &&
+		    gen8_pd_contains(start, end, lvl)) {
+			DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
+			    __func__, vm, lvl + 1, idx, start, end);
+			clear_pd_entry(pd, idx, scratch);
+			__gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
+			start += (u64)I915_PDES << gen8_pd_shift(lvl);
+			continue;
+		}
+
+		if (lvl) {
+			start = __gen8_ppgtt_clear(vm, as_pd(pt),
+						   start, end, lvl);
+		} else {
+			unsigned int count;
+			u64 *vaddr;
+
+			count = gen8_pt_count(start, end);
+			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n",
+			    __func__, vm, lvl, start, end,
+			    gen8_pd_index(start, 0), count,
+			    atomic_read(&pt->used));
+			GEM_BUG_ON(!count || count >= atomic_read(&pt->used));
+
+			vaddr = kmap_atomic_px(pt);
+			memset64(vaddr + gen8_pd_index(start, 0),
+				 vm->scratch[0].encode,
+				 count);
+			kunmap_atomic(vaddr);
+
+			atomic_sub(count, &pt->used);
+			start += count;
+		}
+
+		if (release_pd_entry(pd, idx, pt, scratch))
+			free_px(vm, pt);
+	} while (idx++, --len);
+
+	return start;
+}
+
+static void gen8_ppgtt_clear(struct i915_address_space *vm,
+			     u64 start, u64 length)
+{
+	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
+	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
+	GEM_BUG_ON(range_overflows(start, length, vm->total));
+
+	start >>= GEN8_PTE_SHIFT;
+	length >>= GEN8_PTE_SHIFT;
+	GEM_BUG_ON(length == 0);
+
+	__gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
+			   start, start + length, vm->top);
+}
+
+static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
+			      struct i915_page_directory * const pd,
+			      u64 * const start, const u64 end, int lvl)
+{
+	const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
+	struct i915_page_table *alloc = NULL;
+	unsigned int idx, len;
+	int ret = 0;
+
+	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
+
+	len = gen8_pd_range(*start, end, lvl--, &idx);
+	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
+	    __func__, vm, lvl + 1, *start, end,
+	    idx, len, atomic_read(px_used(pd)));
+	GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1));
+
+	spin_lock(&pd->lock);
+	GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! */
+	do {
+		struct i915_page_table *pt = pd->entry[idx];
+
+		if (!pt) {
+			spin_unlock(&pd->lock);
+
+			DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
+			    __func__, vm, lvl + 1, idx);
+
+			pt = fetch_and_zero(&alloc);
+			if (lvl) {
+				if (!pt) {
+					pt = &alloc_pd(vm)->pt;
+					if (IS_ERR(pt)) {
+						ret = PTR_ERR(pt);
+						goto out;
+					}
+				}
+
+				fill_px(pt, vm->scratch[lvl].encode);
+			} else {
+				if (!pt) {
+					pt = alloc_pt(vm);
+					if (IS_ERR(pt)) {
+						ret = PTR_ERR(pt);
+						goto out;
+					}
+				}
+
+				if (intel_vgpu_active(vm->i915) ||
+				    gen8_pt_count(*start, end) < I915_PDES)
+					fill_px(pt, vm->scratch[lvl].encode);
+			}
+
+			spin_lock(&pd->lock);
+			if (likely(!pd->entry[idx]))
+				set_pd_entry(pd, idx, pt);
+			else
+				alloc = pt, pt = pd->entry[idx];
+		}
+
+		if (lvl) {
+			atomic_inc(&pt->used);
+			spin_unlock(&pd->lock);
+
+			ret = __gen8_ppgtt_alloc(vm, as_pd(pt),
+						 start, end, lvl);
+			if (unlikely(ret)) {
+				if (release_pd_entry(pd, idx, pt, scratch))
+					free_px(vm, pt);
+				goto out;
+			}
+
+			spin_lock(&pd->lock);
+			atomic_dec(&pt->used);
+			GEM_BUG_ON(!atomic_read(&pt->used));
+		} else {
+			unsigned int count = gen8_pt_count(*start, end);
+
+			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n",
+			    __func__, vm, lvl, *start, end,
+			    gen8_pd_index(*start, 0), count,
+			    atomic_read(&pt->used));
+
+			atomic_add(count, &pt->used);
+			/* All other pdes may be simultaneously removed */
+			GEM_BUG_ON(atomic_read(&pt->used) > NALLOC * I915_PDES);
+			*start += count;
+		}
+	} while (idx++, --len);
+	spin_unlock(&pd->lock);
+out:
+	if (alloc)
+		free_px(vm, alloc);
+	return ret;
+}
+
+static int gen8_ppgtt_alloc(struct i915_address_space *vm,
+			    u64 start, u64 length)
+{
+	u64 from;
+	int err;
+
+	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
+	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
+	GEM_BUG_ON(range_overflows(start, length, vm->total));
+
+	start >>= GEN8_PTE_SHIFT;
+	length >>= GEN8_PTE_SHIFT;
+	GEM_BUG_ON(length == 0);
+	from = start;
+
+	err = __gen8_ppgtt_alloc(vm, i915_vm_to_ppgtt(vm)->pd,
+				 &start, start + length, vm->top);
+	if (unlikely(err && from != start))
+		__gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
+				   from, start, vm->top);
+
+	return err;
+}
+
+static __always_inline u64
+gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
+		      struct i915_page_directory *pdp,
+		      struct sgt_dma *iter,
+		      u64 idx,
+		      enum i915_cache_level cache_level,
+		      u32 flags)
+{
+	struct i915_page_directory *pd;
+	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+	gen8_pte_t *vaddr;
+
+	pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
+	vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
+	do {
+		vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
+
+		iter->dma += I915_GTT_PAGE_SIZE;
+		if (iter->dma >= iter->max) {
+			iter->sg = __sg_next(iter->sg);
+			if (!iter->sg) {
+				idx = 0;
+				break;
+			}
+
+			iter->dma = sg_dma_address(iter->sg);
+			iter->max = iter->dma + iter->sg->length;
+		}
+
+		if (gen8_pd_index(++idx, 0) == 0) {
+			if (gen8_pd_index(idx, 1) == 0) {
+				/* Limited by sg length for 3lvl */
+				if (gen8_pd_index(idx, 2) == 0)
+					break;
+
+				pd = pdp->entry[gen8_pd_index(idx, 2)];
+			}
+
+			kunmap_atomic(vaddr);
+			vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
+		}
+	} while (1);
+	kunmap_atomic(vaddr);
+
+	return idx;
+}
+
+static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
+				   struct sgt_dma *iter,
+				   enum i915_cache_level cache_level,
+				   u32 flags)
+{
+	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+	u64 start = vma->node.start;
+	dma_addr_t rem = iter->sg->length;
+
+	GEM_BUG_ON(!i915_vm_is_4lvl(vma->vm));
+
+	do {
+		struct i915_page_directory * const pdp =
+			gen8_pdp_for_page_address(vma->vm, start);
+		struct i915_page_directory * const pd =
+			i915_pd_entry(pdp, __gen8_pte_index(start, 2));
+		gen8_pte_t encode = pte_encode;
+		unsigned int maybe_64K = -1;
+		unsigned int page_size;
+		gen8_pte_t *vaddr;
+		u16 index;
+
+		if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
+		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
+		    rem >= I915_GTT_PAGE_SIZE_2M &&
+		    !__gen8_pte_index(start, 0)) {
+			index = __gen8_pte_index(start, 1);
+			encode |= GEN8_PDE_PS_2M;
+			page_size = I915_GTT_PAGE_SIZE_2M;
+
+			vaddr = kmap_atomic_px(pd);
+		} else {
+			struct i915_page_table *pt =
+				i915_pt_entry(pd, __gen8_pte_index(start, 1));
+
+			index = __gen8_pte_index(start, 0);
+			page_size = I915_GTT_PAGE_SIZE;
+
+			if (!index &&
+			    vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
+			    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
+			    (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
+			     rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))
+				maybe_64K = __gen8_pte_index(start, 1);
+
+			vaddr = kmap_atomic_px(pt);
+		}
+
+		do {
+			GEM_BUG_ON(iter->sg->length < page_size);
+			vaddr[index++] = encode | iter->dma;
+
+			start += page_size;
+			iter->dma += page_size;
+			rem -= page_size;
+			if (iter->dma >= iter->max) {
+				iter->sg = __sg_next(iter->sg);
+				if (!iter->sg)
+					break;
+
+				rem = iter->sg->length;
+				iter->dma = sg_dma_address(iter->sg);
+				iter->max = iter->dma + rem;
+
+				if (maybe_64K != -1 && index < I915_PDES &&
+				    !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
+				      (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
+				       rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)))
+					maybe_64K = -1;
+
+				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
+					break;
+			}
+		} while (rem >= page_size && index < I915_PDES);
+
+		kunmap_atomic(vaddr);
+
+		/*
+		 * Is it safe to mark the 2M block as 64K? -- Either we have
+		 * filled whole page-table with 64K entries, or filled part of
+		 * it and have reached the end of the sg table and we have
+		 * enough padding.
+		 */
+		if (maybe_64K != -1 &&
+		    (index == I915_PDES ||
+		     (i915_vm_has_scratch_64K(vma->vm) &&
+		      !iter->sg && IS_ALIGNED(vma->node.start +
+					      vma->node.size,
+					      I915_GTT_PAGE_SIZE_2M)))) {
+			vaddr = kmap_atomic_px(pd);
+			vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
+			kunmap_atomic(vaddr);
+			page_size = I915_GTT_PAGE_SIZE_64K;
+
+			/*
+			 * We write all 4K page entries, even when using 64K
+			 * pages. In order to verify that the HW isn't cheating
+			 * by using the 4K PTE instead of the 64K PTE, we want
+			 * to remove all the surplus entries. If the HW skipped
+			 * the 64K PTE, it will read/write into the scratch page
+			 * instead - which we detect as missing results during
+			 * selftests.
+			 */
+			if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) {
+				u16 i;
+
+				encode = vma->vm->scratch[0].encode;
+				vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K));
+
+				for (i = 1; i < index; i += 16)
+					memset64(vaddr + i, encode, 15);
+
+				kunmap_atomic(vaddr);
+			}
+		}
+
+		vma->page_sizes.gtt |= page_size;
+	} while (iter->sg);
+}
+
+static void gen8_ppgtt_insert(struct i915_address_space *vm,
+			      struct i915_vma *vma,
+			      enum i915_cache_level cache_level,
+			      u32 flags)
+{
+	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
+	struct sgt_dma iter = sgt_dma(vma);
+
+	if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
+		gen8_ppgtt_insert_huge(vma, &iter, cache_level, flags);
+	} else  {
+		u64 idx = vma->node.start >> GEN8_PTE_SHIFT;
+
+		do {
+			struct i915_page_directory * const pdp =
+				gen8_pdp_for_page_index(vm, idx);
+
+			idx = gen8_ppgtt_insert_pte(ppgtt, pdp, &iter, idx,
+						    cache_level, flags);
+		} while (idx);
+
+		vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
+	}
+}
+
+static int gen8_init_scratch(struct i915_address_space *vm)
+{
+	int ret;
+	int i;
+
+	/*
+	 * If everybody agrees not to write into the scratch page,
+	 * we can reuse it for all vm, keeping contexts and processes separate.
+	 */
+	if (vm->has_read_only && vm->gt->vm && !i915_is_ggtt(vm->gt->vm)) {
+		struct i915_address_space *clone = vm->gt->vm;
+
+		GEM_BUG_ON(!clone->has_read_only);
+
+		vm->scratch_order = clone->scratch_order;
+		memcpy(vm->scratch, clone->scratch, sizeof(vm->scratch));
+		px_dma(&vm->scratch[0]) = 0; /* no xfer of ownership */
+		return 0;
+	}
+
+	ret = setup_scratch_page(vm, __GFP_HIGHMEM);
+	if (ret)
+		return ret;
+
+	vm->scratch[0].encode =
+		gen8_pte_encode(px_dma(&vm->scratch[0]),
+				I915_CACHE_LLC, vm->has_read_only);
+
+	for (i = 1; i <= vm->top; i++) {
+		if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[i]))))
+			goto free_scratch;
+
+		fill_px(&vm->scratch[i], vm->scratch[i - 1].encode);
+		vm->scratch[i].encode =
+			gen8_pde_encode(px_dma(&vm->scratch[i]),
+					I915_CACHE_LLC);
+	}
+
+	return 0;
+
+free_scratch:
+	free_scratch(vm);
+	return -ENOMEM;
+}
+
+static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
+{
+	struct i915_address_space *vm = &ppgtt->vm;
+	struct i915_page_directory *pd = ppgtt->pd;
+	unsigned int idx;
+
+	GEM_BUG_ON(vm->top != 2);
+	GEM_BUG_ON(gen8_pd_top_count(vm) != GEN8_3LVL_PDPES);
+
+	for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) {
+		struct i915_page_directory *pde;
+
+		pde = alloc_pd(vm);
+		if (IS_ERR(pde))
+			return PTR_ERR(pde);
+
+		fill_px(pde, vm->scratch[1].encode);
+		set_pd_entry(pd, idx, pde);
+		atomic_inc(px_used(pde)); /* keep pinned */
+	}
+	wmb();
+
+	return 0;
+}
+
+static struct i915_page_directory *
+gen8_alloc_top_pd(struct i915_address_space *vm)
+{
+	const unsigned int count = gen8_pd_top_count(vm);
+	struct i915_page_directory *pd;
+
+	GEM_BUG_ON(count > ARRAY_SIZE(pd->entry));
+
+	pd = __alloc_pd(offsetof(typeof(*pd), entry[count]));
+	if (unlikely(!pd))
+		return ERR_PTR(-ENOMEM);
+
+	if (unlikely(setup_page_dma(vm, px_base(pd)))) {
+		kfree(pd);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count);
+	atomic_inc(px_used(pd)); /* mark as pinned */
+	return pd;
+}
+
+/*
+ * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
+ * with a net effect resembling a 2-level page table in normal x86 terms. Each
+ * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address
+ * space.
+ *
+ */
+struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
+{
+	struct i915_ppgtt *ppgtt;
+	int err;
+
+	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
+	if (!ppgtt)
+		return ERR_PTR(-ENOMEM);
+
+	ppgtt_init(ppgtt, gt);
+	ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
+
+	/*
+	 * From bdw, there is hw support for read-only pages in the PPGTT.
+	 *
+	 * Gen11 has HSDES#:1807136187 unresolved. Disable ro support
+	 * for now.
+	 *
+	 * Gen12 has inherited the same read-only fault issue from gen11.
+	 */
+	ppgtt->vm.has_read_only = !IS_GEN_RANGE(gt->i915, 11, 12);
+
+	/*
+	 * There are only a few exceptions for gen >= 6: chv and bxt.
+	 * And we are not sure about the latter so play safe for now.
+	 */
+	if (IS_CHERRYVIEW(gt->i915) || IS_BROXTON(gt->i915))
+		ppgtt->vm.pt_kmap_wc = true;
+
+	err = gen8_init_scratch(&ppgtt->vm);
+	if (err)
+		goto err_free;
+
+	ppgtt->pd = gen8_alloc_top_pd(&ppgtt->vm);
+	if (IS_ERR(ppgtt->pd)) {
+		err = PTR_ERR(ppgtt->pd);
+		goto err_free_scratch;
+	}
+
+	if (!i915_vm_is_4lvl(&ppgtt->vm)) {
+		err = gen8_preallocate_top_level_pdp(ppgtt);
+		if (err)
+			goto err_free_pd;
+	}
+
+	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
+	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
+	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
+	ppgtt->vm.clear_range = gen8_ppgtt_clear;
+
+	if (intel_vgpu_active(gt->i915))
+		gen8_ppgtt_notify_vgt(ppgtt, true);
+
+	ppgtt->vm.cleanup = gen8_ppgtt_cleanup;
+
+	return ppgtt;
+
+err_free_pd:
+	__gen8_ppgtt_cleanup(&ppgtt->vm, ppgtt->pd,
+			     gen8_pd_top_count(&ppgtt->vm), ppgtt->vm.top);
+err_free_scratch:
+	free_scratch(&ppgtt->vm);
+err_free:
+	kfree(ppgtt);
+	return ERR_PTR(err);
+}
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
new file mode 100644
index 000000000000..76a08b9c1f5c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef __GEN8_PPGTT_H__
+#define __GEN8_PPGTT_H__
+
+struct intel_gt;
+
+struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index fbaa9df6f436..23137b2a8689 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -43,30 +43,76 @@ intel_context_create(struct intel_engine_cs *engine)
 	return ce;
 }
 
-int __intel_context_do_pin(struct intel_context *ce)
+int intel_context_alloc_state(struct intel_context *ce)
 {
-	int err;
+	int err = 0;
 
 	if (mutex_lock_interruptible(&ce->pin_mutex))
 		return -EINTR;
 
-	if (likely(!atomic_read(&ce->pin_count))) {
-		intel_wakeref_t wakeref;
+	if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
+		err = ce->ops->alloc(ce);
+		if (unlikely(err))
+			goto unlock;
 
-		if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
-			err = ce->ops->alloc(ce);
-			if (unlikely(err))
-				goto err;
+		set_bit(CONTEXT_ALLOC_BIT, &ce->flags);
+	}
 
-			__set_bit(CONTEXT_ALLOC_BIT, &ce->flags);
+unlock:
+	mutex_unlock(&ce->pin_mutex);
+	return err;
+}
+
+static int intel_context_active_acquire(struct intel_context *ce)
+{
+	int err;
+
+	err = i915_active_acquire(&ce->active);
+	if (err)
+		return err;
+
+	/* Preallocate tracking nodes */
+	if (!intel_context_is_barrier(ce)) {
+		err = i915_active_acquire_preallocate_barrier(&ce->active,
+							      ce->engine);
+		if (err) {
+			i915_active_release(&ce->active);
+			return err;
 		}
+	}
+
+	return 0;
+}
+
+static void intel_context_active_release(struct intel_context *ce)
+{
+	/* Nodes preallocated in intel_context_active_acquire() */
+	i915_active_acquire_barrier(&ce->active);
+	i915_active_release(&ce->active);
+}
 
-		err = 0;
-		with_intel_runtime_pm(ce->engine->uncore->rpm, wakeref)
-			err = ce->ops->pin(ce);
+int __intel_context_do_pin(struct intel_context *ce)
+{
+	int err;
+
+	if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
+		err = intel_context_alloc_state(ce);
 		if (err)
+			return err;
+	}
+
+	if (mutex_lock_interruptible(&ce->pin_mutex))
+		return -EINTR;
+
+	if (likely(!atomic_read(&ce->pin_count))) {
+		err = intel_context_active_acquire(ce);
+		if (unlikely(err))
 			goto err;
 
+		err = ce->ops->pin(ce);
+		if (unlikely(err))
+			goto err_active;
+
 		CE_TRACE(ce, "pin ring:{head:%04x, tail:%04x}\n",
 			 ce->ring->head, ce->ring->tail);
 
@@ -79,6 +125,8 @@ int __intel_context_do_pin(struct intel_context *ce)
 	mutex_unlock(&ce->pin_mutex);
 	return 0;
 
+err_active:
+	intel_context_active_release(ce);
 err:
 	mutex_unlock(&ce->pin_mutex);
 	return err;
@@ -86,22 +134,20 @@ err:
 
 void intel_context_unpin(struct intel_context *ce)
 {
-	if (likely(atomic_add_unless(&ce->pin_count, -1, 1)))
+	if (!atomic_dec_and_test(&ce->pin_count))
 		return;
 
-	/* We may be called from inside intel_context_pin() to evict another */
-	intel_context_get(ce);
-	mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING);
-
-	if (likely(atomic_dec_and_test(&ce->pin_count))) {
-		CE_TRACE(ce, "retire\n");
-
-		ce->ops->unpin(ce);
+	CE_TRACE(ce, "unpin\n");
+	ce->ops->unpin(ce);
 
-		intel_context_active_release(ce);
-	}
-
-	mutex_unlock(&ce->pin_mutex);
+	/*
+	 * Once released, we may asynchronously drop the active reference.
+	 * As that may be the only reference keeping the context alive,
+	 * take an extra now so that it is not freed before we finish
+	 * dereferencing it.
+	 */
+	intel_context_get(ce);
+	intel_context_active_release(ce);
 	intel_context_put(ce);
 }
 
@@ -114,6 +160,10 @@ static int __context_pin_state(struct i915_vma *vma)
 	if (err)
 		return err;
 
+	err = i915_active_acquire(&vma->active);
+	if (err)
+		goto err_unpin;
+
 	/*
 	 * And mark it as a globally pinned object to let the shrinker know
 	 * it cannot reclaim the object until we release it.
@@ -122,14 +172,44 @@ static int __context_pin_state(struct i915_vma *vma)
 	vma->obj->mm.dirty = true;
 
 	return 0;
+
+err_unpin:
+	i915_vma_unpin(vma);
+	return err;
 }
 
 static void __context_unpin_state(struct i915_vma *vma)
 {
 	i915_vma_make_shrinkable(vma);
+	i915_active_release(&vma->active);
 	__i915_vma_unpin(vma);
 }
 
+static int __ring_active(struct intel_ring *ring)
+{
+	int err;
+
+	err = i915_active_acquire(&ring->vma->active);
+	if (err)
+		return err;
+
+	err = intel_ring_pin(ring);
+	if (err)
+		goto err_active;
+
+	return 0;
+
+err_active:
+	i915_active_release(&ring->vma->active);
+	return err;
+}
+
+static void __ring_retire(struct intel_ring *ring)
+{
+	intel_ring_unpin(ring);
+	i915_active_release(&ring->vma->active);
+}
+
 __i915_active_call
 static void __intel_context_retire(struct i915_active *active)
 {
@@ -142,7 +222,7 @@ static void __intel_context_retire(struct i915_active *active)
 		__context_unpin_state(ce->state);
 
 	intel_timeline_unpin(ce->timeline);
-	intel_ring_unpin(ce->ring);
+	__ring_retire(ce->ring);
 
 	intel_context_put(ce);
 }
@@ -152,9 +232,11 @@ static int __intel_context_active(struct i915_active *active)
 	struct intel_context *ce = container_of(active, typeof(*ce), active);
 	int err;
 
+	CE_TRACE(ce, "active\n");
+
 	intel_context_get(ce);
 
-	err = intel_ring_pin(ce->ring);
+	err = __ring_active(ce->ring);
 	if (err)
 		goto err_put;
 
@@ -174,40 +256,12 @@ static int __intel_context_active(struct i915_active *active)
 err_timeline:
 	intel_timeline_unpin(ce->timeline);
 err_ring:
-	intel_ring_unpin(ce->ring);
+	__ring_retire(ce->ring);
 err_put:
 	intel_context_put(ce);
 	return err;
 }
 
-int intel_context_active_acquire(struct intel_context *ce)
-{
-	int err;
-
-	err = i915_active_acquire(&ce->active);
-	if (err)
-		return err;
-
-	/* Preallocate tracking nodes */
-	if (!intel_context_is_barrier(ce)) {
-		err = i915_active_acquire_preallocate_barrier(&ce->active,
-							      ce->engine);
-		if (err) {
-			i915_active_release(&ce->active);
-			return err;
-		}
-	}
-
-	return 0;
-}
-
-void intel_context_active_release(struct intel_context *ce)
-{
-	/* Nodes preallocated in intel_context_active() */
-	i915_active_acquire_barrier(&ce->active);
-	i915_active_release(&ce->active);
-}
-
 void
 intel_context_init(struct intel_context *ce,
 		   struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 1d4a1b1357cf..30bd248827d8 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -19,7 +19,7 @@
 
 #define CE_TRACE(ce, fmt, ...) do {					\
 	const struct intel_context *ce__ = (ce);			\
-	ENGINE_TRACE(ce__->engine, "context:%llx" fmt,			\
+	ENGINE_TRACE(ce__->engine, "context:%llx " fmt,			\
 		     ce__->timeline->fence_context,			\
 		     ##__VA_ARGS__);					\
 } while (0)
@@ -31,6 +31,8 @@ void intel_context_fini(struct intel_context *ce);
 struct intel_context *
 intel_context_create(struct intel_engine_cs *engine);
 
+int intel_context_alloc_state(struct intel_context *ce);
+
 void intel_context_free(struct intel_context *ce);
 
 /**
@@ -76,9 +78,14 @@ static inline void intel_context_unlock_pinned(struct intel_context *ce)
 
 int __intel_context_do_pin(struct intel_context *ce);
 
+static inline bool intel_context_pin_if_active(struct intel_context *ce)
+{
+	return atomic_inc_not_zero(&ce->pin_count);
+}
+
 static inline int intel_context_pin(struct intel_context *ce)
 {
-	if (likely(atomic_inc_not_zero(&ce->pin_count)))
+	if (likely(intel_context_pin_if_active(ce)))
 		return 0;
 
 	return __intel_context_do_pin(ce);
@@ -116,9 +123,6 @@ static inline void intel_context_exit(struct intel_context *ce)
 		ce->ops->exit(ce);
 }
 
-int intel_context_active_acquire(struct intel_context *ce);
-void intel_context_active_release(struct intel_context *ce);
-
 static inline struct intel_context *intel_context_get(struct intel_context *ce)
 {
 	kref_get(&ce->ref);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 9527a659546c..ca1420fb8b53 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -17,6 +17,8 @@
 #include "intel_engine_types.h"
 #include "intel_sseu.h"
 
+#define CONTEXT_REDZONE POISON_INUSE
+
 struct i915_gem_context;
 struct i915_vma;
 struct intel_context;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 79ecac5ac0ab..5df003061e44 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -202,7 +202,7 @@ void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask);
 u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
 u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);
 
-void intel_engine_get_instdone(struct intel_engine_cs *engine,
+void intel_engine_get_instdone(const struct intel_engine_cs *engine,
 			       struct intel_instdone *instdone);
 
 void intel_engine_init_execlists(struct intel_engine_cs *engine);
@@ -282,7 +282,7 @@ static inline void __intel_engine_reset(struct intel_engine_cs *engine,
 
 bool intel_engines_are_idle(struct intel_gt *gt);
 bool intel_engine_is_idle(struct intel_engine_cs *engine);
-bool intel_engine_flush_submission(struct intel_engine_cs *engine);
+void intel_engine_flush_submission(struct intel_engine_cs *engine);
 
 void intel_engines_reset_default_submission(struct intel_gt *gt);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index ddf9543b1261..f451ef376548 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -914,8 +914,8 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
 }
 
 static u32
-read_subslice_reg(struct intel_engine_cs *engine, int slice, int subslice,
-		  i915_reg_t reg)
+read_subslice_reg(const struct intel_engine_cs *engine,
+		  int slice, int subslice, i915_reg_t reg)
 {
 	struct drm_i915_private *i915 = engine->i915;
 	struct intel_uncore *uncore = engine->uncore;
@@ -959,7 +959,7 @@ read_subslice_reg(struct intel_engine_cs *engine, int slice, int subslice,
 }
 
 /* NB: please notice the memset */
-void intel_engine_get_instdone(struct intel_engine_cs *engine,
+void intel_engine_get_instdone(const struct intel_engine_cs *engine,
 			       struct intel_instdone *instdone)
 {
 	struct drm_i915_private *i915 = engine->i915;
@@ -1047,10 +1047,9 @@ static bool ring_is_idle(struct intel_engine_cs *engine)
 	return idle;
 }
 
-bool intel_engine_flush_submission(struct intel_engine_cs *engine)
+void intel_engine_flush_submission(struct intel_engine_cs *engine)
 {
 	struct tasklet_struct *t = &engine->execlists.tasklet;
-	bool active = tasklet_is_locked(t);
 
 	if (__tasklet_is_scheduled(t)) {
 		local_bh_disable();
@@ -1061,13 +1060,10 @@ bool intel_engine_flush_submission(struct intel_engine_cs *engine)
 			tasklet_unlock(t);
 		}
 		local_bh_enable();
-		active = true;
 	}
 
 	/* Otherwise flush the tasklet if it was running on another cpu */
 	tasklet_unlock_wait(t);
-
-	return active;
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 742628e40201..6c6fd185457c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -199,7 +199,7 @@ int intel_engine_pulse(struct intel_engine_cs *engine)
 		goto out_unlock;
 	}
 
-	rq->flags |= I915_REQUEST_SENTINEL;
+	__set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
 	idle_pulse(engine, rq);
 
 	__i915_request_commit(rq);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 010620b78202..ea90ab3e396e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -20,6 +20,7 @@ static int __engine_unpark(struct intel_wakeref *wf)
 {
 	struct intel_engine_cs *engine =
 		container_of(wf, typeof(*engine), wakeref);
+	struct intel_context *ce;
 	void *map;
 
 	ENGINE_TRACE(engine, "\n");
@@ -34,6 +35,27 @@ static int __engine_unpark(struct intel_wakeref *wf)
 	if (!IS_ERR_OR_NULL(map))
 		engine->pinned_default_state = map;
 
+	/* Discard stale context state from across idling */
+	ce = engine->kernel_context;
+	if (ce) {
+		GEM_BUG_ON(test_bit(CONTEXT_VALID_BIT, &ce->flags));
+
+		/* First poison the image to verify we never fully trust it */
+		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && ce->state) {
+			struct drm_i915_gem_object *obj = ce->state->obj;
+			int type = i915_coherent_map_type(engine->i915);
+
+			map = i915_gem_object_pin_map(obj, type);
+			if (!IS_ERR(map)) {
+				memset(map, CONTEXT_REDZONE, obj->base.size);
+				i915_gem_object_flush_map(obj);
+				i915_gem_object_unpin_map(obj);
+			}
+		}
+
+		ce->ops->reset(ce);
+	}
+
 	if (engine->unpark)
 		engine->unpark(engine);
 
@@ -123,16 +145,16 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 	unsigned long flags;
 	bool result = true;
 
+	/* GPU is pointing to the void, as good as in the kernel context. */
+	if (intel_gt_is_wedged(engine->gt))
+		return true;
+
 	GEM_BUG_ON(!intel_context_is_barrier(ce));
 
 	/* Already inside the kernel context, safe to power down. */
 	if (engine->wakeref_serial == engine->serial)
 		return true;
 
-	/* GPU is pointing to the void, as good as in the kernel context. */
-	if (intel_gt_is_wedged(engine->gt))
-		return true;
-
 	/*
 	 * Note, we do this without taking the timeline->mutex. We cannot
 	 * as we may be called while retiring the kernel context and so
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 7f7150a733f4..9e7f12bef828 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -11,6 +11,7 @@
 #include "i915_drv.h"
 #include "intel_engine.h"
 #include "intel_engine_user.h"
+#include "intel_gt.h"
 
 struct intel_engine_cs *
 intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
@@ -200,6 +201,9 @@ void intel_engines_driver_register(struct drm_i915_private *i915)
 				     uabi_node);
 		char old[sizeof(engine->name)];
 
+		if (intel_gt_has_init_error(engine->gt))
+			continue; /* ignore incomplete engines */
+
 		GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes));
 		engine->uabi_class = uabi_classes[engine->class];
 
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
new file mode 100644
index 000000000000..79096722ce16
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -0,0 +1,1486 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/stop_machine.h>
+
+#include <asm/set_memory.h>
+#include <asm/smp.h>
+
+#include "intel_gt.h"
+#include "i915_drv.h"
+#include "i915_scatterlist.h"
+#include "i915_vgpu.h"
+
+#include "intel_gtt.h"
+
+static int
+i915_get_ggtt_vma_pages(struct i915_vma *vma);
+
+static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
+				   unsigned long color,
+				   u64 *start,
+				   u64 *end)
+{
+	if (i915_node_color_differs(node, color))
+		*start += I915_GTT_PAGE_SIZE;
+
+	/*
+	 * Also leave a space between the unallocated reserved node after the
+	 * GTT and any objects within the GTT, i.e. we use the color adjustment
+	 * to insert a guard page to prevent prefetches crossing over the
+	 * GTT boundary.
+	 */
+	node = list_next_entry(node, node_list);
+	if (node->color != color)
+		*end -= I915_GTT_PAGE_SIZE;
+}
+
+static int ggtt_init_hw(struct i915_ggtt *ggtt)
+{
+	struct drm_i915_private *i915 = ggtt->vm.i915;
+
+	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
+
+	ggtt->vm.is_ggtt = true;
+
+	/* Only VLV supports read-only GGTT mappings */
+	ggtt->vm.has_read_only = IS_VALLEYVIEW(i915);
+
+	if (!HAS_LLC(i915) && !HAS_PPGTT(i915))
+		ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust;
+
+	if (ggtt->mappable_end) {
+		if (!io_mapping_init_wc(&ggtt->iomap,
+					ggtt->gmadr.start,
+					ggtt->mappable_end)) {
+			ggtt->vm.cleanup(&ggtt->vm);
+			return -EIO;
+		}
+
+		ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start,
+					      ggtt->mappable_end);
+	}
+
+	i915_ggtt_init_fences(ggtt);
+
+	return 0;
+}
+
+/**
+ * i915_ggtt_init_hw - Initialize GGTT hardware
+ * @i915: i915 device
+ */
+int i915_ggtt_init_hw(struct drm_i915_private *i915)
+{
+	int ret;
+
+	stash_init(&i915->mm.wc_stash);
+
+	/*
+	 * Note that we use page colouring to enforce a guard page at the
+	 * end of the address space. This is required as the CS may prefetch
+	 * beyond the end of the batch buffer, across the page boundary,
+	 * and beyond the end of the GTT if we do not provide a guard.
+	 */
+	ret = ggtt_init_hw(&i915->ggtt);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+/*
+ * Certain Gen5 chipsets require idling the GPU before
+ * unmapping anything from the GTT when VT-d is enabled.
+ */
+static bool needs_idle_maps(struct drm_i915_private *i915)
+{
+	/*
+	 * Query intel_iommu to see if we need the workaround. Presumably that
+	 * was loaded first.
+	 */
+	return IS_GEN(i915, 5) && IS_MOBILE(i915) && intel_vtd_active();
+}
+
+static void ggtt_suspend_mappings(struct i915_ggtt *ggtt)
+{
+	struct drm_i915_private *i915 = ggtt->vm.i915;
+
+	/*
+	 * Don't bother messing with faults pre GEN6 as we have little
+	 * documentation supporting that it's a good idea.
+	 */
+	if (INTEL_GEN(i915) < 6)
+		return;
+
+	intel_gt_check_and_clear_faults(ggtt->vm.gt);
+
+	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
+
+	ggtt->invalidate(ggtt);
+}
+
+void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915)
+{
+	ggtt_suspend_mappings(&i915->ggtt);
+}
+
+void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
+{
+	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
+
+	spin_lock_irq(&uncore->lock);
+	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+	intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6);
+	spin_unlock_irq(&uncore->lock);
+}
+
+static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
+{
+	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
+
+	/*
+	 * Note that as an uncached mmio write, this will flush the
+	 * WCB of the writes into the GGTT before it triggers the invalidate.
+	 */
+	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+}
+
+static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
+{
+	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
+	struct drm_i915_private *i915 = ggtt->vm.i915;
+
+	gen8_ggtt_invalidate(ggtt);
+
+	if (INTEL_GEN(i915) >= 12)
+		intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR,
+				      GEN12_GUC_TLB_INV_CR_INVALIDATE);
+	else
+		intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+}
+
+static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
+{
+	intel_gtt_chipset_flush();
+}
+
+static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
+{
+	writeq(pte, addr);
+}
+
+static void gen8_ggtt_insert_page(struct i915_address_space *vm,
+				  dma_addr_t addr,
+				  u64 offset,
+				  enum i915_cache_level level,
+				  u32 unused)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+	gen8_pte_t __iomem *pte =
+		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
+
+	gen8_set_pte(pte, gen8_pte_encode(addr, level, 0));
+
+	ggtt->invalidate(ggtt);
+}
+
+static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
+				     struct i915_vma *vma,
+				     enum i915_cache_level level,
+				     u32 flags)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+	struct sgt_iter sgt_iter;
+	gen8_pte_t __iomem *gtt_entries;
+	const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
+	dma_addr_t addr;
+
+	/*
+	 * Note that we ignore PTE_READ_ONLY here. The caller must be careful
+	 * not to allow the user to override access to a read only page.
+	 */
+
+	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
+	gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE;
+	for_each_sgt_daddr(addr, sgt_iter, vma->pages)
+		gen8_set_pte(gtt_entries++, pte_encode | addr);
+
+	/*
+	 * We want to flush the TLBs only after we're certain all the PTE
+	 * updates have finished.
+	 */
+	ggtt->invalidate(ggtt);
+}
+
+static void gen6_ggtt_insert_page(struct i915_address_space *vm,
+				  dma_addr_t addr,
+				  u64 offset,
+				  enum i915_cache_level level,
+				  u32 flags)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+	gen6_pte_t __iomem *pte =
+		(gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
+
+	iowrite32(vm->pte_encode(addr, level, flags), pte);
+
+	ggtt->invalidate(ggtt);
+}
+
+/*
+ * Binds an object into the global gtt with the specified cache level.
+ * The object will be accessible to the GPU via commands whose operands
+ * reference offsets within the global GTT as well as accessible by the GPU
+ * through the GMADR mapped BAR (i915->mm.gtt->gtt).
+ */
+static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
+				     struct i915_vma *vma,
+				     enum i915_cache_level level,
+				     u32 flags)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+	gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
+	unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE;
+	struct sgt_iter iter;
+	dma_addr_t addr;
+
+	for_each_sgt_daddr(addr, iter, vma->pages)
+		iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
+
+	/*
+	 * We want to flush the TLBs only after we're certain all the PTE
+	 * updates have finished.
+	 */
+	ggtt->invalidate(ggtt);
+}
+
+static void nop_clear_range(struct i915_address_space *vm,
+			    u64 start, u64 length)
+{
+}
+
+static void gen8_ggtt_clear_range(struct i915_address_space *vm,
+				  u64 start, u64 length)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
+	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
+	const gen8_pte_t scratch_pte = vm->scratch[0].encode;
+	gen8_pte_t __iomem *gtt_base =
+		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
+	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
+	int i;
+
+	if (WARN(num_entries > max_entries,
+		 "First entry = %d; Num entries = %d (max=%d)\n",
+		 first_entry, num_entries, max_entries))
+		num_entries = max_entries;
+
+	for (i = 0; i < num_entries; i++)
+		gen8_set_pte(&gtt_base[i], scratch_pte);
+}
+
+static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
+{
+	/*
+	 * Make sure the internal GAM fifo has been cleared of all GTT
+	 * writes before exiting stop_machine(). This guarantees that
+	 * any aperture accesses waiting to start in another process
+	 * cannot back up behind the GTT writes causing a hang.
+	 * The register can be any arbitrary GAM register.
+	 */
+	intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6);
+}
+
+struct insert_page {
+	struct i915_address_space *vm;
+	dma_addr_t addr;
+	u64 offset;
+	enum i915_cache_level level;
+};
+
+static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
+{
+	struct insert_page *arg = _arg;
+
+	gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
+	bxt_vtd_ggtt_wa(arg->vm);
+
+	return 0;
+}
+
+static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
+					  dma_addr_t addr,
+					  u64 offset,
+					  enum i915_cache_level level,
+					  u32 unused)
+{
+	struct insert_page arg = { vm, addr, offset, level };
+
+	stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
+}
+
+struct insert_entries {
+	struct i915_address_space *vm;
+	struct i915_vma *vma;
+	enum i915_cache_level level;
+	u32 flags;
+};
+
+static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
+{
+	struct insert_entries *arg = _arg;
+
+	gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags);
+	bxt_vtd_ggtt_wa(arg->vm);
+
+	return 0;
+}
+
+static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
+					     struct i915_vma *vma,
+					     enum i915_cache_level level,
+					     u32 flags)
+{
+	struct insert_entries arg = { vm, vma, level, flags };
+
+	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
+}
+
+struct clear_range {
+	struct i915_address_space *vm;
+	u64 start;
+	u64 length;
+};
+
+/* stop_machine() callback: clear a PTE range, then drain the GAM fifo. */
+static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
+{
+	struct clear_range *req = _arg;
+
+	gen8_ggtt_clear_range(req->vm, req->start, req->length);
+	bxt_vtd_ggtt_wa(req->vm);
+	return 0;
+}
+
+/* Clear a GGTT range with the PTE writes run under stop_machine(). */
+static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
+					  u64 start, u64 length)
+{
+	struct clear_range req = { vm, start, length };
+
+	stop_machine(bxt_vtd_ggtt_clear_range__cb, &req, NULL);
+}
+
+/* Point every gen6 GGTT PTE covering [start, start + length) at scratch. */
+static void gen6_ggtt_clear_range(struct i915_address_space *vm,
+				  u64 start, u64 length)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
+	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
+	gen6_pte_t __iomem *pte = (gen6_pte_t __iomem *)ggtt->gsm + first_entry;
+	const gen6_pte_t scratch_pte = vm->scratch[0].encode;
+	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
+	int i;
+
+	if (WARN(num_entries > max_entries,
+		 "First entry = %d; Num entries = %d (max=%d)\n",
+		 first_entry, num_entries, max_entries))
+		num_entries = max_entries;
+
+	for (i = 0; i < num_entries; i++)
+		iowrite32(scratch_pte, pte++);
+}
+
+/* gen5 and earlier: insert a single page through the intel-gtt layer. */
+static void i915_ggtt_insert_page(struct i915_address_space *vm,
+				  dma_addr_t addr, u64 offset,
+				  enum i915_cache_level cache_level, u32 unused)
+{
+	unsigned int agp_type = AGP_USER_CACHED_MEMORY;
+
+	if (cache_level == I915_CACHE_NONE)
+		agp_type = AGP_USER_MEMORY;
+	intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, agp_type);
+}
+
+/* gen5 and earlier: map the vma's pages through the intel-gtt layer. */
+static void i915_ggtt_insert_entries(struct i915_address_space *vm,
+				     struct i915_vma *vma,
+				     enum i915_cache_level cache_level, u32 unused)
+{
+	unsigned int agp_type = AGP_USER_CACHED_MEMORY;
+
+	if (cache_level == I915_CACHE_NONE)
+		agp_type = AGP_USER_MEMORY;
+	intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT, agp_type);
+}
+
+/* gen5 and earlier: clear the range via intel-gtt, in units of pages. */
+static void i915_ggtt_clear_range(struct i915_address_space *vm, u64 start, u64 length)
+{
+	intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
+}
+
+/*
+ * vma_ops.bind_vma for a GGTT without an aliasing PPGTT: write the PTEs
+ * through vm->insert_entries() and mark the vma bound in both spaces.
+ * @flags is not consulted. Always returns 0.
+ */
+static int ggtt_bind_vma(struct i915_vma *vma,
+			 enum i915_cache_level cache_level, u32 flags)
+{
+	struct i915_address_space *vm = vma->vm;
+	u32 pte_flags = 0;
+
+	/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
+	if (i915_gem_object_is_readonly(vma->obj))
+		pte_flags = PTE_READ_ONLY;
+
+	vm->insert_entries(vm, vma, cache_level, pte_flags);
+	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
+
+	/*
+	 * With no aliasing PPGTT, GLOBAL_BIND and LOCAL_BIND share the same
+	 * PTEs, so binding either one is recorded as binding both.
+	 */
+	atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags);
+	return 0;
+}
+
+static void ggtt_unbind_vma(struct i915_vma *vma)
+{ /* vma_ops.unbind_vma: pass the vma's node range to the vm's clear_range hook. */
+	vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
+}
+
+/* Reserve [GUC_GGTT_TOP, vm.total) in ggtt->uc_fw when GuC is in use. */
+static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
+{
+	struct i915_address_space *vm = &ggtt->vm;
+	int err;
+
+	if (!USES_GUC(vm->i915))
+		return 0;
+
+	GEM_BUG_ON(vm->total <= GUC_GGTT_TOP);
+
+	err = i915_gem_gtt_reserve(vm, &ggtt->uc_fw, vm->total - GUC_GGTT_TOP,
+				   GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
+				   PIN_NOEVICT);
+	if (err)
+		DRM_DEBUG_DRIVER("Failed to reserve top of GGTT for GuC\n");
+
+	return err;
+}
+
+static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
+{ /* Remove the ggtt->uc_fw node; does nothing if it was never allocated. */
+	if (drm_mm_node_allocated(&ggtt->uc_fw))
+		drm_mm_remove_node(&ggtt->uc_fw);
+}
+
+static void cleanup_init_ggtt(struct i915_ggtt *ggtt)
+{ /* Error unwind for init_ggtt(): drop its reservations and error_mutex. */
+	ggtt_release_guc_top(ggtt);
+	if (drm_mm_node_allocated(&ggtt->error_capture)) /* only reserved when mappable_end != 0 */
+		drm_mm_remove_node(&ggtt->error_capture);
+	mutex_destroy(&ggtt->error_mutex);
+}
+
+static int init_ggtt(struct i915_ggtt *ggtt)
+{
+	/*
+	 * Let GEM Manage all of the aperture.
+	 *
+	 * However, leave one page at the end still bound to the scratch page.
+	 * There are a number of places where the hardware apparently prefetches
+	 * past the end of the object, and we've seen multiple hangs with the
+	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
+	 * aperture.  One page should be enough to keep any prefetching inside
+	 * of the aperture.
+	 */
+	unsigned long hole_start, hole_end;
+	struct drm_mm_node *entry;
+	int ret;
+
+	/*
+	 * GuC requires all resources that we're sharing with it to be placed in
+	 * non-WOPCM memory. If GuC is not present or not in use we still need a
+	 * small bias as ring wraparound at offset 0 sometimes hangs. No idea
+	 * why.
+	 */
+	ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
+			       intel_wopcm_guc_size(&ggtt->vm.i915->wopcm));
+
+	ret = intel_vgt_balloon(ggtt);
+	if (ret)
+		return ret;
+	/* From here on, every failure unwinds through cleanup_init_ggtt(). */
+	mutex_init(&ggtt->error_mutex);
+	if (ggtt->mappable_end) {
+		/* Reserve a mappable slot for our lockless error capture */
+		ret = drm_mm_insert_node_in_range(&ggtt->vm.mm,
+						  &ggtt->error_capture,
+						  PAGE_SIZE, 0,
+						  I915_COLOR_UNEVICTABLE,
+						  0, ggtt->mappable_end,
+						  DRM_MM_INSERT_LOW);
+		if (ret)
+			goto err; /* error_mutex is live: destroy it on the way out */
+	}
+
+	/*
+	 * The upper portion of the GuC address space has a sizeable hole
+	 * (several MB) that is inaccessible by GuC. Reserve this range within
+	 * GGTT as it can comfortably hold GuC/HuC firmware images.
+	 */
+	ret = ggtt_reserve_guc_top(ggtt);
+	if (ret)
+		goto err;
+
+	/* Clear any non-preallocated blocks */
+	drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
+		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
+			      hole_start, hole_end);
+		ggtt->vm.clear_range(&ggtt->vm, hole_start,
+				     hole_end - hole_start);
+	}
+
+	/* And finally clear the reserved guard page */
+	ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);
+
+	return 0;
+
+err:
+	cleanup_init_ggtt(ggtt);
+	return ret;
+}
+
+/*
+ * vma_ops.bind_vma with an aliasing PPGTT: LOCAL_BIND fills the alias
+ * (allocating its range first on I915_VMA_ALLOC), GLOBAL_BIND the GGTT.
+ */
+static int aliasing_gtt_bind_vma(struct i915_vma *vma,
+				 enum i915_cache_level cache_level, u32 flags)
+{
+	u32 pte_flags = 0;
+	int err;
+
+	/* Currently applicable only to VLV */
+	if (i915_gem_object_is_readonly(vma->obj))
+		pte_flags = PTE_READ_ONLY;
+
+	if (flags & I915_VMA_LOCAL_BIND) {
+		struct i915_address_space *avm =
+			&i915_vm_to_ggtt(vma->vm)->alias->vm;
+
+		if (flags & I915_VMA_ALLOC) {
+			err = avm->allocate_va_range(avm, vma->node.start,
+						     vma->size);
+			if (err)
+				return err;
+			set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
+		}
+
+		GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT,
+				     __i915_vma_flags(vma)));
+		avm->insert_entries(avm, vma, cache_level, pte_flags);
+	}
+
+	if (flags & I915_VMA_GLOBAL_BIND)
+		vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
+
+	return 0;
+}
+
+/* Undo aliasing_gtt_bind_vma(): clear the GGTT and/or alias PTE ranges. */
+static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
+{
+	struct i915_address_space *vm = vma->vm;
+
+	if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
+		vm->clear_range(vm, vma->node.start, vma->size);
+
+	if (!test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)))
+		return;
+
+	/* The range was allocated in the alias, so clear it there too. */
+	vm = &i915_vm_to_ggtt(vm)->alias->vm;
+	vm->clear_range(vm, vma->node.start, vma->size);
+}
+
+/* Create the aliasing PPGTT and switch the GGTT vma_ops over to it. */
+static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
+{
+	struct i915_ppgtt *alias;
+	int ret = -ENODEV;
+
+	alias = i915_ppgtt_create(ggtt->vm.gt);
+	if (IS_ERR(alias))
+		return PTR_ERR(alias);
+
+	/* The alias must be at least as large as the GGTT it shadows. */
+	if (GEM_WARN_ON(alias->vm.total < ggtt->vm.total))
+		goto err_put;
+
+	/*
+	 * Pre-allocate only up to the end of the global GTT; with 48b /
+	 * 4-level page tables the difference is very significant.
+	 * Pre-allocation is required as GVT/vgpu does not like the page
+	 * directory disappearing.
+	 */
+	ret = alias->vm.allocate_va_range(&alias->vm, 0, ggtt->vm.total);
+	if (ret)
+		goto err_put;
+
+	ggtt->alias = alias;
+	ggtt->vm.bind_async_flags |= alias->vm.bind_async_flags;
+
+	/* Swap in the aliasing hooks; only the plain GGTT ones may be live. */
+	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
+	GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
+	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
+	ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
+
+	return 0;
+
+err_put:
+	i915_vm_put(&alias->vm);
+	return ret;
+}
+
+/* Drop the aliasing PPGTT, if any, and restore the plain GGTT vma_ops. */
+static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
+{
+	struct i915_ppgtt *alias = fetch_and_zero(&ggtt->alias);
+
+	if (!alias)
+		return;
+
+	i915_vm_put(&alias->vm);
+
+	ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
+	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
+}
+
+/* Set up the GGTT, plus the aliasing PPGTT where used; 0 or -errno. */
+int i915_init_ggtt(struct drm_i915_private *i915)
+{
+	int ret;
+
+	ret = init_ggtt(&i915->ggtt);
+	if (ret || INTEL_PPGTT(i915) != INTEL_PPGTT_ALIASING)
+		return ret;
+
+	ret = init_aliasing_ppgtt(&i915->ggtt);
+	if (ret)
+		cleanup_init_ggtt(&i915->ggtt);
+
+	/* Propagate the failure: the GGTT has just been torn down again. */
+	return ret;
+}
+
+static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
+{
+	struct i915_vma *vma, *vn;
+	/* Tear the GGTT down; the address space is marked not-open first. */
+	atomic_set(&ggtt->vm.open, 0);
+
+	rcu_barrier(); /* flush the RCU'ed __i915_vm_release */
+	flush_workqueue(ggtt->vm.i915->wq);
+
+	mutex_lock(&ggtt->vm.mutex);
+	/* Unbind everything still on the bound list; a failure only WARNs. */
+	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
+		WARN_ON(__i915_vma_unbind(vma));
+	/* Release what init_ggtt() reserved and ballooned. */
+	if (drm_mm_node_allocated(&ggtt->error_capture))
+		drm_mm_remove_node(&ggtt->error_capture);
+	mutex_destroy(&ggtt->error_mutex);
+
+	ggtt_release_guc_top(ggtt);
+	intel_vgt_deballoon(ggtt);
+	/* Platform hook installed by the matching *_gmch_probe(). */
+	ggtt->vm.cleanup(&ggtt->vm);
+
+	mutex_unlock(&ggtt->vm.mutex);
+	i915_address_space_fini(&ggtt->vm);
+
+	arch_phys_wc_del(ggtt->mtrr);
+
+	if (ggtt->iomap.size)
+		io_mapping_fini(&ggtt->iomap);
+}
+
+/**
+ * i915_ggtt_driver_release - Tear down the GGTT and its aliasing PPGTT
+ * @i915: i915 device
+ */
+void i915_ggtt_driver_release(struct drm_i915_private *i915)
+{
+	struct pagevec *stash = &i915->mm.wc_stash.pvec;
+
+	fini_aliasing_ppgtt(&i915->ggtt);
+	ggtt_cleanup_hw(&i915->ggtt);
+
+	/* Switch pages left in the WC stash back to WB and release them. */
+	if (!stash->nr)
+		return;
+
+	set_pages_array_wb(stash->pages, stash->nr);
+	__pagevec_release(stash);
+}
+
+static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
+{
+	u16 ggms = (snb_gmch_ctl >> SNB_GMCH_GGMS_SHIFT) & SNB_GMCH_GGMS_MASK;
+
+	return ggms << 20; /* the GGMS field, scaled by 1 MiB */
+}
+
+/* Decode BDW GGMS: 0 yields 0, otherwise the result is 2^GGMS MiB. */
+static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
+{
+	u16 mib = (bdw_gmch_ctl >> BDW_GMCH_GGMS_SHIFT) & BDW_GMCH_GGMS_MASK;
+
+	if (mib)
+		mib = 1 << mib;
+#ifdef CONFIG_X86_32
+	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
+	if (mib > 4)
+		mib = 4;
+#endif
+
+	return mib << 20;
+}
+
+/* Decode CHV GGMS: 0 yields 0, otherwise 2^(20 + GGMS) bytes. */
+static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
+{
+	u16 ggms = (gmch_ctrl >> SNB_GMCH_GGMS_SHIFT) & SNB_GMCH_GGMS_MASK;
+
+	if (!ggms)
+		return 0;
+
+	return 1 << (20 + ggms);
+}
+
+/* Map the GGTT page table (upper half of BAR 0) and set up scratch. */
+static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
+{
+	struct drm_i915_private *i915 = ggtt->vm.i915;
+	struct pci_dev *pdev = i915->drm.pdev;
+	phys_addr_t gsm_base;
+	int err;
+
+	/* For Modern GENs the PTEs and register space are split in the BAR */
+	gsm_base = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;
+
+	/*
+	 * BXT+/CNL+ drop GTT pagetable writes wider than 64 bits, and a WC
+	 * mapping flushes in 64 byte bursts, so those platforms need an
+	 * uncached mapping instead. The readback check done when writing
+	 * GTT PTE entries easily catches the WC issue.
+	 */
+	if (IS_GEN9_LP(i915) || INTEL_GEN(i915) >= 10)
+		ggtt->gsm = ioremap_nocache(gsm_base, size);
+	else
+		ggtt->gsm = ioremap_wc(gsm_base, size);
+	if (!ggtt->gsm) {
+		DRM_ERROR("Failed to map the ggtt page table\n");
+		return -ENOMEM;
+	}
+
+	err = setup_scratch_page(&ggtt->vm, GFP_DMA32);
+	if (err) {
+		DRM_ERROR("Scratch setup failed\n");
+		/* iounmap will also get called at remove, but meh */
+		iounmap(ggtt->gsm);
+		return err;
+	}
+
+	/* Cache the encoded, uncached PTE that points at the scratch page. */
+	ggtt->vm.scratch[0].encode =
+		ggtt->vm.pte_encode(px_dma(&ggtt->vm.scratch[0]),
+				    I915_CACHE_NONE, 0);
+	return 0;
+}
+
+/* vma_ops.set_pages: build vma->pages for the vma's GGTT view. */
+int ggtt_set_pages(struct i915_vma *vma)
+{
+	int err;
+
+	GEM_BUG_ON(vma->pages);
+
+	err = i915_get_ggtt_vma_pages(vma);
+	/* On success the vma takes over the object's page sizes. */
+	if (!err)
+		vma->page_sizes = vma->obj->mm.page_sizes;
+
+	return err;
+}
+
+/* vm.cleanup for gen6 and later: undo ggtt_probe_common(). */
+static void gen6_gmch_remove(struct i915_address_space *vm)
+{
+	/* Unmap the GGTT page table, then clean up the scratch page. */
+	iounmap(i915_vm_to_ggtt(vm)->gsm);
+	cleanup_scratch_page(vm);
+}
+
+static struct resource pci_resource(struct pci_dev *pdev, int bar)
+{ /* Describe BAR @bar of @pdev as a memory resource, returned by value. */
+	return (struct resource)DEFINE_RES_MEM(pci_resource_start(pdev, bar),
+					       pci_resource_len(pdev, bar));
+}
+
+static int gen8_gmch_probe(struct i915_ggtt *ggtt)
+{
+	struct drm_i915_private *i915 = ggtt->vm.i915;
+	struct pci_dev *pdev = i915->drm.pdev;
+	unsigned int size;
+	u16 snb_gmch_ctl;
+	int err;
+	/* Probe the gen8+ GGTT: size it from SNB_GMCH_CTRL, install the vm hooks. */
+	/* TODO: We're not aware of mappable constraints on gen8 yet */
+	if (!IS_DGFX(i915)) {
+		ggtt->gmadr = pci_resource(pdev, 2);
+		ggtt->mappable_end = resource_size(&ggtt->gmadr);
+	}
+
+	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
+	if (!err)
+		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
+	if (err)
+		DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
+	/* NOTE(review): a DMA mask failure is only logged; probing continues. */
+	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
+	if (IS_CHERRYVIEW(i915))
+		size = chv_get_total_gtt_size(snb_gmch_ctl);
+	else
+		size = gen8_get_total_gtt_size(snb_gmch_ctl);
+	/* size is the PTE array in bytes; each PTE maps one I915_GTT_PAGE_SIZE. */
+	ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
+	ggtt->vm.cleanup = gen6_gmch_remove;
+	ggtt->vm.insert_page = gen8_ggtt_insert_page;
+	ggtt->vm.clear_range = nop_clear_range;
+	if (intel_scanout_needs_vtd_wa(i915))
+		ggtt->vm.clear_range = gen8_ggtt_clear_range;
+
+	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
+
+	/* Serialize GTT updates with aperture access on BXT if VT-d is on. */
+	if (intel_ggtt_update_needs_vtd_wa(i915) ||
+	    IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) {
+		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
+		ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
+		if (ggtt->vm.clear_range != nop_clear_range)
+			ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
+	}
+
+	ggtt->invalidate = gen8_ggtt_invalidate;
+
+	ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
+	ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
+	ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
+	ggtt->vm.vma_ops.clear_pages = clear_pages;
+
+	ggtt->vm.pte_encode = gen8_pte_encode;
+
+	setup_private_pat(ggtt->vm.gt->uncore);
+	/* Map the page table and set up scratch; that result is the probe result. */
+	return ggtt_probe_common(ggtt, size);
+}
+
+/* Encode a valid SNB PTE; unknown levels are reported via MISSING_CASE(). */
+static u64 snb_pte_encode(dma_addr_t addr,
+			  enum i915_cache_level level, u32 flags)
+{
+	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+	switch (level) {
+	case I915_CACHE_NONE:
+		pte |= GEN6_PTE_UNCACHED;
+		break;
+	case I915_CACHE_LLC:
+	case I915_CACHE_L3_LLC:
+		pte |= GEN6_PTE_CACHE_LLC;
+		break;
+	default:
+		MISSING_CASE(level);
+	}
+
+	return pte;
+}
+
+/* Encode a valid IVB PTE; unknown levels are reported via MISSING_CASE(). */
+static u64 ivb_pte_encode(dma_addr_t addr,
+			  enum i915_cache_level level, u32 flags)
+{
+	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+	switch (level) {
+	case I915_CACHE_NONE:
+		pte |= GEN6_PTE_UNCACHED;
+		break;
+	case I915_CACHE_LLC:
+		pte |= GEN6_PTE_CACHE_LLC;
+		break;
+	case I915_CACHE_L3_LLC:
+		pte |= GEN7_PTE_CACHE_L3_LLC;
+		break;
+	default:
+		MISSING_CASE(level);
+	}
+
+	return pte;
+}
+
+/* Encode a valid BYT PTE: writeable unless read-only, snooped if cached. */
+static u64 byt_pte_encode(dma_addr_t addr,
+			  enum i915_cache_level level, u32 flags)
+{
+	const bool writeable = !(flags & PTE_READ_ONLY);
+	const bool snooped = level != I915_CACHE_NONE;
+	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+	if (writeable)
+		pte |= BYT_PTE_WRITEABLE;
+	if (snooped)
+		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
+	return pte;
+}
+
+/* Encode a valid HSW PTE: every cached level gets HSW_WB_LLC_AGE3. */
+static u64 hsw_pte_encode(dma_addr_t addr,
+			  enum i915_cache_level level, u32 flags)
+{
+	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+	if (level == I915_CACHE_NONE)
+		return pte;
+	pte |= HSW_WB_LLC_AGE3;
+	return pte;
+}
+
+/*
+ * Encode a valid PTE for parts with eDRAM (selected by HAS_EDRAM()):
+ *   I915_CACHE_NONE - no cacheability bits
+ *   I915_CACHE_WT   - HSW_WT_ELLC_LLC_AGE3
+ *   anything else   - HSW_WB_ELLC_LLC_AGE3
+ * @flags is unused.
+ */
+static u64 iris_pte_encode(dma_addr_t addr,
+			   enum i915_cache_level level, u32 flags)
+{
+	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+	if (level == I915_CACHE_WT)
+		pte |= HSW_WT_ELLC_LLC_AGE3;
+	else if (level != I915_CACHE_NONE)
+		pte |= HSW_WB_ELLC_LLC_AGE3;
+
+	return pte;
+}
+
+static int gen6_gmch_probe(struct i915_ggtt *ggtt)
+{
+	struct drm_i915_private *i915 = ggtt->vm.i915;
+	struct pci_dev *pdev = i915->drm.pdev;
+	unsigned int size;
+	u16 snb_gmch_ctl;
+	int err;
+	/* Probe the gen6/gen7 GGTT: size it from SNB_GMCH_CTRL, install the vm hooks. */
+	ggtt->gmadr = pci_resource(pdev, 2);
+	ggtt->mappable_end = resource_size(&ggtt->gmadr);
+
+	/*
+	 * 64/512MB is the current min/max we actually know of, but this is
+	 * just a coarse sanity check.
+	 */
+	if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
+		DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end);
+		return -ENXIO;
+	}
+
+	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
+	if (!err)
+		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
+	if (err)
+		DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
+	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
+	/* NOTE(review): a DMA mask failure above is only logged; probing continues. */
+	size = gen6_get_total_gtt_size(snb_gmch_ctl);
+	ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
+
+	ggtt->vm.clear_range = nop_clear_range;
+	if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
+		ggtt->vm.clear_range = gen6_ggtt_clear_range;
+	ggtt->vm.insert_page = gen6_ggtt_insert_page;
+	ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
+	ggtt->vm.cleanup = gen6_gmch_remove;
+
+	ggtt->invalidate = gen6_ggtt_invalidate;
+	/* Pick the PTE encoder; the first matching platform test wins. */
+	if (HAS_EDRAM(i915))
+		ggtt->vm.pte_encode = iris_pte_encode;
+	else if (IS_HASWELL(i915))
+		ggtt->vm.pte_encode = hsw_pte_encode;
+	else if (IS_VALLEYVIEW(i915))
+		ggtt->vm.pte_encode = byt_pte_encode;
+	else if (INTEL_GEN(i915) >= 7)
+		ggtt->vm.pte_encode = ivb_pte_encode;
+	else
+		ggtt->vm.pte_encode = snb_pte_encode;
+
+	ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
+	ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
+	ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
+	ggtt->vm.vma_ops.clear_pages = clear_pages;
+	/* Map the page table and set up scratch; that result is the probe result. */
+	return ggtt_probe_common(ggtt, size);
+}
+
+static void i915_gmch_remove(struct i915_address_space *vm)
+{ /* vm.cleanup hook installed by i915_gmch_probe(); @vm itself is unused. */
+	intel_gmch_remove();
+}
+
+/* Probe the gen5-and-earlier GGTT through the intel-gtt layer. */
+static int i915_gmch_probe(struct i915_ggtt *ggtt)
+{
+	struct drm_i915_private *i915 = ggtt->vm.i915;
+	struct i915_address_space *vm = &ggtt->vm;
+	phys_addr_t gmadr_base;
+
+	/* Note the inverted convention: zero from the probe means failure. */
+	if (!intel_gmch_probe(i915->bridge_dev, i915->drm.pdev, NULL)) {
+		DRM_ERROR("failed to set up gmch\n");
+		return -EIO;
+	}
+
+	intel_gtt_get(&vm->total, &gmadr_base, &ggtt->mappable_end);
+	ggtt->gmadr =
+		(struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);
+
+	vm->insert_page = i915_ggtt_insert_page;
+	vm->insert_entries = i915_ggtt_insert_entries;
+	vm->clear_range = i915_ggtt_clear_range;
+	vm->cleanup = i915_gmch_remove;
+
+	vm->vma_ops.bind_vma = ggtt_bind_vma;
+	vm->vma_ops.unbind_vma = ggtt_unbind_vma;
+	vm->vma_ops.set_pages = ggtt_set_pages;
+	vm->vma_ops.clear_pages = clear_pages;
+
+	ggtt->invalidate = gmch_ggtt_invalidate;
+
+	ggtt->do_idle_maps = needs_idle_maps(i915);
+	if (unlikely(ggtt->do_idle_maps))
+		dev_notice(i915->drm.dev,
+			   "Applying Ironlake quirks for intel_iommu\n");
+
+	return 0;
+}
+
+static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
+{
+	struct drm_i915_private *i915 = gt->i915;
+	int ret;
+	/* Attach the GGTT to @gt, then run the per-generation probe. */
+	ggtt->vm.gt = gt;
+	ggtt->vm.i915 = i915;
+	ggtt->vm.dma = &i915->drm.pdev->dev;
+	/* gen5 and earlier, gen6-7 and gen8+ each have their own probe. */
+	if (INTEL_GEN(i915) <= 5)
+		ret = i915_gmch_probe(ggtt);
+	else if (INTEL_GEN(i915) < 8)
+		ret = gen6_gmch_probe(ggtt);
+	else
+		ret = gen8_gmch_probe(ggtt);
+	if (ret)
+		return ret;
+	/* Clamp a GGTT larger than 4GiB to 32 bits of address space. */
+	if ((ggtt->vm.total - 1) >> 32) {
+		DRM_ERROR("We never expected a Global GTT with more than 32bits"
+			  " of address space! Found %lldM!\n",
+			  ggtt->vm.total >> 20);
+		ggtt->vm.total = 1ULL << 32;
+		ggtt->mappable_end =
+			min_t(u64, ggtt->mappable_end, ggtt->vm.total);
+	}
+	/* The mappable aperture may not extend past the end of the GGTT. */
+	if (ggtt->mappable_end > ggtt->vm.total) {
+		DRM_ERROR("mappable aperture extends past end of GGTT,"
+			  " aperture=%pa, total=%llx\n",
+			  &ggtt->mappable_end, ggtt->vm.total);
+		ggtt->mappable_end = ggtt->vm.total;
+	}
+
+	/* GMADR is the PCI mmio aperture into the global GTT. */
+	DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->vm.total >> 20);
+	DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20);
+	DRM_DEBUG_DRIVER("DSM size = %lluM\n",
+			 (u64)resource_size(&intel_graphics_stolen_res) >> 20);
+
+	return 0;
+}
+
+/**
+ * i915_ggtt_probe_hw - Probe GGTT hardware location
+ * @i915: i915 device
+ *
+ * Return: 0 on success, or the error from the per-generation probe.
+ */
+int i915_ggtt_probe_hw(struct drm_i915_private *i915)
+{
+	int err = ggtt_probe_hw(&i915->ggtt, &i915->gt);
+
+	if (err)
+		return err;
+
+	if (intel_vtd_active())
+		dev_info(i915->drm.dev, "VT-d active for gfx access\n");
+	return 0;
+}
+
+/* Only gen5 and earlier call intel_enable_gtt(); -EIO if that fails. */
+int i915_ggtt_enable_hw(struct drm_i915_private *i915)
+{
+	if (INTEL_GEN(i915) >= 6)
+		return 0;
+	return intel_enable_gtt() ? 0 : -EIO;
+}
+
+/* Switch GGTT invalidation to the GuC variant and invalidate once. */
+void i915_ggtt_enable_guc(struct i915_ggtt *ggtt)
+{
+	GEM_BUG_ON(ggtt->invalidate != gen8_ggtt_invalidate);
+
+	ggtt->invalidate = guc_ggtt_invalidate;
+	guc_ggtt_invalidate(ggtt);
+}
+
+/* Revert i915_ggtt_enable_guc(): gen8 invalidation again, invalidate once. */
+void i915_ggtt_disable_guc(struct i915_ggtt *ggtt)
+{
+	/* XXX Temporary pardon for error unload */
+	if (ggtt->invalidate == gen8_ggtt_invalidate)
+		return;
+
+	/* We should only be called after i915_ggtt_enable_guc() */
+	GEM_BUG_ON(ggtt->invalidate != guc_ggtt_invalidate);
+
+	ggtt->invalidate = gen8_ggtt_invalidate;
+	gen8_ggtt_invalidate(ggtt);
+}
+
+static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
+{
+	struct i915_vma *vma;
+	bool flush = false;
+	int open;
+	/* Rewrite the GGTT PTEs of every globally bound vma (used on resume). */
+	intel_gt_check_and_clear_faults(ggtt->vm.gt);
+
+	mutex_lock(&ggtt->vm.mutex);
+
+	/* First fill our portion of the GTT with scratch pages */
+	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
+
+	/* Skip rewriting PTE on VMA unbind. */
+	open = atomic_xchg(&ggtt->vm.open, 0);
+
+	/* clflush objects bound into the GGTT and rebind them. */
+	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) {
+		struct drm_i915_gem_object *obj = vma->obj;
+
+		if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
+			continue;
+
+		clear_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma));
+		WARN_ON(i915_vma_bind(vma,
+				      obj ? obj->cache_level : 0,
+				      PIN_GLOBAL, NULL));
+		if (obj) { /* only used during resume => exclusive access */
+			flush |= fetch_and_zero(&obj->write_domain);
+			obj->read_domains |= I915_GEM_DOMAIN_GTT;
+		}
+	}
+	/* Put back the open count saved above, then invalidate the GGTT. */
+	atomic_set(&ggtt->vm.open, open);
+	ggtt->invalidate(ggtt);
+
+	mutex_unlock(&ggtt->vm.mutex);
+	/* Some object had a write domain set: write back and invalidate caches. */
+	if (flush)
+		wbinvd_on_all_cpus();
+}
+
+/* Rewrite the GGTT PTEs and, on gen8+, set up the private PAT again. */
+void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915)
+{
+	ggtt_restore_mappings(&i915->ggtt);
+
+	if (INTEL_GEN(i915) < 8)
+		return;
+	setup_private_pat(i915->ggtt.vm.gt->uncore);
+}
+
+/* Emit one plane of a rotated view, column by column from the last row up. */
+static struct scatterlist *
+rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset,
+	     unsigned int width, unsigned int height, unsigned int stride,
+	     struct sg_table *st, struct scatterlist *sg)
+{
+	unsigned int x, y;
+
+	for (x = 0; x < width; x++) {
+		/* Start at this column's page in the last row. */
+		unsigned int src = stride * (height - 1) + x + offset;
+
+		for (y = 0; y < height; y++, src -= stride) {
+			/*
+			 * We only need the DMA addresses, not the pages,
+			 * but each entry must still be initialised so
+			 * that the sg list can be traversed.
+			 */
+			sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0);
+			sg_dma_address(sg) =
+				i915_gem_object_get_dma_address(obj, src);
+			sg_dma_len(sg) = I915_GTT_PAGE_SIZE;
+			sg = sg_next(sg);
+			st->nents++;
+		}
+	}
+
+	return sg;
+}
+
+/* Build the sg_table for a rotated view of @obj; ERR_PTR() on failure. */
+static noinline struct sg_table *
+intel_rotate_pages(struct intel_rotation_info *rot_info,
+		   struct drm_i915_gem_object *obj)
+{
+	unsigned int size = intel_rotation_info_size(rot_info);
+	struct scatterlist *sg;
+	struct sg_table *st;
+	int err = -ENOMEM;
+	int n;
+
+	/* Allocate target SG list. */
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		goto err_st_alloc;
+
+	err = sg_alloc_table(st, size, GFP_KERNEL);
+	if (err)
+		goto err_sg_alloc;
+
+	/* rotate_pages() counts nents back up as it fills each entry. */
+	st->nents = 0;
+	sg = st->sgl;
+	for (n = 0; n < ARRAY_SIZE(rot_info->plane); n++)
+		sg = rotate_pages(obj, rot_info->plane[n].offset,
+				  rot_info->plane[n].width, rot_info->plane[n].height,
+				  rot_info->plane[n].stride, st, sg);
+
+	return st;
+
+err_sg_alloc:
+	kfree(st);
+err_st_alloc:
+
+	DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
+			 obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
+
+	return ERR_PTR(err);
+}
+
+/*
+ * Emit one plane of a remapped view, one row at a time.
+ * Returns the next unused sg entry.
+ */
+static struct scatterlist *
+remap_pages(struct drm_i915_gem_object *obj, unsigned int offset,
+	    unsigned int width, unsigned int height, unsigned int stride,
+	    struct sg_table *st, struct scatterlist *sg)
+{
+	unsigned int y;
+
+	for (y = 0; y < height; y++) {
+		unsigned int remain = width * I915_GTT_PAGE_SIZE;
+
+		while (remain) {
+			unsigned int len;
+			dma_addr_t dma;
+
+			/*
+			 * We only need the DMA addresses, not the pages,
+			 * but each entry must still be initialised so
+			 * that the sg list can be traversed.
+			 */
+			dma = i915_gem_object_get_dma_address_len(obj, offset, &len);
+			len = min(remain, len);
+			sg_set_page(sg, NULL, len, 0);
+			sg_dma_address(sg) = dma;
+			sg_dma_len(sg) = len;
+			sg = sg_next(sg);
+			st->nents++;
+
+			offset += len / I915_GTT_PAGE_SIZE;
+			remain -= len;
+		}
+
+		/* Advance from the end of this row to the start of the next. */
+		offset += stride - width;
+	}
+
+	return sg;
+}
+
+/* Build the sg_table for a remapped view of @obj; ERR_PTR() on failure. */
+static noinline struct sg_table *
+intel_remap_pages(struct intel_remapped_info *rem_info,
+		  struct drm_i915_gem_object *obj)
+{
+	unsigned int size = intel_remapped_info_size(rem_info);
+	struct scatterlist *sg;
+	struct sg_table *st;
+	int err = -ENOMEM;
+	int n;
+
+	/* Allocate target SG list. */
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		goto err_st_alloc;
+
+	err = sg_alloc_table(st, size, GFP_KERNEL);
+	if (err)
+		goto err_sg_alloc;
+
+	/* remap_pages() counts nents back up as it fills each entry. */
+	st->nents = 0;
+	sg = st->sgl;
+	for (n = 0; n < ARRAY_SIZE(rem_info->plane); n++)
+		sg = remap_pages(obj, rem_info->plane[n].offset,
+				 rem_info->plane[n].width, rem_info->plane[n].height,
+				 rem_info->plane[n].stride, st, sg);
+
+	i915_sg_trim(st);
+
+	return st;
+
+err_sg_alloc:
+	kfree(st);
+err_st_alloc:
+
+	DRM_DEBUG_DRIVER("Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
+			 obj->base.size, rem_info->plane[0].width, rem_info->plane[0].height, size);
+
+	return ERR_PTR(err);
+}
+
+static noinline struct sg_table *
+intel_partial_pages(const struct i915_ggtt_view *view,
+		    struct drm_i915_gem_object *obj)
+{
+	struct sg_table *st;
+	struct scatterlist *sg, *iter;
+	unsigned int count = view->partial.size;
+	unsigned int offset;
+	int ret = -ENOMEM;
+	/* Build an sg_table of view->partial.size pages of @obj; ERR_PTR() on failure. */
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		goto err_st_alloc;
+
+	ret = sg_alloc_table(st, count, GFP_KERNEL);
+	if (ret)
+		goto err_sg_alloc;
+	/* iter: source sg entry for the first page; offset: pages into that entry. */
+	iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
+	GEM_BUG_ON(!iter);
+
+	sg = st->sgl;
+	st->nents = 0;
+	do {
+		unsigned int len;
+		/* Take the rest of this source entry, capped at what is still wanted. */
+		len = min(iter->length - (offset << PAGE_SHIFT),
+			  count << PAGE_SHIFT);
+		sg_set_page(sg, NULL, len, 0);
+		sg_dma_address(sg) =
+			sg_dma_address(iter) + (offset << PAGE_SHIFT);
+		sg_dma_len(sg) = len;
+
+		st->nents++;
+		count -= len >> PAGE_SHIFT;
+		if (count == 0) {
+			sg_mark_end(sg);
+			i915_sg_trim(st); /* Drop any unused tail entries. */
+
+			return st;
+		}
+		/* Move on to the next source entry, starting at its beginning. */
+		sg = __sg_next(sg);
+		iter = __sg_next(iter);
+		offset = 0;
+	} while (1);
+
+err_sg_alloc:
+	kfree(st);
+err_st_alloc:
+	return ERR_PTR(ret);
+}
+
+/* Point vma->pages at (a view of) the object's pages; 0 or -errno. */
+static int
+i915_get_ggtt_vma_pages(struct i915_vma *vma)
+{
+	struct drm_i915_gem_object *obj = vma->obj;
+	struct sg_table *pages;
+	int err;
+
+	/*
+	 * vma->pages borrow from obj->mm.pages and are only valid while
+	 * those are pinned; whenever the obj->mm.pages sg_table is
+	 * regenerated, so must be the vma->pages.
+	 */
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+
+	switch (vma->ggtt_view.type) {
+	case I915_GGTT_VIEW_ROTATED:
+		pages = intel_rotate_pages(&vma->ggtt_view.rotated, obj);
+		break;
+	case I915_GGTT_VIEW_REMAPPED:
+		pages = intel_remap_pages(&vma->ggtt_view.remapped, obj);
+		break;
+	case I915_GGTT_VIEW_PARTIAL:
+		pages = intel_partial_pages(&vma->ggtt_view, obj);
+		break;
+	default:
+		GEM_BUG_ON(vma->ggtt_view.type);
+		/* fall through */
+	case I915_GGTT_VIEW_NORMAL:
+		/* The normal view shares the object's own sg_table. */
+		vma->pages = obj->mm.pages;
+		return 0;
+	}
+
+	if (!IS_ERR(pages)) {
+		vma->pages = pages;
+		return 0;
+	}
+
+	err = PTR_ERR(pages);
+	vma->pages = NULL;
+	DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
+		  vma->ggtt_view.type, err);
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index ec84b5e62fef..da2b6e2ae692 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -38,8 +38,6 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
 void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt)
 {
 	gt->ggtt = ggtt;
-
-	intel_gt_sanitize(gt, false);
 }
 
 static void init_unused_ring(struct intel_gt *gt, u32 base)
@@ -77,10 +75,6 @@ int intel_gt_init_hw(struct intel_gt *gt)
 	struct intel_uncore *uncore = gt->uncore;
 	int ret;
 
-	ret = intel_gt_terminally_wedged(gt);
-	if (ret)
-		return ret;
-
 	gt->last_init_time = ktime_get();
 
 	/* Double layer security blanket, see i915_gem_init() */
@@ -372,7 +366,7 @@ static void intel_gt_fini_scratch(struct intel_gt *gt)
 static struct i915_address_space *kernel_vm(struct intel_gt *gt)
 {
 	if (INTEL_PPGTT(gt->i915) > INTEL_PPGTT_ALIASING)
-		return &i915_ppgtt_create(gt->i915)->vm;
+		return &i915_ppgtt_create(gt)->vm;
 	else
 		return i915_vm_get(&gt->ggtt->vm);
 }
@@ -410,14 +404,13 @@ static int __engines_record_defaults(struct intel_gt *gt)
 		struct intel_context *ce;
 		struct i915_request *rq;
 
+		/* We must be able to switch to something! */
+		GEM_BUG_ON(!engine->kernel_context);
+
 		err = intel_renderstate_init(&so, engine);
 		if (err)
 			goto out;
 
-		/* We must be able to switch to something! */
-		GEM_BUG_ON(!engine->kernel_context);
-		engine->serial++; /* force the kernel context switch */
-
 		ce = intel_context_create(engine);
 		if (IS_ERR(ce)) {
 			err = PTR_ERR(ce);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 2355cf129e9c..1dac441cb8f4 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -58,9 +58,14 @@ static inline u32 intel_gt_scratch_offset(const struct intel_gt *gt,
 	return i915_ggtt_offset(gt->scratch) + field;
 }
 
-static inline bool intel_gt_is_wedged(struct intel_gt *gt)
+static inline bool intel_gt_is_wedged(const struct intel_gt *gt)
 {
 	return __intel_reset_failed(&gt->reset);
 }
 
+static inline bool intel_gt_has_init_error(const struct intel_gt *gt)
+{ /* NOTE(review): intel_gt_resume() returns this bool as its error code (1, not a -errno). */
+	return test_bit(I915_WEDGED_ON_INIT, &gt->reset.flags);
+}
+
 #endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index 45b68a17da4d..d1c2f034296a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -126,17 +126,7 @@ static bool reset_engines(struct intel_gt *gt)
 	return __intel_gt_reset(gt, ALL_ENGINES) == 0;
 }
 
-/**
- * intel_gt_sanitize: called after the GPU has lost power
- * @gt: the i915 GT container
- * @force: ignore a failed reset and sanitize engine state anyway
- *
- * Anytime we reset the GPU, either with an explicit GPU reset or through a
- * PCI power cycle, the GPU loses state and we must reset our state tracking
- * to match. Note that calling intel_gt_sanitize() if the GPU has not
- * been reset results in much confusion!
- */
-void intel_gt_sanitize(struct intel_gt *gt, bool force)
+static void gt_sanitize(struct intel_gt *gt, bool force)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
@@ -189,6 +179,10 @@ int intel_gt_resume(struct intel_gt *gt)
 	enum intel_engine_id id;
 	int err;
 
+	err = intel_gt_has_init_error(gt);
+	if (err)
+		return err;
+
 	GT_TRACE(gt, "\n");
 
 	/*
@@ -201,30 +195,26 @@ int intel_gt_resume(struct intel_gt *gt)
 
 	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
 	intel_rc6_sanitize(&gt->rc6);
+	gt_sanitize(gt, true);
+	if (intel_gt_is_wedged(gt)) {
+		err = -EIO;
+		goto out_fw;
+	}
 
 	/* Only when the HW is re-initialised, can we replay the requests */
 	err = intel_gt_init_hw(gt);
 	if (err) {
 		dev_err(gt->i915->drm.dev,
 			"Failed to initialize GPU, declaring it wedged!\n");
-		intel_gt_set_wedged(gt);
-		goto err_fw;
+		goto err_wedged;
 	}
 
 	intel_rps_enable(&gt->rps);
 	intel_llc_enable(&gt->llc);
 
 	for_each_engine(engine, gt, id) {
-		struct intel_context *ce;
-
 		intel_engine_pm_get(engine);
 
-		ce = engine->kernel_context;
-		if (ce) {
-			GEM_BUG_ON(!intel_context_is_pinned(ce));
-			ce->ops->reset(ce);
-		}
-
 		engine->serial++; /* kernel context lost */
 		err = engine->resume(engine);
 
@@ -233,7 +223,7 @@ int intel_gt_resume(struct intel_gt *gt)
 			dev_err(gt->i915->drm.dev,
 				"Failed to restart %s (%d)\n",
 				engine->name, err);
-			break;
+			goto err_wedged;
 		}
 	}
 
@@ -243,11 +233,14 @@ int intel_gt_resume(struct intel_gt *gt)
 
 	user_forcewake(gt, false);
 
-err_fw:
+out_fw:
 	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
 	intel_gt_pm_put(gt);
-
 	return err;
+
+err_wedged:
+	intel_gt_set_wedged(gt);
+	goto out_fw;
 }
 
 static void wait_for_suspend(struct intel_gt *gt)
@@ -315,7 +308,7 @@ void intel_gt_suspend_late(struct intel_gt *gt)
 		intel_llc_disable(&gt->llc);
 	}
 
-	intel_gt_sanitize(gt, false);
+	gt_sanitize(gt, false);
 
 	GT_TRACE(gt, "\n");
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index 4a9e48c12bd4..60f0e2fbe55c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -51,8 +51,6 @@ void intel_gt_pm_init_early(struct intel_gt *gt);
 void intel_gt_pm_init(struct intel_gt *gt);
 void intel_gt_pm_fini(struct intel_gt *gt);
 
-void intel_gt_sanitize(struct intel_gt *gt, bool force);
-
 void intel_gt_suspend_prepare(struct intel_gt *gt);
 void intel_gt_suspend_late(struct intel_gt *gt);
 int intel_gt_resume(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
index b4f04614230e..7ef1d37970f6 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -14,13 +14,16 @@
 #include "intel_gt_requests.h"
 #include "intel_timeline.h"
 
-static void retire_requests(struct intel_timeline *tl)
+static bool retire_requests(struct intel_timeline *tl)
 {
 	struct i915_request *rq, *rn;
 
 	list_for_each_entry_safe(rq, rn, &tl->requests, link)
 		if (!i915_request_retire(rq))
-			break;
+			return false;
+
+	/* And check nothing new was submitted */
+	return !i915_active_fence_isset(&tl->last_request);
 }
 
 static bool flush_submission(struct intel_gt *gt)
@@ -29,9 +32,13 @@ static bool flush_submission(struct intel_gt *gt)
 	enum intel_engine_id id;
 	bool active = false;
 
+	if (!intel_gt_pm_is_awake(gt))
+		return false;
+
 	for_each_engine(engine, gt, id) {
-		active |= intel_engine_flush_submission(engine);
+		intel_engine_flush_submission(engine);
 		active |= flush_work(&engine->retire_work);
+		active |= flush_work(&engine->wakeref.work);
 	}
 
 	return active;
@@ -120,7 +127,6 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
 		timeout = -timeout, interruptible = false;
 
 	flush_submission(gt); /* kick the ksoftirqd tasklets */
-
 	spin_lock(&timelines->lock);
 	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
 		if (!mutex_trylock(&tl->mutex)) {
@@ -145,7 +151,8 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
 			}
 		}
 
-		retire_requests(tl);
+		if (!retire_requests(tl) || flush_submission(gt))
+			active_count++;
 
 		spin_lock(&timelines->lock);
 
@@ -153,8 +160,6 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
 		list_safe_reset_next(tl, tn, link);
 		if (atomic_dec_and_test(&tl->active_count))
 			list_del(&tl->link);
-		else
-			active_count += i915_active_fence_isset(&tl->last_request);
 
 		mutex_unlock(&tl->mutex);
 
@@ -169,9 +174,6 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
 	list_for_each_entry_safe(tl, tn, &free, link)
 		__intel_timeline_free(&tl->kref);
 
-	if (flush_submission(gt))
-		active_count++;
-
 	return active_count ? timeout : 0;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
new file mode 100644
index 000000000000..16acdc5d6734
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -0,0 +1,598 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/slab.h> /* fault-inject.h is not standalone! */
+
+#include <linux/fault-inject.h>
+
+#include "i915_trace.h"
+#include "intel_gt.h"
+#include "intel_gtt.h"
+
+void stash_init(struct pagestash *stash)
+{
+	pagevec_init(&stash->pvec);
+	spin_lock_init(&stash->lock);
+}
+
+static struct page *stash_pop_page(struct pagestash *stash)
+{
+	struct page *page = NULL; /* stays NULL when the stash holds no pages */
+
+	spin_lock(&stash->lock);
+	if (likely(stash->pvec.nr))
+		page = stash->pvec.pages[--stash->pvec.nr]; /* take the last entry */
+	spin_unlock(&stash->lock);
+
+	return page;
+}
+
+static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec)
+{
+	unsigned int nr; /* number of pages moved from @pvec into @stash */
+
+	spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING); /* vm_free_pages_release() calls us with another stash's lock held */
+
+	nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec));
+	memcpy(stash->pvec.pages + stash->pvec.nr,
+	       pvec->pages + pvec->nr - nr, /* copy the last @nr entries of @pvec */
+	       sizeof(pvec->pages[0]) * nr);
+	stash->pvec.nr += nr;
+
+	spin_unlock(&stash->lock);
+
+	pvec->nr -= nr; /* whatever did not fit remains in @pvec for the caller */
+}
+
+static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
+{
+	struct pagevec stack; /* temporary batch of freshly allocated pages */
+	struct page *page; /* result; NULL once both stash lookups have missed */
+
+	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
+		i915_gem_shrink_all(vm->i915);
+
+	page = stash_pop_page(&vm->free_pages);
+	if (page)
+		return page;
+
+	if (!vm->pt_kmap_wc)
+		return alloc_page(gfp);
+
+	/* Look in our global stash of WC pages... */
+	page = stash_pop_page(&vm->i915->mm.wc_stash);
+	if (page)
+		return page;
+
+	/*
+	 * Otherwise batch allocate pages to amortize cost of set_pages_wc.
+	 *
+	 * We have to be careful as page allocation may trigger the shrinker
+	 * (via direct reclaim) which will fill up the WC stash underneath us.
+	 * So we add our WB pages into a temporary pvec on the stack and merge
+	 * them into the WC stash after all the allocations are complete.
+	 */
+	pagevec_init(&stack);
+	do {
+		struct page *wb; /* must not alias @page: it has to stay NULL on failure */
+
+		wb = alloc_page(gfp);
+		if (unlikely(!wb))
+			break;
+
+		stack.pages[stack.nr++] = wb;
+	} while (pagevec_space(&stack));
+
+	if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) {
+		page = stack.pages[--stack.nr]; /* keep one page for the caller */
+
+		/* Merge spare WC pages to the global stash */
+		if (stack.nr)
+			stash_push_pagevec(&vm->i915->mm.wc_stash, &stack);
+
+		/* Push any surplus WC pages onto the local VM stash */
+		if (stack.nr)
+			stash_push_pagevec(&vm->free_pages, &stack);
+	}
+
+	/* Return unwanted leftovers */
+	if (unlikely(stack.nr)) {
+		WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr));
+		__pagevec_release(&stack);
+	}
+
+	return page; /* NULL if nothing was allocated or the switch to WC failed */
+}
+
+static void vm_free_pages_release(struct i915_address_space *vm,
+				  bool immediate)
+{
+	struct pagevec *pvec = &vm->free_pages.pvec;
+	struct pagevec stack; /* private copy used while the lock is dropped */
+
+	lockdep_assert_held(&vm->free_pages.lock);
+	GEM_BUG_ON(!pagevec_count(pvec));
+
+	if (vm->pt_kmap_wc) {
+		/*
+		 * When we use WC, first fill up the global stash and then
+		 * only if full immediately free the overflow.
+		 */
+		stash_push_pagevec(&vm->i915->mm.wc_stash, pvec);
+
+		/*
+		 * As we have made some room in the VM's free_pages,
+		 * we can wait for it to fill again. Unless we are
+		 * inside i915_address_space_fini() and must
+		 * immediately release the pages!
+		 */
+		if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1))
+			return;
+
+		/*
+		 * We have to drop the lock to allow ourselves to sleep,
+		 * so take a copy of the pvec and clear the stash for
+		 * others to use it as we sleep.
+		 */
+		stack = *pvec;
+		pagevec_reinit(pvec);
+		spin_unlock(&vm->free_pages.lock);
+
+		pvec = &stack; /* from here on we release the copy, not the stash */
+		set_pages_array_wb(pvec->pages, pvec->nr);
+
+		spin_lock(&vm->free_pages.lock);
+	}
+
+	__pagevec_release(pvec); /* free_pages.lock is held again on both paths */
+}
+
+static void vm_free_page(struct i915_address_space *vm, struct page *page)
+{
+	/*
+	 * On !llc, we need to change the pages back to WB. We only do so
+	 * in bulk, so we rarely need to change the page attributes here,
+	 * but doing so requires a stop_machine() from deep inside arch/x86/mm.
+	 * To make detection of the possible sleep more likely, use an
+	 * unconditional might_sleep() for everybody.
+	 */
+	might_sleep();
+	spin_lock(&vm->free_pages.lock);
+	while (!pagevec_space(&vm->free_pages.pvec)) /* stash full: make room first */
+		vm_free_pages_release(vm, false);
+	GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE);
+	pagevec_add(&vm->free_pages.pvec, page); /* cache @page on the VM's local stash */
+	spin_unlock(&vm->free_pages.lock);
+}
+
+void __i915_vm_close(struct i915_address_space *vm)
+{
+	struct i915_vma *vma, *vn;
+
+	mutex_lock(&vm->mutex);
+	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
+		struct drm_i915_gem_object *obj = vma->obj;
+
+		/* Keep the obj (and hence the vma) alive as _we_ destroy it */
+		if (!kref_get_unless_zero(&obj->base.refcount))
+			continue;
+
+		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
+		WARN_ON(__i915_vma_unbind(vma));
+		__i915_vma_put(vma);
+
+		i915_gem_object_put(obj);
+	}
+	GEM_BUG_ON(!list_empty(&vm->bound_list));
+	mutex_unlock(&vm->mutex);
+}
+
+void i915_address_space_fini(struct i915_address_space *vm)
+{
+	spin_lock(&vm->free_pages.lock);
+	if (pagevec_count(&vm->free_pages.pvec))
+		vm_free_pages_release(vm, true);
+	GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec));
+	spin_unlock(&vm->free_pages.lock);
+
+	drm_mm_takedown(&vm->mm);
+
+	mutex_destroy(&vm->mutex);
+}
+
+static void __i915_vm_release(struct work_struct *work)
+{
+	struct i915_address_space *vm =
+		container_of(work, struct i915_address_space, rcu.work);
+
+	vm->cleanup(vm);
+	i915_address_space_fini(vm);
+
+	kfree(vm);
+}
+
+void i915_vm_release(struct kref *kref)
+{
+	struct i915_address_space *vm =
+		container_of(kref, struct i915_address_space, ref);
+
+	GEM_BUG_ON(i915_is_ggtt(vm)); /* this release path must not see the global GTT */
+	trace_i915_ppgtt_release(vm);
+
+	queue_rcu_work(vm->i915->wq, &vm->rcu); /* work fn is __i915_vm_release(), set in i915_address_space_init() */
+}
+
+void i915_address_space_init(struct i915_address_space *vm, int subclass)
+{
+	kref_init(&vm->ref);
+	INIT_RCU_WORK(&vm->rcu, __i915_vm_release);
+	atomic_set(&vm->open, 1);
+
+	/*
+	 * The vm->mutex must be reclaim safe (for use in the shrinker).
+	 * Do a dummy acquire now under fs_reclaim so that any allocation
+	 * attempt holding the lock is immediately reported by lockdep.
+	 */
+	mutex_init(&vm->mutex);
+	lockdep_set_subclass(&vm->mutex, subclass);
+	i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
+
+	GEM_BUG_ON(!vm->total);
+	drm_mm_init(&vm->mm, 0, vm->total);
+	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
+
+	stash_init(&vm->free_pages);
+
+	INIT_LIST_HEAD(&vm->bound_list);
+}
+
+void clear_pages(struct i915_vma *vma)
+{
+	GEM_BUG_ON(!vma->pages);
+
+	if (vma->pages != vma->obj->mm.pages) { /* free only a table that is not the object's own */
+		sg_free_table(vma->pages);
+		kfree(vma->pages);
+	}
+	vma->pages = NULL;
+
+	memset(&vma->page_sizes, 0, sizeof(vma->page_sizes)); /* forget the recorded page sizes too */
+}
+
+static int __setup_page_dma(struct i915_address_space *vm,
+			    struct i915_page_dma *p,
+			    gfp_t gfp)
+{
+	p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL);
+	if (unlikely(!p->page))
+		return -ENOMEM;
+
+	p->daddr = dma_map_page_attrs(vm->dma,
+				      p->page, 0, PAGE_SIZE,
+				      PCI_DMA_BIDIRECTIONAL,
+				      DMA_ATTR_SKIP_CPU_SYNC |
+				      DMA_ATTR_NO_WARN);
+	if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
+		vm_free_page(vm, p->page);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p)
+{
+	return __setup_page_dma(vm, p, __GFP_HIGHMEM);
+}
+
+void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p)
+{
+	dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+	vm_free_page(vm, p->page);
+}
+
+void
+fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count)
+{ /* atomically map @p's page, memset64() it with @val/@count, then unmap the pointer memset64() returns */
+	kunmap_atomic(memset64(kmap_atomic(p->page), val, count));
+}
+
+int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
+{
+	unsigned long size; /* bytes requested for the scratch page: 64K or 4K */
+
+	/*
+	 * In order to utilize 64K pages for an object with a size < 2M, we will
+	 * need to support a 64K scratch page, given that every 16th entry for a
+	 * page-table operating in 64K mode must point to a properly aligned 64K
+	 * region, including any PTEs which happen to point to scratch.
+	 *
+	 * This is only relevant for the 48b PPGTT where we support
+	 * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
+	 * scratch (read-only) between all vm, we create one 64k scratch page
+	 * for all.
+	 */
+	size = I915_GTT_PAGE_SIZE_4K;
+	if (i915_vm_is_4lvl(vm) &&
+	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
+		size = I915_GTT_PAGE_SIZE_64K;
+		gfp |= __GFP_NOWARN; /* a failed 64K attempt falls back to 4K below */
+	}
+	gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL;
+
+	do {
+		unsigned int order = get_order(size);
+		struct page *page;
+		dma_addr_t addr;
+
+		page = alloc_pages(gfp, order);
+		if (unlikely(!page))
+			goto skip;
+
+		addr = dma_map_page_attrs(vm->dma,
+					  page, 0, size,
+					  PCI_DMA_BIDIRECTIONAL,
+					  DMA_ATTR_SKIP_CPU_SYNC |
+					  DMA_ATTR_NO_WARN);
+		if (unlikely(dma_mapping_error(vm->dma, addr)))
+			goto free_page;
+
+		if (unlikely(!IS_ALIGNED(addr, size))) /* reject a mapping not aligned to its own size */
+			goto unmap_page;
+
+		vm->scratch[0].base.page = page;
+		vm->scratch[0].base.daddr = addr;
+		vm->scratch_order = order; /* read back by cleanup_scratch_page() */
+		return 0;
+
+unmap_page:
+		dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL);
+free_page:
+		__free_pages(page, order);
+skip:
+		if (size == I915_GTT_PAGE_SIZE_4K) /* no smaller size left to try */
+			return -ENOMEM;
+
+		size = I915_GTT_PAGE_SIZE_4K;
+		gfp &= ~__GFP_NOWARN; /* the 4K retry runs without __GFP_NOWARN */
+	} while (1);
+}
+
+void cleanup_scratch_page(struct i915_address_space *vm)
+{
+	struct i915_page_dma *p = px_base(&vm->scratch[0]);
+	unsigned int order = vm->scratch_order; /* as recorded by setup_scratch_page() */
+
+	dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT, /* 2^order pages, in bytes */
+		       PCI_DMA_BIDIRECTIONAL);
+	__free_pages(p->page, order);
+}
+
+void free_scratch(struct i915_address_space *vm)
+{
+	int i;
+
+	if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */
+		return;
+
+	for (i = 1; i <= vm->top; i++) { /* scratch[0] is released separately below */
+		if (!px_dma(&vm->scratch[i]))
+			break; /* stop at the first level without a DMA address */
+		cleanup_page_dma(vm, px_base(&vm->scratch[i]));
+	}
+
+	cleanup_scratch_page(vm);
+}
+
+void gtt_write_workarounds(struct intel_gt *gt)
+{
+	struct drm_i915_private *i915 = gt->i915;
+	struct intel_uncore *uncore = gt->uncore;
+
+	/*
+	 * This function is for gtt related workarounds. This function is
+	 * called on driver load and after a GPU reset, so you can place
+	 * workarounds here even if they get overwritten by GPU reset.
+	 */
+	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
+	if (IS_BROADWELL(i915))
+		intel_uncore_write(uncore,
+				   GEN8_L3_LRA_1_GPGPU,
+				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
+	else if (IS_CHERRYVIEW(i915))
+		intel_uncore_write(uncore,
+				   GEN8_L3_LRA_1_GPGPU,
+				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
+	else if (IS_GEN9_LP(i915))
+		intel_uncore_write(uncore,
+				   GEN8_L3_LRA_1_GPGPU,
+				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
+	else if (INTEL_GEN(i915) >= 9 && INTEL_GEN(i915) <= 11)
+		intel_uncore_write(uncore,
+				   GEN8_L3_LRA_1_GPGPU,
+				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
+
+	/*
+	 * To support 64K PTEs we need to first enable the use of the
+	 * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
+	 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
+	 * shouldn't be needed after GEN10.
+	 *
+	 * 64K pages were first introduced from BDW+, although technically they
+	 * only *work* from gen9+. For pre-BDW we instead have the option for
+	 * 32K pages, but we don't currently have any support for it in our
+	 * driver.
+	 */
+	if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
+	    INTEL_GEN(i915) <= 10)
+		intel_uncore_rmw(uncore,
+				 GEN8_GAMW_ECO_DEV_RW_IA,
+				 0,
+				 GAMW_ECO_ENABLE_64K_IPS_FIELD);
+
+	if (IS_GEN_RANGE(i915, 8, 11)) {
+		bool can_use_gtt_cache = true;
+
+		/*
+		 * According to the BSpec if we use 2M/1G pages then we also
+		 * need to disable the GTT cache. At least on BDW we can see
+		 * visual corruption when using 2M pages, and not disabling the
+		 * GTT cache.
+		 */
+		if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
+			can_use_gtt_cache = false;
+
+		/* WaGttCachingOffByDefault */
+		intel_uncore_write(uncore,
+				   HSW_GTT_CACHE_EN,
+				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
+		WARN_ON_ONCE(can_use_gtt_cache &&
+			     intel_uncore_read(uncore,
+					       HSW_GTT_CACHE_EN) == 0);
+	}
+}
+
+u64 gen8_pte_encode(dma_addr_t addr,
+		    enum i915_cache_level level,
+		    u32 flags)
+{
+	gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW; /* start as a present, writable entry */
+
+	if (unlikely(flags & PTE_READ_ONLY))
+		pte &= ~_PAGE_RW;
+
+	switch (level) {
+	case I915_CACHE_NONE:
+		pte |= PPAT_UNCACHED;
+		break;
+	case I915_CACHE_WT:
+		pte |= PPAT_DISPLAY_ELLC;
+		break;
+	default: /* every other cache level is encoded as PPAT_CACHED */
+		pte |= PPAT_CACHED;
+		break;
+	}
+
+	return pte;
+}
+
+static void tgl_setup_private_ppat(struct intel_uncore *uncore)
+{
+	/* TGL doesn't support LLC or AGE settings */
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
+}
+
+static void cnl_setup_private_ppat(struct intel_uncore *uncore)
+{
+	intel_uncore_write(uncore,
+			   GEN10_PAT_INDEX(0),
+			   GEN8_PPAT_WB | GEN8_PPAT_LLC);
+	intel_uncore_write(uncore,
+			   GEN10_PAT_INDEX(1),
+			   GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
+	intel_uncore_write(uncore,
+			   GEN10_PAT_INDEX(2),
+			   GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
+	intel_uncore_write(uncore,
+			   GEN10_PAT_INDEX(3),
+			   GEN8_PPAT_UC);
+	intel_uncore_write(uncore,
+			   GEN10_PAT_INDEX(4),
+			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
+	intel_uncore_write(uncore,
+			   GEN10_PAT_INDEX(5),
+			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
+	intel_uncore_write(uncore,
+			   GEN10_PAT_INDEX(6),
+			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
+	intel_uncore_write(uncore,
+			   GEN10_PAT_INDEX(7),
+			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
+}
+
+/*
+ * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
+ * bits. When using advanced contexts each context stores its own PAT, but
+ * writing this data shouldn't be harmful even in those cases.
+ */
+static void bdw_setup_private_ppat(struct intel_uncore *uncore)
+{
+	u64 pat;
+
+	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |	/* for normal objects, no eLLC */
+	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |	/* for something pointing to ptes? */
+	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) |	/* for scanout with eLLC */
+	      GEN8_PPAT(3, GEN8_PPAT_UC) |			/* Uncached objects, mostly for scanout */
+	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
+	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
+	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
+	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
+
+	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
+	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
+}
+
+static void chv_setup_private_ppat(struct intel_uncore *uncore)
+{
+	u64 pat;
+
+	/*
+	 * Map WB on BDW to snooped on CHV.
+	 *
+	 * Only the snoop bit has meaning for CHV, the rest is
+	 * ignored.
+	 *
+	 * The hardware will never snoop for certain types of accesses:
+	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
+	 * - PPGTT page tables
+	 * - some other special cycles
+	 *
+	 * As with BDW, we also need to consider the following for GT accesses:
+	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
+	 * so RTL will always use the value corresponding to
+	 * pat_sel = 000".
+	 * Which means we must set the snoop bit in PAT entry 0
+	 * in order to keep the global status page working.
+	 */
+
+	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
+	      GEN8_PPAT(1, 0) |
+	      GEN8_PPAT(2, 0) |
+	      GEN8_PPAT(3, 0) |
+	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
+	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
+	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
+	      GEN8_PPAT(7, CHV_PPAT_SNOOP);
+
+	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
+	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
+}
+
+void setup_private_pat(struct intel_uncore *uncore)
+{
+	struct drm_i915_private *i915 = uncore->i915;
+
+	GEM_BUG_ON(INTEL_GEN(i915) < 8);
+
+	if (INTEL_GEN(i915) >= 12)
+		tgl_setup_private_ppat(uncore);
+	else if (INTEL_GEN(i915) >= 10)
+		cnl_setup_private_ppat(uncore);
+	else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
+		chv_setup_private_ppat(uncore);
+	else
+		bdw_setup_private_ppat(uncore);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/mock_gtt.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
new file mode 100644
index 000000000000..7da7681c20b1
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -0,0 +1,587 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ *
+ * Please try to maintain the following order within this file unless it makes
+ * sense to do otherwise. From top to bottom:
+ * 1. typedefs
+ * 2. #defines, and macros
+ * 3. structure definitions
+ * 4. function prototypes
+ *
+ * Within each section, please try to order by generation in ascending order,
+ * from top to bottom (ie. gen6 on the top, gen8 on the bottom).
+ */
+
+#ifndef __INTEL_GTT_H__
+#define __INTEL_GTT_H__
+
+#include <linux/io-mapping.h>
+#include <linux/kref.h>
+#include <linux/mm.h>
+#include <linux/pagevec.h>
+#include <linux/scatterlist.h>
+#include <linux/workqueue.h>
+
+#include <drm/drm_mm.h>
+
+#include "gt/intel_reset.h"
+#include "i915_gem_fence_reg.h"
+#include "i915_selftest.h"
+#include "i915_vma_types.h"
+
+#define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
+
+#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT)
+#define DBG(...) trace_printk(__VA_ARGS__)
+#else
+#define DBG(...)
+#endif
+
+#define NALLOC 3 /* 1 normal, 1 for concurrent threads, 1 for preallocation */
+
+#define I915_GTT_PAGE_SIZE_4K	BIT_ULL(12)
+#define I915_GTT_PAGE_SIZE_64K	BIT_ULL(16)
+#define I915_GTT_PAGE_SIZE_2M	BIT_ULL(21)
+
+#define I915_GTT_PAGE_SIZE I915_GTT_PAGE_SIZE_4K
+#define I915_GTT_MAX_PAGE_SIZE I915_GTT_PAGE_SIZE_2M
+
+#define I915_GTT_PAGE_MASK -I915_GTT_PAGE_SIZE
+
+#define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE
+
+#define I915_FENCE_REG_NONE -1
+#define I915_MAX_NUM_FENCES 32
+/* 32 fences + sign bit for FENCE_REG_NONE */
+#define I915_MAX_NUM_FENCE_BITS 6
+
+typedef u32 gen6_pte_t;
+typedef u64 gen8_pte_t;
+
+#define ggtt_total_entries(ggtt) ((ggtt)->vm.total >> PAGE_SHIFT)
+
+#define I915_PTES(pte_len)		((unsigned int)(PAGE_SIZE / (pte_len)))
+#define I915_PTE_MASK(pte_len)		(I915_PTES(pte_len) - 1)
+#define I915_PDES			512
+#define I915_PDE_MASK			(I915_PDES - 1)
+
+/* gen6-hsw has bit 11-4 for physical addr bit 39-32 */
+#define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))
+#define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
+#define GEN6_PDE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
+#define GEN6_PTE_CACHE_LLC		(2 << 1)
+#define GEN6_PTE_UNCACHED		(1 << 1)
+#define GEN6_PTE_VALID			REG_BIT(0)
+
+#define GEN6_PTES			I915_PTES(sizeof(gen6_pte_t))
+#define GEN6_PD_SIZE		        (I915_PDES * PAGE_SIZE)
+#define GEN6_PD_ALIGN			(PAGE_SIZE * 16)
+#define GEN6_PDE_SHIFT			22
+#define GEN6_PDE_VALID			REG_BIT(0)
+#define NUM_PTE(pde_shift)     (1 << ((pde_shift) - PAGE_SHIFT)) /* 2^(pde_shift - PAGE_SHIFT) */
+
+#define GEN7_PTE_CACHE_L3_LLC		(3 << 1)
+
+#define BYT_PTE_SNOOPED_BY_CPU_CACHES	REG_BIT(2)
+#define BYT_PTE_WRITEABLE		REG_BIT(1)
+
+/*
+ * Cacheability Control is a 4-bit value. The low three bits are stored in bits
+ * 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
+ */
+#define HSW_CACHEABILITY_CONTROL(bits)	((((bits) & 0x7) << 1) | \
+					 (((bits) & 0x8) << (11 - 3)))
+#define HSW_WB_LLC_AGE3			HSW_CACHEABILITY_CONTROL(0x2)
+#define HSW_WB_LLC_AGE0			HSW_CACHEABILITY_CONTROL(0x3)
+#define HSW_WB_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x8)
+#define HSW_WB_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0xb)
+#define HSW_WT_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x7)
+#define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
+#define HSW_PTE_UNCACHED		(0)
+#define HSW_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0x7f0))
+#define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)
+
+/*
+ * GEN8 32b style address is defined as a 3 level page table:
+ * 31:30 | 29:21 | 20:12 |  11:0
+ * PDPE  |  PDE  |  PTE  | offset
+ * The difference as compared to normal x86 3 level page table is the PDPEs are
+ * programmed via register.
+ *
+ * GEN8 48b style address is defined as a 4 level page table:
+ * 47:39 | 38:30 | 29:21 | 20:12 |  11:0
+ * PML4E | PDPE  |  PDE  |  PTE  | offset
+ */
+#define GEN8_3LVL_PDPES			4
+
+#define PPAT_UNCACHED			(_PAGE_PWT | _PAGE_PCD)
+#define PPAT_CACHED_PDE			0 /* WB LLC */
+#define PPAT_CACHED			_PAGE_PAT /* WB LLCeLLC */
+#define PPAT_DISPLAY_ELLC		_PAGE_PCD /* WT eLLC */
+
+#define CHV_PPAT_SNOOP			REG_BIT(6)
+#define GEN8_PPAT_AGE(x)		((x)<<4)
+#define GEN8_PPAT_LLCeLLC		(3<<2)
+#define GEN8_PPAT_LLCELLC		(2<<2)
+#define GEN8_PPAT_LLC			(1<<2)
+#define GEN8_PPAT_WB			(3<<0)
+#define GEN8_PPAT_WT			(2<<0)
+#define GEN8_PPAT_WC			(1<<0)
+#define GEN8_PPAT_UC			(0<<0)
+#define GEN8_PPAT_ELLC_OVERRIDE		(0<<2)
+#define GEN8_PPAT(i, x)			((u64)(x) << ((i) * 8))
+
+#define GEN8_PDE_IPS_64K BIT(11)
+#define GEN8_PDE_PS_2M   BIT(7)
+
+#define for_each_sgt_daddr(__dp, __iter, __sgt) \
+	__for_each_sgt_daddr(__dp, __iter, __sgt, I915_GTT_PAGE_SIZE)
+
+struct i915_page_dma {
+	struct page *page;
+	union {
+		dma_addr_t daddr;
+
+		/*
+		 * For gen6/gen7 only. This is the offset in the GGTT
+		 * where the page directory entries for PPGTT begin
+		 */
+		u32 ggtt_offset;
+	};
+};
+
+struct i915_page_scratch {
+	struct i915_page_dma base;
+	u64 encode;
+};
+
+struct i915_page_table {
+	struct i915_page_dma base;
+	atomic_t used;
+};
+
+struct i915_page_directory {
+	struct i915_page_table pt;
+	spinlock_t lock;
+	void *entry[512];
+};
+
+#define __px_choose_expr(x, type, expr, other) \
+	__builtin_choose_expr( \
+	__builtin_types_compatible_p(typeof(x), type) || \
+	__builtin_types_compatible_p(typeof(x), const type), \
+	({ type __x = (type)(x); expr; }), \
+	other)
+
+#define px_base(px) \
+	__px_choose_expr(px, struct i915_page_dma *, __x, \
+	__px_choose_expr(px, struct i915_page_scratch *, &__x->base, \
+	__px_choose_expr(px, struct i915_page_table *, &__x->base, \
+	__px_choose_expr(px, struct i915_page_directory *, &__x->pt.base, \
+	(void)0))))
+#define px_dma(px) (px_base(px)->daddr)
+
+#define px_pt(px) \
+	__px_choose_expr(px, struct i915_page_table *, __x, \
+	__px_choose_expr(px, struct i915_page_directory *, &__x->pt, \
+	(void)0))
+#define px_used(px) (&px_pt(px)->used)
+
+enum i915_cache_level;
+
+struct drm_i915_file_private;
+struct drm_i915_gem_object;
+struct i915_vma;
+struct intel_gt;
+
+struct i915_vma_ops {
+	/* Map an object into an address space with the given cache flags. */
+	int (*bind_vma)(struct i915_vma *vma,
+			enum i915_cache_level cache_level,
+			u32 flags);
+	/*
+	 * Unmap an object from an address space. This usually consists of
+	 * setting the valid PTE entries to a reserved scratch page.
+	 */
+	void (*unbind_vma)(struct i915_vma *vma);
+
+	int (*set_pages)(struct i915_vma *vma);
+	void (*clear_pages)(struct i915_vma *vma);
+};
+
+struct pagestash {
+	spinlock_t lock;
+	struct pagevec pvec;
+};
+
+void stash_init(struct pagestash *stash);
+
+struct i915_address_space {
+	struct kref ref;
+	struct rcu_work rcu;
+
+	struct drm_mm mm;
+	struct intel_gt *gt;
+	struct drm_i915_private *i915;
+	struct device *dma;
+	/*
+	 * Every address space belongs to a struct file - except for the global
+	 * GTT that is owned by the driver (and so @file is set to NULL). In
+	 * principle, no information should leak from one context to another
+	 * (or between files/processes etc) unless explicitly shared by the
+	 * owner. Tracking the owner is important in order to free up per-file
+	 * objects along with the file, to aide resource tracking, and to
+	 * assign blame.
+	 */
+	struct drm_i915_file_private *file;
+	u64 total;		/* size addr space maps (ex. 2GB for ggtt) */
+	u64 reserved;		/* size addr space reserved */
+
+	unsigned int bind_async_flags;
+
+	/*
+	 * Each active user context has its own address space (in full-ppgtt).
+	 * Since the vm may be shared between multiple contexts, we count how
+	 * many contexts keep us "open". Once open hits zero, we are closed
+	 * and do not allow any new attachments, and proceed to shutdown our
+	 * vma and page directories.
+	 */
+	atomic_t open;
+
+	struct mutex mutex; /* protects vma and our lists */
+#define VM_CLASS_GGTT 0
+#define VM_CLASS_PPGTT 1
+
+	struct i915_page_scratch scratch[4];
+	unsigned int scratch_order;
+	unsigned int top;
+
+	/**
+	 * List of vma currently bound.
+	 */
+	struct list_head bound_list;
+
+	struct pagestash free_pages;
+
+	/* Global GTT */
+	bool is_ggtt:1;
+
+	/* Some systems require uncached updates of the page directories */
+	bool pt_kmap_wc:1;
+
+	/* Some systems support read-only mappings for GGTT and/or PPGTT */
+	bool has_read_only:1;
+
+	u64 (*pte_encode)(dma_addr_t addr,
+			  enum i915_cache_level level,
+			  u32 flags); /* Create a valid PTE */
+#define PTE_READ_ONLY	BIT(0)
+
+	int (*allocate_va_range)(struct i915_address_space *vm,
+				 u64 start, u64 length);
+	void (*clear_range)(struct i915_address_space *vm,
+			    u64 start, u64 length);
+	void (*insert_page)(struct i915_address_space *vm,
+			    dma_addr_t addr,
+			    u64 offset,
+			    enum i915_cache_level cache_level,
+			    u32 flags);
+	void (*insert_entries)(struct i915_address_space *vm,
+			       struct i915_vma *vma,
+			       enum i915_cache_level cache_level,
+			       u32 flags);
+	void (*cleanup)(struct i915_address_space *vm);
+
+	struct i915_vma_ops vma_ops;
+
+	I915_SELFTEST_DECLARE(struct fault_attr fault_attr);
+	I915_SELFTEST_DECLARE(bool scrub_64K);
+};
+
+/*
+ * The Graphics Translation Table is the way in which GEN hardware translates a
+ * Graphics Virtual Address into a Physical Address. In addition to the normal
+ * collateral associated with any va->pa translations GEN hardware also has a
+ * portion of the GTT which can be mapped by the CPU and remain both coherent
+ * and correct (in cases like swizzling). That region is referred to as GMADR in
+ * the spec.
+ */
+struct i915_ggtt {
+	struct i915_address_space vm;
+
+	struct io_mapping iomap;	/* Mapping to our CPU mappable region */
+	struct resource gmadr;          /* GMADR resource */
+	resource_size_t mappable_end;	/* End offset that we can CPU map */
+
+	/** "Graphics Stolen Memory" holds the global PTEs */
+	void __iomem *gsm;
+	void (*invalidate)(struct i915_ggtt *ggtt);
+
+	/** PPGTT used for aliasing the PPGTT with the GTT */
+	struct i915_ppgtt *alias;
+
+	bool do_idle_maps;
+
+	int mtrr;
+
+	/** Bit 6 swizzling required for X tiling */
+	u32 bit_6_swizzle_x;
+	/** Bit 6 swizzling required for Y tiling */
+	u32 bit_6_swizzle_y;
+
+	u32 pin_bias;
+
+	unsigned int num_fences;
+	struct i915_fence_reg fence_regs[I915_MAX_NUM_FENCES];
+	struct list_head fence_list;
+
+	/**
+	 * List of all objects in gtt_space, currently mmapped by userspace.
+	 * All objects within this list must also be on bound_list.
+	 */
+	struct list_head userfault_list;
+
+	/* Manual runtime pm autosuspend delay for user GGTT mmaps */
+	struct intel_wakeref_auto userfault_wakeref;
+
+	struct mutex error_mutex;
+	struct drm_mm_node error_capture;
+	struct drm_mm_node uc_fw;
+};
+
+struct i915_ppgtt {
+	struct i915_address_space vm;
+
+	struct i915_page_directory *pd;
+};
+
+#define i915_is_ggtt(vm) ((vm)->is_ggtt)
+
+static inline bool
+i915_vm_is_4lvl(const struct i915_address_space *vm)
+{
+	return (vm->total - 1) >> 32;
+}
+
+static inline bool
+i915_vm_has_scratch_64K(struct i915_address_space *vm)
+{
+	return vm->scratch_order == get_order(I915_GTT_PAGE_SIZE_64K);
+}
+
+static inline bool
+i915_vm_has_cache_coloring(struct i915_address_space *vm)
+{
+	return i915_is_ggtt(vm) && vm->mm.color_adjust;
+}
+
+static inline struct i915_ggtt *
+i915_vm_to_ggtt(struct i915_address_space *vm)
+{
+	BUILD_BUG_ON(offsetof(struct i915_ggtt, vm));
+	GEM_BUG_ON(!i915_is_ggtt(vm));
+	return container_of(vm, struct i915_ggtt, vm);
+}
+
+static inline struct i915_ppgtt *
+i915_vm_to_ppgtt(struct i915_address_space *vm)
+{
+	BUILD_BUG_ON(offsetof(struct i915_ppgtt, vm));
+	GEM_BUG_ON(i915_is_ggtt(vm));
+	return container_of(vm, struct i915_ppgtt, vm);
+}
+
+static inline struct i915_address_space *
+i915_vm_get(struct i915_address_space *vm)
+{
+	kref_get(&vm->ref);
+	return vm;
+}
+
+void i915_vm_release(struct kref *kref);
+
+static inline void i915_vm_put(struct i915_address_space *vm)
+{
+	kref_put(&vm->ref, i915_vm_release);
+}
+
+static inline struct i915_address_space *
+i915_vm_open(struct i915_address_space *vm)
+{
+	GEM_BUG_ON(!atomic_read(&vm->open));
+	atomic_inc(&vm->open);
+	return i915_vm_get(vm);
+}
+
+static inline bool
+i915_vm_tryopen(struct i915_address_space *vm)
+{
+	if (atomic_add_unless(&vm->open, 1, 0))
+		return i915_vm_get(vm);
+
+	return false;
+}
+
+void __i915_vm_close(struct i915_address_space *vm);
+
+static inline void
+i915_vm_close(struct i915_address_space *vm)
+{
+	GEM_BUG_ON(!atomic_read(&vm->open));
+	if (atomic_dec_and_test(&vm->open))
+		__i915_vm_close(vm);
+
+	i915_vm_put(vm);
+}
+
+void i915_address_space_init(struct i915_address_space *vm, int subclass);
+void i915_address_space_fini(struct i915_address_space *vm);
+
+static inline u32 i915_pte_index(u64 address, unsigned int pde_shift)
+{
+	const u32 mask = NUM_PTE(pde_shift) - 1;
+
+	return (address >> PAGE_SHIFT) & mask;
+}
+
+/*
+ * Helper to count the number of PTEs within the given length. This count
+ * does not cross a page table boundary, so the max value would be
+ * GEN6_PTES for GEN6, and GEN8_PTES for GEN8.
+ */
+static inline u32 i915_pte_count(u64 addr, u64 length, unsigned int pde_shift)
+{
+	const u64 mask = ~((1ULL << pde_shift) - 1);
+	u64 end;
+
+	GEM_BUG_ON(length == 0);
+	GEM_BUG_ON(offset_in_page(addr | length));
+
+	end = addr + length;
+
+	if ((addr & mask) != (end & mask))
+		return NUM_PTE(pde_shift) - i915_pte_index(addr, pde_shift);
+
+	return i915_pte_index(end, pde_shift) - i915_pte_index(addr, pde_shift);
+}
+
+static inline u32 i915_pde_index(u64 addr, u32 shift)
+{
+	return (addr >> shift) & I915_PDE_MASK;
+}
+
+static inline struct i915_page_table *
+i915_pt_entry(const struct i915_page_directory * const pd,
+	      const unsigned short n)
+{
+	return pd->entry[n];
+}
+
+static inline struct i915_page_directory *
+i915_pd_entry(const struct i915_page_directory * const pdp,
+	      const unsigned short n)
+{
+	return pdp->entry[n];
+}
+
+static inline dma_addr_t
+i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
+{
+	struct i915_page_dma *pt = ppgtt->pd->entry[n];
+
+	return px_dma(pt ?: px_base(&ppgtt->vm.scratch[ppgtt->vm.top]));
+}
+
+void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt);
+
+int i915_ggtt_probe_hw(struct drm_i915_private *i915);
+int i915_ggtt_init_hw(struct drm_i915_private *i915);
+int i915_ggtt_enable_hw(struct drm_i915_private *i915);
+void i915_ggtt_enable_guc(struct i915_ggtt *ggtt);
+void i915_ggtt_disable_guc(struct i915_ggtt *ggtt);
+int i915_init_ggtt(struct drm_i915_private *i915);
+void i915_ggtt_driver_release(struct drm_i915_private *i915);
+
+static inline bool i915_ggtt_has_aperture(const struct i915_ggtt *ggtt)
+{
+	return ggtt->mappable_end > 0;
+}
+
+int i915_ppgtt_init_hw(struct intel_gt *gt);
+
+struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt);
+
+void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915);
+void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915);
+
+u64 gen8_pte_encode(dma_addr_t addr,
+		    enum i915_cache_level level,
+		    u32 flags);
+
+int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p);
+void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p);
+
+#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
+
+void
+fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count);
+
+#define fill_px(px, v) fill_page_dma(px_base(px), (v), PAGE_SIZE / sizeof(u64))
+#define fill32_px(px, v) do {						\
+	u64 v__ = lower_32_bits(v);					\
+	fill_px((px), v__ << 32 | v__);					\
+} while (0)
+
+int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp);
+void cleanup_scratch_page(struct i915_address_space *vm);
+void free_scratch(struct i915_address_space *vm);
+
+struct i915_page_table *alloc_pt(struct i915_address_space *vm);
+struct i915_page_directory *alloc_pd(struct i915_address_space *vm);
+struct i915_page_directory *__alloc_pd(size_t sz);
+
+void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd);
+
+#define free_px(vm, px) free_pd(vm, px_base(px))
+
+void
+__set_pd_entry(struct i915_page_directory * const pd,
+	       const unsigned short idx,
+	       struct i915_page_dma * const to,
+	       u64 (*encode)(const dma_addr_t, const enum i915_cache_level));
+
+#define set_pd_entry(pd, idx, to) \
+	__set_pd_entry((pd), (idx), px_base(to), gen8_pde_encode)
+
+void
+clear_pd_entry(struct i915_page_directory * const pd,
+	       const unsigned short idx,
+	       const struct i915_page_scratch * const scratch);
+
+bool
+release_pd_entry(struct i915_page_directory * const pd,
+		 const unsigned short idx,
+		 struct i915_page_table * const pt,
+		 const struct i915_page_scratch * const scratch);
+void gen6_ggtt_invalidate(struct i915_ggtt *ggtt);
+
+int ggtt_set_pages(struct i915_vma *vma);
+int ppgtt_set_pages(struct i915_vma *vma);
+void clear_pages(struct i915_vma *vma);
+
+void gtt_write_workarounds(struct intel_gt *gt);
+
+void setup_private_pat(struct intel_uncore *uncore);
+
+static inline struct sgt_dma {
+	struct scatterlist *sg;
+	dma_addr_t dma, max;
+} sgt_dma(struct i915_vma *vma) {
+	struct scatterlist *sg = vma->pages->sgl;
+	dma_addr_t addr = sg_dma_address(sg);
+
+	return (struct sgt_dma){ sg, addr, addr + sg->length };
+}
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 4fb70a7716e3..0cf0f6fae675 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -488,17 +488,23 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
 	return desc;
 }
 
-static u32 *set_offsets(u32 *regs,
+static inline unsigned int dword_in_page(void *addr)
+{
+	return offset_in_page(addr) / sizeof(u32);
+}
+
+static void set_offsets(u32 *regs,
 			const u8 *data,
-			const struct intel_engine_cs *engine)
+			const struct intel_engine_cs *engine,
+			bool clear)
 #define NOP(x) (BIT(7) | (x))
-#define LRI(count, flags) ((flags) << 6 | (count))
+#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
 #define POSTED BIT(0)
 #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
 #define REG16(x) \
 	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
 	(((x) >> 2) & 0x7f)
-#define END() 0
+#define END(x) 0, (x)
 {
 	const u32 base = engine->mmio_base;
 
@@ -506,7 +512,10 @@ static u32 *set_offsets(u32 *regs,
 		u8 count, flags;
 
 		if (*data & BIT(7)) { /* skip */
-			regs += *data++ & ~BIT(7);
+			count = *data++ & ~BIT(7);
+			if (clear)
+				memset32(regs, MI_NOOP, count);
+			regs += count;
 			continue;
 		}
 
@@ -532,12 +541,25 @@ static u32 *set_offsets(u32 *regs,
 				offset |= v & ~BIT(7);
 			} while (v & BIT(7));
 
-			*regs = base + (offset << 2);
+			regs[0] = base + (offset << 2);
+			if (clear)
+				regs[1] = 0;
 			regs += 2;
 		} while (--count);
 	}
 
-	return regs;
+	if (clear) {
+		u8 count = *++data;
+
+		/* Clear past the tail for HW access */
+		GEM_BUG_ON(dword_in_page(regs) > count);
+		memset32(regs, MI_NOOP, count - dword_in_page(regs));
+
+		/* Close the batch; used mainly by live_lrc_layout() */
+		*regs = MI_BATCH_BUFFER_END;
+		if (INTEL_GEN(engine->i915) >= 10)
+			*regs |= BIT(0);
+	}
 }
 
 static const u8 gen8_xcs_offsets[] = {
@@ -572,7 +594,7 @@ static const u8 gen8_xcs_offsets[] = {
 	REG16(0x200),
 	REG(0x028),
 
-	END(),
+	END(80)
 };
 
 static const u8 gen9_xcs_offsets[] = {
@@ -656,7 +678,7 @@ static const u8 gen9_xcs_offsets[] = {
 	REG16(0x67c),
 	REG(0x068),
 
-	END(),
+	END(176)
 };
 
 static const u8 gen12_xcs_offsets[] = {
@@ -688,7 +710,7 @@ static const u8 gen12_xcs_offsets[] = {
 	REG16(0x274),
 	REG16(0x270),
 
-	END(),
+	END(80)
 };
 
 static const u8 gen8_rcs_offsets[] = {
@@ -725,7 +747,91 @@ static const u8 gen8_rcs_offsets[] = {
 	LRI(1, 0),
 	REG(0x0c8),
 
-	END(),
+	END(80)
+};
+
+static const u8 gen9_rcs_offsets[] = {
+	NOP(1),
+	LRI(14, POSTED),
+	REG16(0x244),
+	REG(0x34),
+	REG(0x30),
+	REG(0x38),
+	REG(0x3c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x11c),
+	REG(0x114),
+	REG(0x118),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+
+	NOP(3),
+	LRI(9, POSTED),
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	NOP(13),
+	LRI(1, 0),
+	REG(0xc8),
+
+	NOP(13),
+	LRI(44, POSTED),
+	REG(0x28),
+	REG(0x9c),
+	REG(0xc0),
+	REG(0x178),
+	REG(0x17c),
+	REG16(0x358),
+	REG(0x170),
+	REG(0x150),
+	REG(0x154),
+	REG(0x158),
+	REG16(0x41c),
+	REG16(0x600),
+	REG16(0x604),
+	REG16(0x608),
+	REG16(0x60c),
+	REG16(0x610),
+	REG16(0x614),
+	REG16(0x618),
+	REG16(0x61c),
+	REG16(0x620),
+	REG16(0x624),
+	REG16(0x628),
+	REG16(0x62c),
+	REG16(0x630),
+	REG16(0x634),
+	REG16(0x638),
+	REG16(0x63c),
+	REG16(0x640),
+	REG16(0x644),
+	REG16(0x648),
+	REG16(0x64c),
+	REG16(0x650),
+	REG16(0x654),
+	REG16(0x658),
+	REG16(0x65c),
+	REG16(0x660),
+	REG16(0x664),
+	REG16(0x668),
+	REG16(0x66c),
+	REG16(0x670),
+	REG16(0x674),
+	REG16(0x678),
+	REG16(0x67c),
+	REG(0x68),
+
+	END(176)
 };
 
 static const u8 gen11_rcs_offsets[] = {
@@ -766,7 +872,7 @@ static const u8 gen11_rcs_offsets[] = {
 	LRI(1, 0),
 	REG(0x0c8),
 
-	END(),
+	END(80)
 };
 
 static const u8 gen12_rcs_offsets[] = {
@@ -807,7 +913,7 @@ static const u8 gen12_rcs_offsets[] = {
 	LRI(1, 0),
 	REG(0x0c8),
 
-	END(),
+	END(80)
 };
 
 #undef END
@@ -832,6 +938,8 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
 			return gen12_rcs_offsets;
 		else if (INTEL_GEN(engine->i915) >= 11)
 			return gen11_rcs_offsets;
+		else if (INTEL_GEN(engine->i915) >= 9)
+			return gen9_rcs_offsets;
 		else
 			return gen8_rcs_offsets;
 	} else {
@@ -1108,7 +1216,7 @@ __execlists_schedule_in(struct i915_request *rq)
 		/* We don't need a strict matching tag, just different values */
 		ce->lrc_desc &= ~GENMASK_ULL(47, 37);
 		ce->lrc_desc |=
-			(u64)(engine->context_tag++ % NUM_CONTEXT_TAG) <<
+			(u64)(++engine->context_tag % NUM_CONTEXT_TAG) <<
 			GEN11_SW_CTX_ID_SHIFT;
 		BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
 	}
@@ -1243,10 +1351,6 @@ static u64 execlists_update_context(struct i915_request *rq)
 	 */
 	wmb();
 
-	/* Wa_1607138340:tgl */
-	if (IS_TGL_REVID(rq->i915, TGL_REVID_A0, TGL_REVID_A0))
-		desc |= CTX_DESC_FORCE_RESTORE;
-
 	ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
 	return desc;
 }
@@ -1430,8 +1534,8 @@ static bool can_merge_rq(const struct i915_request *prev,
 	if (i915_request_completed(next))
 		return true;
 
-	if (unlikely((prev->flags ^ next->flags) &
-		     (I915_REQUEST_NOPREEMPT | I915_REQUEST_SENTINEL)))
+	if (unlikely((prev->fence.flags ^ next->fence.flags) &
+		     (I915_FENCE_FLAG_NOPREEMPT | I915_FENCE_FLAG_SENTINEL)))
 		return false;
 
 	if (!can_merge_ctx(prev->context, next->context))
@@ -1443,7 +1547,7 @@ static bool can_merge_rq(const struct i915_request *prev,
 static void virtual_update_register_offsets(u32 *regs,
 					    struct intel_engine_cs *engine)
 {
-	set_offsets(regs, reg_offsets(engine), engine);
+	set_offsets(regs, reg_offsets(engine), engine, false);
 }
 
 static bool virtual_matches(const struct virtual_engine *ve,
@@ -1590,7 +1694,7 @@ active_timeslice(const struct intel_engine_cs *engine)
 {
 	const struct i915_request *rq = *engine->execlists.active;
 
-	if (i915_request_completed(rq))
+	if (!rq || i915_request_completed(rq))
 		return 0;
 
 	if (engine->execlists.switch_priority_hint < effective_prio(rq))
@@ -1636,6 +1740,11 @@ static void set_preempt_timeout(struct intel_engine_cs *engine)
 		     active_preempt_timeout(engine));
 }
 
+static inline void clear_ports(struct i915_request **ports, int count)
+{
+	memset_p((void **)ports, NULL, count);
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -1996,10 +2105,9 @@ done:
 
 			goto skip_submit;
 		}
+		clear_ports(port + 1, last_port - port);
 
-		memset(port + 1, 0, (last_port - port) * sizeof(*port));
 		execlists_submit_ports(engine);
-
 		set_preempt_timeout(engine);
 	} else {
 skip_submit:
@@ -2014,13 +2122,14 @@ cancel_port_requests(struct intel_engine_execlists * const execlists)
 
 	for (port = execlists->pending; *port; port++)
 		execlists_schedule_out(*port);
-	memset(execlists->pending, 0, sizeof(execlists->pending));
+	clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending));
 
 	/* Mark the end of active before we overwrite *active */
 	for (port = xchg(&execlists->active, execlists->pending); *port; port++)
 		execlists_schedule_out(*port);
-	WRITE_ONCE(execlists->active,
-		   memset(execlists->inflight, 0, sizeof(execlists->inflight)));
+	clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
+
+	WRITE_ONCE(execlists->active, execlists->inflight);
 }
 
 static inline void
@@ -2176,7 +2285,6 @@ static void process_csb(struct intel_engine_cs *engine)
 
 			/* Point active to the new ELSP; prevent overwriting */
 			WRITE_ONCE(execlists->active, execlists->pending);
-			set_timeslice(engine);
 
 			if (!inject_preempt_hang(execlists))
 				ring_set_paused(engine, 0);
@@ -2217,6 +2325,7 @@ static void process_csb(struct intel_engine_cs *engine)
 	} while (head != tail);
 
 	execlists->csb_head = head;
+	set_timeslice(engine);
 
 	/*
 	 * Gen11 has proven to fail wrt global observation point between
@@ -2399,7 +2508,7 @@ set_redzone(void *vaddr, const struct intel_engine_cs *engine)
 
 	vaddr += engine->context_size;
 
-	memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE);
+	memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
 }
 
 static void
@@ -2410,7 +2519,7 @@ check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
 
 	vaddr += engine->context_size;
 
-	if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE))
+	if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
 		dev_err_once(engine->i915->drm.dev,
 			     "%s context redzone overwritten!\n",
 			     engine->name);
@@ -2453,33 +2562,21 @@ __execlists_context_pin(struct intel_context *ce,
 			struct intel_engine_cs *engine)
 {
 	void *vaddr;
-	int ret;
 
 	GEM_BUG_ON(!ce->state);
-
-	ret = intel_context_active_acquire(ce);
-	if (ret)
-		goto err;
 	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
 
 	vaddr = i915_gem_object_pin_map(ce->state->obj,
 					i915_coherent_map_type(engine->i915) |
 					I915_MAP_OVERRIDE);
-	if (IS_ERR(vaddr)) {
-		ret = PTR_ERR(vaddr);
-		goto unpin_active;
-	}
+	if (IS_ERR(vaddr))
+		return PTR_ERR(vaddr);
 
-	ce->lrc_desc = lrc_descriptor(ce, engine);
+	ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
 	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
 	__execlists_update_reg_state(ce, engine);
 
 	return 0;
-
-unpin_active:
-	intel_context_active_release(ce);
-err:
-	return ret;
 }
 
 static int execlists_context_pin(struct intel_context *ce)
@@ -2494,6 +2591,9 @@ static int execlists_context_alloc(struct intel_context *ce)
 
 static void execlists_context_reset(struct intel_context *ce)
 {
+	CE_TRACE(ce, "reset\n");
+	GEM_BUG_ON(!intel_context_is_pinned(ce));
+
 	/*
 	 * Because we emit WA_TAIL_DWORDS there may be a disparity
 	 * between our bookkeeping in ce->ring->head and ce->ring->tail and
@@ -2510,8 +2610,14 @@ static void execlists_context_reset(struct intel_context *ce)
 	 * So to avoid that we reset the context images upon resume. For
 	 * simplicity, we just zero everything out.
 	 */
-	intel_ring_reset(ce->ring, 0);
+	intel_ring_reset(ce->ring, ce->ring->emit);
+
+	/* Scrub away the garbage */
+	execlists_init_reg_state(ce->lrc_reg_state,
+				 ce, ce->engine, ce->ring, true);
 	__execlists_update_reg_state(ce, ce->engine);
+
+	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
 }
 
 static const struct intel_context_ops execlists_context_ops = {
@@ -2730,6 +2836,14 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
 	batch = gen8_emit_flush_coherentl3_wa(engine, batch);
 
+	/* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
+	batch = gen8_emit_pipe_control(batch,
+				       PIPE_CONTROL_FLUSH_L3 |
+				       PIPE_CONTROL_STORE_DATA_INDEX |
+				       PIPE_CONTROL_CS_STALL |
+				       PIPE_CONTROL_QW_WRITE,
+				       LRC_PPHWSP_SCRATCH_ADDR);
+
 	batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
 
 	/* WaMediaPoolStateCmdInWABB:bxt,glk */
@@ -2925,6 +3039,8 @@ static void enable_execlists(struct intel_engine_cs *engine)
 			RING_HWS_PGA,
 			i915_ggtt_offset(engine->status_page.vma));
 	ENGINE_POSTING_READ(engine, RING_HWS_PGA);
+
+	engine->context_tag = 0;
 }
 
 static bool unexpected_starting_state(struct intel_engine_cs *engine)
@@ -3030,10 +3146,8 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
 			       &execlists->csb_status[reset_value]);
 }
 
-static void __execlists_reset_reg_state(const struct intel_context *ce,
-					const struct intel_engine_cs *engine)
+static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
 {
-	u32 *regs = ce->lrc_reg_state;
 	int x;
 
 	x = lrc_ring_mi_mode(engine);
@@ -3043,6 +3157,14 @@ static void __execlists_reset_reg_state(const struct intel_context *ce,
 	}
 }
 
+static void __execlists_reset_reg_state(const struct intel_context *ce,
+					const struct intel_engine_cs *engine)
+{
+	u32 *regs = ce->lrc_reg_state;
+
+	__reset_stop_ring(regs, engine);
+}
+
 static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -3795,7 +3917,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 {
 	/* Default vfuncs which can be overriden by each engine. */
 
-	engine->release = execlists_release;
 	engine->resume = execlists_resume;
 
 	engine->cops = &execlists_context_ops;
@@ -3910,6 +4031,9 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
 
 	reset_csb_pointers(engine);
 
+	/* Finally, take ownership and responsibility for cleanup! */
+	engine->release = execlists_release;
+
 	return 0;
 }
 
@@ -3949,18 +4073,21 @@ static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
 
 static void init_common_reg_state(u32 * const regs,
 				  const struct intel_engine_cs *engine,
-				  const struct intel_ring *ring)
+				  const struct intel_ring *ring,
+				  bool inhibit)
 {
-	regs[CTX_CONTEXT_CONTROL] =
-		_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
-		_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
+	u32 ctl;
+
+	ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
+	ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+	if (inhibit)
+		ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
 	if (INTEL_GEN(engine->i915) < 11)
-		regs[CTX_CONTEXT_CONTROL] |=
-			_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
-					    CTX_CTRL_RS_CTX_ENABLE);
+		ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
+					   CTX_CTRL_RS_CTX_ENABLE);
+	regs[CTX_CONTEXT_CONTROL] = ctl;
 
 	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
-	regs[CTX_BB_STATE] = RING_BB_PPGTT;
 }
 
 static void init_wa_bb_reg_state(u32 * const regs,
@@ -4016,7 +4143,7 @@ static void execlists_init_reg_state(u32 *regs,
 				     const struct intel_context *ce,
 				     const struct intel_engine_cs *engine,
 				     const struct intel_ring *ring,
-				     bool close)
+				     bool inhibit)
 {
 	/*
 	 * A context is actually a big batch buffer with several
@@ -4028,21 +4155,17 @@ static void execlists_init_reg_state(u32 *regs,
 	 *
 	 * Must keep consistent with virtual_update_register_offsets().
 	 */
-	u32 *bbe = set_offsets(regs, reg_offsets(engine), engine);
-
-	if (close) { /* Close the batch; used mainly by live_lrc_layout() */
-		*bbe = MI_BATCH_BUFFER_END;
-		if (INTEL_GEN(engine->i915) >= 10)
-			*bbe |= BIT(0);
-	}
+	set_offsets(regs, reg_offsets(engine), engine, inhibit);
 
-	init_common_reg_state(regs, engine, ring);
+	init_common_reg_state(regs, engine, ring, inhibit);
 	init_ppgtt_reg_state(regs, vm_alias(ce->vm));
 
 	init_wa_bb_reg_state(regs, engine,
 			     INTEL_GEN(engine->i915) >= 12 ?
 			     GEN12_CTX_BB_PER_CTX_PTR :
 			     CTX_BB_PER_CTX_PTR);
+
+	__reset_stop_ring(regs, engine);
 }
 
 static int
@@ -4053,7 +4176,6 @@ populate_lr_context(struct intel_context *ce,
 {
 	bool inhibit = true;
 	void *vaddr;
-	u32 *regs;
 	int ret;
 
 	vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
@@ -4083,11 +4205,8 @@ populate_lr_context(struct intel_context *ce,
 
 	/* The second page of the context object contains some fields which must
 	 * be set up prior to the first execution. */
-	regs = vaddr + LRC_STATE_PN * PAGE_SIZE;
-	execlists_init_reg_state(regs, ce, engine, ring, inhibit);
-	if (inhibit)
-		regs[CTX_CONTEXT_CONTROL] |=
-			_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+	execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
+				 ce, engine, ring, inhibit);
 
 	ret = 0;
 err_unpin_ctx:
@@ -4481,9 +4600,11 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
 	ve->base.gt = siblings[0]->gt;
 	ve->base.uncore = siblings[0]->uncore;
 	ve->base.id = -1;
+
 	ve->base.class = OTHER_CLASS;
 	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
 	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
+	ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
 
 	/*
 	 * The decision on whether to submit a request using semaphores
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 893249ea48d4..eeef90b55c64 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -127,7 +127,7 @@ struct drm_i915_mocs_table {
 		   LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \
 		   L3_3_WB)
 
-static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
+static const struct drm_i915_mocs_entry skl_mocs_table[] = {
 	GEN9_MOCS_ENTRIES,
 	MOCS_ENTRY(I915_MOCS_CACHED,
 		   LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3),
@@ -233,7 +233,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
 		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
 		   L3_1_UC)
 
-static const struct drm_i915_mocs_entry tigerlake_mocs_table[] = {
+static const struct drm_i915_mocs_entry tgl_mocs_table[] = {
 	/* Base - Error (Reserved for Non-Use) */
 	MOCS_ENTRY(0, 0x0, 0x0),
 	/* Base - Reserved */
@@ -267,7 +267,7 @@ static const struct drm_i915_mocs_entry tigerlake_mocs_table[] = {
 		   L3_3_WB),
 };
 
-static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
+static const struct drm_i915_mocs_entry icl_mocs_table[] = {
 	/* Base - Uncached (Deprecated) */
 	MOCS_ENTRY(I915_MOCS_UNCACHED,
 		   LE_1_UC | LE_TC_1_LLC,
@@ -284,17 +284,17 @@ static bool get_mocs_settings(const struct drm_i915_private *i915,
 			      struct drm_i915_mocs_table *table)
 {
 	if (INTEL_GEN(i915) >= 12) {
-		table->size  = ARRAY_SIZE(tigerlake_mocs_table);
-		table->table = tigerlake_mocs_table;
+		table->size  = ARRAY_SIZE(tgl_mocs_table);
+		table->table = tgl_mocs_table;
 		table->n_entries = GEN11_NUM_MOCS_ENTRIES;
 	} else if (IS_GEN(i915, 11)) {
-		table->size  = ARRAY_SIZE(icelake_mocs_table);
-		table->table = icelake_mocs_table;
+		table->size  = ARRAY_SIZE(icl_mocs_table);
+		table->table = icl_mocs_table;
 		table->n_entries = GEN11_NUM_MOCS_ENTRIES;
 	} else if (IS_GEN9_BC(i915) || IS_CANNONLAKE(i915)) {
-		table->size  = ARRAY_SIZE(skylake_mocs_table);
+		table->size  = ARRAY_SIZE(skl_mocs_table);
 		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
-		table->table = skylake_mocs_table;
+		table->table = skl_mocs_table;
 	} else if (IS_GEN9_LP(i915)) {
 		table->size  = ARRAY_SIZE(broxton_mocs_table);
 		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
new file mode 100644
index 000000000000..f86f7e68ce5e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
@@ -0,0 +1,218 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/slab.h>
+
+#include "i915_trace.h"
+#include "intel_gtt.h"
+#include "gen6_ppgtt.h"
+#include "gen8_ppgtt.h"
+
+struct i915_page_table *alloc_pt(struct i915_address_space *vm)
+{
+	struct i915_page_table *pt;
+
+	pt = kmalloc(sizeof(*pt), I915_GFP_ALLOW_FAIL);
+	if (unlikely(!pt))
+		return ERR_PTR(-ENOMEM);
+
+	if (unlikely(setup_page_dma(vm, &pt->base))) {
+		kfree(pt);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	atomic_set(&pt->used, 0);
+	return pt;
+}
+
+struct i915_page_directory *__alloc_pd(size_t sz)
+{
+	struct i915_page_directory *pd;
+
+	pd = kzalloc(sz, I915_GFP_ALLOW_FAIL);
+	if (unlikely(!pd))
+		return NULL;
+
+	spin_lock_init(&pd->lock);
+	return pd;
+}
+
+struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
+{
+	struct i915_page_directory *pd;
+
+	pd = __alloc_pd(sizeof(*pd));
+	if (unlikely(!pd))
+		return ERR_PTR(-ENOMEM);
+
+	if (unlikely(setup_page_dma(vm, px_base(pd)))) {
+		kfree(pd);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return pd;
+}
+
+void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd)
+{
+	cleanup_page_dma(vm, pd);
+	kfree(pd);
+}
+
+static inline void
+write_dma_entry(struct i915_page_dma * const pdma,
+		const unsigned short idx,
+		const u64 encoded_entry)
+{
+	u64 * const vaddr = kmap_atomic(pdma->page);
+
+	vaddr[idx] = encoded_entry;
+	kunmap_atomic(vaddr);
+}
+
+void
+__set_pd_entry(struct i915_page_directory * const pd,
+	       const unsigned short idx,
+	       struct i915_page_dma * const to,
+	       u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
+{
+	/* Each thread pre-pins the pd, and we may have a thread per pde. */
+	GEM_BUG_ON(atomic_read(px_used(pd)) > NALLOC * ARRAY_SIZE(pd->entry));
+
+	atomic_inc(px_used(pd));
+	pd->entry[idx] = to;
+	write_dma_entry(px_base(pd), idx, encode(to->daddr, I915_CACHE_LLC));
+}
+
+void
+clear_pd_entry(struct i915_page_directory * const pd,
+	       const unsigned short idx,
+	       const struct i915_page_scratch * const scratch)
+{
+	GEM_BUG_ON(atomic_read(px_used(pd)) == 0);
+
+	write_dma_entry(px_base(pd), idx, scratch->encode);
+	pd->entry[idx] = NULL;
+	atomic_dec(px_used(pd));
+}
+
+bool
+release_pd_entry(struct i915_page_directory * const pd,
+		 const unsigned short idx,
+		 struct i915_page_table * const pt,
+		 const struct i915_page_scratch * const scratch)
+{
+	bool free = false;
+
+	if (atomic_add_unless(&pt->used, -1, 1))
+		return false;
+
+	spin_lock(&pd->lock);
+	if (atomic_dec_and_test(&pt->used)) {
+		clear_pd_entry(pd, idx, scratch);
+		free = true;
+	}
+	spin_unlock(&pd->lock);
+
+	return free;
+}
+
+int i915_ppgtt_init_hw(struct intel_gt *gt)
+{
+	struct drm_i915_private *i915 = gt->i915;
+
+	gtt_write_workarounds(gt);
+
+	if (IS_GEN(i915, 6))
+		gen6_ppgtt_enable(gt);
+	else if (IS_GEN(i915, 7))
+		gen7_ppgtt_enable(gt);
+
+	return 0;
+}
+
+static struct i915_ppgtt *
+__ppgtt_create(struct intel_gt *gt)
+{
+	if (INTEL_GEN(gt->i915) < 8)
+		return gen6_ppgtt_create(gt);
+	else
+		return gen8_ppgtt_create(gt);
+}
+
+struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt)
+{
+	struct i915_ppgtt *ppgtt;
+
+	ppgtt = __ppgtt_create(gt);
+	if (IS_ERR(ppgtt))
+		return ppgtt;
+
+	trace_i915_ppgtt_create(&ppgtt->vm);
+
+	return ppgtt;
+}
+
+static int ppgtt_bind_vma(struct i915_vma *vma,
+			  enum i915_cache_level cache_level,
+			  u32 flags)
+{
+	u32 pte_flags;
+	int err;
+
+	if (flags & I915_VMA_ALLOC) {
+		err = vma->vm->allocate_va_range(vma->vm,
+						 vma->node.start, vma->size);
+		if (err)
+			return err;
+
+		set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
+	}
+
+	/* Applicable to VLV, and gen8+ */
+	pte_flags = 0;
+	if (i915_gem_object_is_readonly(vma->obj))
+		pte_flags |= PTE_READ_ONLY;
+
+	GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)));
+	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
+	wmb();
+
+	return 0;
+}
+
+static void ppgtt_unbind_vma(struct i915_vma *vma)
+{
+	if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)))
+		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
+}
+
+int ppgtt_set_pages(struct i915_vma *vma)
+{
+	GEM_BUG_ON(vma->pages);
+
+	vma->pages = vma->obj->mm.pages;
+
+	vma->page_sizes = vma->obj->mm.page_sizes;
+
+	return 0;
+}
+
+void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
+{
+	struct drm_i915_private *i915 = gt->i915;
+
+	ppgtt->vm.gt = gt;
+	ppgtt->vm.i915 = i915;
+	ppgtt->vm.dma = &i915->drm.pdev->dev;
+	ppgtt->vm.total = BIT_ULL(INTEL_INFO(i915)->ppgtt_size);
+
+	i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
+
+	ppgtt->vm.vma_ops.bind_vma    = ppgtt_bind_vma;
+	ppgtt->vm.vma_ops.unbind_vma  = ppgtt_unbind_vma;
+	ppgtt->vm.vma_ops.set_pages   = ppgtt_set_pages;
+	ppgtt->vm.vma_ops.clear_pages = clear_pages;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 1c51296646e0..beee0cf89bce 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -147,11 +147,7 @@ static void mark_innocent(struct i915_request *rq)
 
 void __i915_request_reset(struct i915_request *rq, bool guilty)
 {
-	GEM_TRACE("%s rq=%llx:%lld, guilty? %s\n",
-		  rq->engine->name,
-		  rq->fence.context,
-		  rq->fence.seqno,
-		  yesno(guilty));
+	RQ_TRACE(rq, "guilty? %s\n", yesno(guilty));
 
 	GEM_BUG_ON(i915_request_completed(rq));
 
@@ -251,9 +247,8 @@ out:
 	return ret;
 }
 
-static int ironlake_do_reset(struct intel_gt *gt,
-			     intel_engine_mask_t engine_mask,
-			     unsigned int retry)
+static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask,
+			unsigned int retry)
 {
 	struct intel_uncore *uncore = gt->uncore;
 	int ret;
@@ -597,7 +592,7 @@ static reset_func intel_get_gpu_reset(const struct intel_gt *gt)
 	else if (INTEL_GEN(i915) >= 6)
 		return gen6_reset_engines;
 	else if (INTEL_GEN(i915) >= 5)
-		return ironlake_do_reset;
+		return ilk_do_reset;
 	else if (IS_G4X(i915))
 		return g4x_do_reset;
 	else if (IS_G33(i915) || IS_PINEVIEW(i915))
@@ -625,7 +620,7 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
 	 */
 	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
 	for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) {
-		GEM_TRACE("engine_mask=%x\n", engine_mask);
+		GT_TRACE(gt, "engine_mask=%x\n", engine_mask);
 		preempt_disable();
 		ret = reset(gt, engine_mask, retry);
 		preempt_enable();
@@ -785,8 +780,7 @@ static void nop_submit_request(struct i915_request *request)
 	struct intel_engine_cs *engine = request->engine;
 	unsigned long flags;
 
-	GEM_TRACE("%s fence %llx:%lld -> -EIO\n",
-		  engine->name, request->fence.context, request->fence.seqno);
+	RQ_TRACE(request, "-EIO\n");
 	dma_fence_set_error(&request->fence, -EIO);
 
 	spin_lock_irqsave(&engine->active.lock, flags);
@@ -813,7 +807,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
 			intel_engine_dump(engine, &p, "%s\n", engine->name);
 	}
 
-	GEM_TRACE("start\n");
+	GT_TRACE(gt, "start\n");
 
 	/*
 	 * First, stop submission to hw, but do not yet complete requests by
@@ -844,7 +838,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
 
 	reset_finish(gt, awake);
 
-	GEM_TRACE("end\n");
+	GT_TRACE(gt, "end\n");
 }
 
 void intel_gt_set_wedged(struct intel_gt *gt)
@@ -870,7 +864,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
 	if (test_bit(I915_WEDGED_ON_INIT, &gt->reset.flags))
 		return false;
 
-	GEM_TRACE("start\n");
+	GT_TRACE(gt, "start\n");
 
 	/*
 	 * Before unwedging, make sure that all pending operations
@@ -932,7 +926,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
 	 */
 	intel_engines_reset_default_submission(gt);
 
-	GEM_TRACE("end\n");
+	GT_TRACE(gt, "end\n");
 
 	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
 	clear_bit(I915_WEDGED, &gt->reset.flags);
@@ -1007,7 +1001,7 @@ void intel_gt_reset(struct intel_gt *gt,
 	intel_engine_mask_t awake;
 	int ret;
 
-	GEM_TRACE("flags=%lx\n", gt->reset.flags);
+	GT_TRACE(gt, "flags=%lx\n", gt->reset.flags);
 
 	might_sleep();
 	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
@@ -1236,7 +1230,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
 	engine_mask &= INTEL_INFO(gt->i915)->engine_mask;
 
 	if (flags & I915_ERROR_CAPTURE) {
-		i915_capture_error_state(gt->i915, engine_mask, msg);
+		i915_capture_error_state(gt->i915);
 		intel_gt_clear_error_registers(gt, engine_mask);
 	}
 
@@ -1329,10 +1323,10 @@ int intel_gt_terminally_wedged(struct intel_gt *gt)
 	if (!intel_gt_is_wedged(gt))
 		return 0;
 
-	/* Reset still in progress? Maybe we will recover? */
-	if (!test_bit(I915_RESET_BACKOFF, &gt->reset.flags))
+	if (intel_gt_has_init_error(gt))
 		return -EIO;
 
+	/* Reset still in progress? Maybe we will recover? */
 	if (wait_event_interruptible(gt->reset.queue,
 				     !test_bit(I915_RESET_BACKOFF,
 					       &gt->reset.flags)))
@@ -1354,6 +1348,9 @@ void intel_gt_init_reset(struct intel_gt *gt)
 	init_waitqueue_head(&gt->reset.queue);
 	mutex_init(&gt->reset.mutex);
 	init_srcu_struct(&gt->reset.backoff_srcu);
+
+	/* no GPU until we are ready! */
+	__set_bit(I915_WEDGED, &gt->reset.flags);
 }
 
 void intel_gt_fini_reset(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 81f872f9ef03..bc44fe8e5ffa 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -33,6 +33,7 @@
 
 #include "gem/i915_gem_context.h"
 
+#include "gen6_ppgtt.h"
 #include "i915_drv.h"
 #include "i915_trace.h"
 #include "intel_context.h"
@@ -1328,26 +1329,12 @@ static int ring_context_alloc(struct intel_context *ce)
 
 static int ring_context_pin(struct intel_context *ce)
 {
-	int err;
-
-	err = intel_context_active_acquire(ce);
-	if (err)
-		return err;
-
-	err = __context_pin_ppgtt(ce);
-	if (err)
-		goto err_active;
-
-	return 0;
-
-err_active:
-	intel_context_active_release(ce);
-	return err;
+	return __context_pin_ppgtt(ce);
 }
 
 static void ring_context_reset(struct intel_context *ce)
 {
-	intel_ring_reset(ce->ring, 0);
+	intel_ring_reset(ce->ring, ce->ring->emit);
 }
 
 static const struct intel_context_ops ring_context_ops = {
@@ -1394,7 +1381,7 @@ static int load_pd_dir(struct i915_request *rq,
 
 	intel_ring_advance(rq, cs);
 
-	return 0;
+	return rq->engine->emit_flush(rq, EMIT_FLUSH);
 }
 
 static inline int mi_set_context(struct i915_request *rq, u32 flags)
@@ -1408,14 +1395,6 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 	int len;
 	u32 *cs;
 
-	flags |= MI_MM_SPACE_GTT;
-	if (IS_HASWELL(i915))
-		/* These flags are for resource streamer on HSW+ */
-		flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN;
-	else
-		/* We need to save the extended state for powersaving modes */
-		flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN;
-
 	len = 4;
 	if (IS_GEN(i915, 7))
 		len += 2 + (num_engines ? 4 * num_engines + 6 : 0);
@@ -1592,7 +1571,7 @@ static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
 	if (ret)
 		return ret;
 
-	return rq->engine->emit_flush(rq, EMIT_FLUSH);
+	return rq->engine->emit_flush(rq, EMIT_INVALIDATE);
 }
 
 static int switch_context(struct i915_request *rq)
@@ -1607,15 +1586,21 @@ static int switch_context(struct i915_request *rq)
 		return ret;
 
 	if (ce->state) {
-		u32 hw_flags;
+		u32 flags;
 
 		GEM_BUG_ON(rq->engine->id != RCS0);
 
-		hw_flags = 0;
-		if (!test_bit(CONTEXT_VALID_BIT, &ce->flags))
-			hw_flags = MI_RESTORE_INHIBIT;
+		/* For resource streamer on HSW+ and power context elsewhere */
+		BUILD_BUG_ON(HSW_MI_RS_SAVE_STATE_EN != MI_SAVE_EXT_STATE_EN);
+		BUILD_BUG_ON(HSW_MI_RS_RESTORE_STATE_EN != MI_RESTORE_EXT_STATE_EN);
+
+		flags = MI_SAVE_EXT_STATE_EN | MI_MM_SPACE_GTT;
+		if (test_bit(CONTEXT_VALID_BIT, &ce->flags))
+			flags |= MI_RESTORE_EXT_STATE_EN;
+		else
+			flags |= MI_RESTORE_INHIBIT;
 
-		ret = mi_set_context(rq, hw_flags);
+		ret = mi_set_context(rq, flags);
 		if (ret)
 			return ret;
 	}
@@ -1842,8 +1827,6 @@ static void setup_common(struct intel_engine_cs *engine)
 
 	setup_irq(engine);
 
-	engine->release = ring_release;
-
 	engine->resume = xcs_resume;
 	engine->reset.prepare = reset_prepare;
 	engine->reset.rewind = reset_rewind;
@@ -2009,6 +1992,9 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine)
 
 	GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma);
 
+	/* Finally, take ownership and responsibility for cleanup! */
+	engine->release = ring_release;
+
 	return 0;
 
 err_ring:
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index f232036c3c7a..d2a3d935d186 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -777,7 +777,7 @@ void intel_rps_boost(struct i915_request *rq)
 	spin_lock_irqsave(&rq->lock, flags);
 	if (!i915_request_has_waitboost(rq) &&
 	    !dma_fence_is_signaled_locked(&rq->fence)) {
-		rq->flags |= I915_REQUEST_WAITBOOST;
+		set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags);
 
 		if (!atomic_fetch_inc(&rps->num_waiters) &&
 		    READ_ONCE(rps->cur_freq) < rps->boost_freq)
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index ee5dc4fbdeb9..87716529cd2f 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -348,7 +348,6 @@ void intel_timeline_enter(struct intel_timeline *tl)
 	 * use atomic to manipulate tl->active_count.
 	 */
 	lockdep_assert_held(&tl->mutex);
-	GEM_BUG_ON(!atomic_read(&tl->pin_count));
 
 	if (atomic_add_unless(&tl->active_count, 1, 0))
 		return;
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 195ccf7db272..4e292d4bf7b9 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -254,7 +254,7 @@ static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
 
 	/* WaDisableDopClockGating:bdw
 	 *
-	 * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
+	 * Also see the related UCGTCL1 write in bdw_init_clock_gating()
 	 * to disable EUTC clock gating.
 	 */
 	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 4e1eafa94be9..a560b7eee2cd 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -149,7 +149,11 @@ static int mock_context_alloc(struct intel_context *ce)
 
 static int mock_context_pin(struct intel_context *ce)
 {
-	return intel_context_active_acquire(ce);
+	return 0;
+}
+
+static void mock_context_reset(struct intel_context *ce)
+{
 }
 
 static const struct intel_context_ops mock_context_ops = {
@@ -161,6 +165,7 @@ static const struct intel_context_ops mock_context_ops = {
 	.enter = intel_context_enter_engine,
 	.exit = intel_context_exit_engine,
 
+	.reset = mock_context_reset,
 	.destroy = mock_context_destroy,
 };
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 5dbda2a74272..3e5e6c86e843 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -1312,7 +1312,7 @@ static int igt_reset_evict_ppgtt(void *arg)
 	if (INTEL_PPGTT(gt->i915) < INTEL_PPGTT_FULL)
 		return 0;
 
-	ppgtt = i915_ppgtt_create(gt->i915);
+	ppgtt = i915_ppgtt_create(gt);
 	if (IS_ERR(ppgtt))
 		return PTR_ERR(ppgtt);
 
@@ -1498,7 +1498,7 @@ static int igt_handle_error(void *arg)
 	struct intel_engine_cs *engine = gt->engine[RCS0];
 	struct hang h;
 	struct i915_request *rq;
-	struct i915_gpu_state *error;
+	struct i915_gpu_coredump *error;
 	int err;
 
 	/* Check that we can issue a global GPU and engine reset */
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 9ec9833c9c7b..15cda024e3e4 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -527,13 +527,19 @@ static struct i915_request *nop_request(struct intel_engine_cs *engine)
 	return rq;
 }
 
-static void wait_for_submit(struct intel_engine_cs *engine,
-			    struct i915_request *rq)
+static int wait_for_submit(struct intel_engine_cs *engine,
+			   struct i915_request *rq,
+			   unsigned long timeout)
 {
+	timeout += jiffies;
 	do {
 		cond_resched();
 		intel_engine_flush_submission(engine);
-	} while (!i915_request_is_active(rq));
+		if (i915_request_is_active(rq))
+			return 0;
+	} while (time_before(jiffies, timeout));
+
+	return -ETIME;
 }
 
 static long timeslice_threshold(const struct intel_engine_cs *engine)
@@ -601,7 +607,12 @@ static int live_timeslice_queue(void *arg)
 			goto err_heartbeat;
 		}
 		engine->schedule(rq, &attr);
-		wait_for_submit(engine, rq);
+		err = wait_for_submit(engine, rq, HZ / 2);
+		if (err) {
+			pr_err("%s: Timed out trying to submit semaphores\n",
+			       engine->name);
+			goto err_rq;
+		}
 
 		/* ELSP[1]: nop request */
 		nop = nop_request(engine);
@@ -609,8 +620,13 @@ static int live_timeslice_queue(void *arg)
 			err = PTR_ERR(nop);
 			goto err_rq;
 		}
-		wait_for_submit(engine, nop);
+		err = wait_for_submit(engine, nop, HZ / 2);
 		i915_request_put(nop);
+		if (err) {
+			pr_err("%s: Timed out trying to submit nop\n",
+			       engine->name);
+			goto err_rq;
+		}
 
 		GEM_BUG_ON(i915_request_completed(rq));
 		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
@@ -1137,7 +1153,7 @@ static int live_nopreempt(void *arg)
 		}
 
 		/* Low priority client, but unpreemptable! */
-		rq_a->flags |= I915_REQUEST_NOPREEMPT;
+		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
 
 		i915_request_add(rq_a);
 		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
@@ -3362,7 +3378,7 @@ static int live_lrc_layout(void *arg)
 	struct intel_gt *gt = arg;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	u32 *mem;
+	u32 *lrc;
 	int err;
 
 	/*
@@ -3370,13 +3386,13 @@ static int live_lrc_layout(void *arg)
 	 * match the layout saved by HW.
 	 */
 
-	mem = kmalloc(PAGE_SIZE, GFP_KERNEL);
-	if (!mem)
+	lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!lrc)
 		return -ENOMEM;
 
 	err = 0;
 	for_each_engine(engine, gt, id) {
-		u32 *hw, *lrc;
+		u32 *hw;
 		int dw;
 
 		if (!engine->default_state)
@@ -3390,8 +3406,7 @@ static int live_lrc_layout(void *arg)
 		}
 		hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
 
-		lrc = memset(mem, 0, PAGE_SIZE);
-		execlists_init_reg_state(lrc,
+		execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
 					 engine->kernel_context,
 					 engine,
 					 engine->kernel_context->ring,
@@ -3406,6 +3421,13 @@ static int live_lrc_layout(void *arg)
 				continue;
 			}
 
+			if (lrc[dw] == 0) {
+				pr_debug("%s: skipped instruction %x at dword %d\n",
+					 engine->name, lri, dw);
+				dw++;
+				continue;
+			}
+
 			if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
 				pr_err("%s: Expected LRI command at dword %d, found %08x\n",
 				       engine->name, dw, lri);
@@ -3454,7 +3476,7 @@ static int live_lrc_layout(void *arg)
 			break;
 	}
 
-	kfree(mem);
+	kfree(lrc);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/uc/Makefile b/drivers/gpu/drm/i915/gt/uc/Makefile
deleted file mode 100644
index bec94d434cb6..000000000000
--- a/drivers/gpu/drm/i915/gt/uc/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-# For building individual subdir files on the command line
-subdir-ccflags-y += -I$(srctree)/$(src)/../..
-
-# Extra header tests
-header-test-pattern-$(CONFIG_DRM_I915_WERROR) := *.h
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 3ffc6267f96e..64934a876a50 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -12,6 +12,9 @@
 
 #include "i915_drv.h"
 
+static const struct intel_uc_ops uc_ops_off;
+static const struct intel_uc_ops uc_ops_on;
+
 /* Reset GuC providing us with fresh state for both GuC and HuC.
  */
 static int __intel_uc_reset_hw(struct intel_uc *uc)
@@ -89,6 +92,11 @@ void intel_uc_init_early(struct intel_uc *uc)
 	intel_huc_init_early(&uc->huc);
 
 	__confirm_options(uc);
+
+	if (intel_uc_uses_guc(uc))
+		uc->ops = &uc_ops_on;
+	else
+		uc->ops = &uc_ops_off;
 }
 
 void intel_uc_driver_late_release(struct intel_uc *uc)
@@ -245,12 +253,11 @@ static void guc_disable_communication(struct intel_guc *guc)
 	DRM_INFO("GuC communication disabled\n");
 }
 
-void intel_uc_fetch_firmwares(struct intel_uc *uc)
+static void __uc_fetch_firmwares(struct intel_uc *uc)
 {
 	int err;
 
-	if (!intel_uc_uses_guc(uc))
-		return;
+	GEM_BUG_ON(!intel_uc_uses_guc(uc));
 
 	err = intel_uc_fw_fetch(&uc->guc.fw);
 	if (err)
@@ -260,20 +267,19 @@ void intel_uc_fetch_firmwares(struct intel_uc *uc)
 		intel_uc_fw_fetch(&uc->huc.fw);
 }
 
-void intel_uc_cleanup_firmwares(struct intel_uc *uc)
+static void __uc_cleanup_firmwares(struct intel_uc *uc)
 {
 	intel_uc_fw_cleanup_fetch(&uc->huc.fw);
 	intel_uc_fw_cleanup_fetch(&uc->guc.fw);
 }
 
-void intel_uc_init(struct intel_uc *uc)
+static void __uc_init(struct intel_uc *uc)
 {
 	struct intel_guc *guc = &uc->guc;
 	struct intel_huc *huc = &uc->huc;
 	int ret;
 
-	if (!intel_uc_uses_guc(uc))
-		return;
+	GEM_BUG_ON(!intel_uc_uses_guc(uc));
 
 	/* XXX: GuC submission is unavailable for now */
 	GEM_BUG_ON(intel_uc_supports_guc_submission(uc));
@@ -288,7 +294,7 @@ void intel_uc_init(struct intel_uc *uc)
 		intel_huc_init(huc);
 }
 
-void intel_uc_fini(struct intel_uc *uc)
+static void __uc_fini(struct intel_uc *uc)
 {
 	intel_huc_fini(&uc->huc);
 	intel_guc_fini(&uc->guc);
@@ -309,14 +315,6 @@ static int __uc_sanitize(struct intel_uc *uc)
 	return __intel_uc_reset_hw(uc);
 }
 
-void intel_uc_sanitize(struct intel_uc *uc)
-{
-	if (!intel_uc_supports_guc(uc))
-		return;
-
-	__uc_sanitize(uc);
-}
-
 /* Initialize and verify the uC regs related to uC positioning in WOPCM */
 static int uc_init_wopcm(struct intel_uc *uc)
 {
@@ -380,13 +378,8 @@ static bool uc_is_wopcm_locked(struct intel_uc *uc)
 	       (intel_uncore_read(uncore, DMA_GUC_WOPCM_OFFSET) & GUC_WOPCM_OFFSET_VALID);
 }
 
-int intel_uc_init_hw(struct intel_uc *uc)
+static int __uc_check_hw(struct intel_uc *uc)
 {
-	struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
-	struct intel_guc *guc = &uc->guc;
-	struct intel_huc *huc = &uc->huc;
-	int ret, attempts;
-
 	if (!intel_uc_supports_guc(uc))
 		return 0;
 
@@ -395,11 +388,24 @@ int intel_uc_init_hw(struct intel_uc *uc)
 	 * before on this system after reboot, otherwise we risk GPU hangs.
 	 * To check if GuC was loaded before we look at WOPCM registers.
 	 */
-	if (!intel_uc_uses_guc(uc) && !uc_is_wopcm_locked(uc))
-		return 0;
+	if (uc_is_wopcm_locked(uc))
+		return -EIO;
+
+	return 0;
+}
+
+static int __uc_init_hw(struct intel_uc *uc)
+{
+	struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
+	struct intel_guc *guc = &uc->guc;
+	struct intel_huc *huc = &uc->huc;
+	int ret, attempts;
+
+	GEM_BUG_ON(!intel_uc_supports_guc(uc));
+	GEM_BUG_ON(!intel_uc_uses_guc(uc));
 
 	if (!intel_uc_fw_is_available(&guc->fw)) {
-		ret = uc_is_wopcm_locked(uc) ||
+		ret = __uc_check_hw(uc) ||
 		      intel_uc_fw_is_overridden(&guc->fw) ||
 		      intel_uc_supports_guc_submission(uc) ?
 		      intel_uc_fw_status_to_error(guc->fw.status) : 0;
@@ -495,7 +501,7 @@ err_out:
 	return -EIO;
 }
 
-void intel_uc_fini_hw(struct intel_uc *uc)
+static void __uc_fini_hw(struct intel_uc *uc)
 {
 	struct intel_guc *guc = &uc->guc;
 
@@ -595,3 +601,20 @@ int intel_uc_runtime_resume(struct intel_uc *uc)
 	 */
 	return __uc_resume(uc, true);
 }
+
+static const struct intel_uc_ops uc_ops_off = {
+	.init_hw = __uc_check_hw,
+};
+
+static const struct intel_uc_ops uc_ops_on = {
+	.sanitize = __uc_sanitize,
+
+	.init_fw = __uc_fetch_firmwares,
+	.fini_fw = __uc_cleanup_firmwares,
+
+	.init = __uc_init,
+	.fini = __uc_fini,
+
+	.init_hw = __uc_init_hw,
+	.fini_hw = __uc_fini_hw,
+};
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
index 527995c21196..49c913524686 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
@@ -10,7 +10,20 @@
 #include "intel_huc.h"
 #include "i915_params.h"
 
+struct intel_uc;
+
+struct intel_uc_ops {
+	int (*sanitize)(struct intel_uc *uc); /* intel_uc_sanitize() */
+	void (*init_fw)(struct intel_uc *uc); /* intel_uc_fetch_firmwares() */
+	void (*fini_fw)(struct intel_uc *uc); /* intel_uc_cleanup_firmwares() */
+	void (*init)(struct intel_uc *uc); /* intel_uc_init() */
+	void (*fini)(struct intel_uc *uc); /* intel_uc_fini() */
+	int (*init_hw)(struct intel_uc *uc); /* intel_uc_init_hw() */
+	void (*fini_hw)(struct intel_uc *uc); /* intel_uc_fini_hw() */
+}; /* hooks left NULL are skipped; the int wrappers then return 0 */
+
 struct intel_uc {
+	struct intel_uc_ops const *ops;
 	struct intel_guc guc;
 	struct intel_huc huc;
 
@@ -21,13 +34,6 @@ struct intel_uc {
 void intel_uc_init_early(struct intel_uc *uc);
 void intel_uc_driver_late_release(struct intel_uc *uc);
 void intel_uc_init_mmio(struct intel_uc *uc);
-void intel_uc_fetch_firmwares(struct intel_uc *uc);
-void intel_uc_cleanup_firmwares(struct intel_uc *uc);
-void intel_uc_sanitize(struct intel_uc *uc);
-void intel_uc_init(struct intel_uc *uc);
-int intel_uc_init_hw(struct intel_uc *uc);
-void intel_uc_fini_hw(struct intel_uc *uc);
-void intel_uc_fini(struct intel_uc *uc);
 void intel_uc_reset_prepare(struct intel_uc *uc);
 void intel_uc_suspend(struct intel_uc *uc);
 void intel_uc_runtime_suspend(struct intel_uc *uc);
@@ -64,4 +70,20 @@ static inline bool intel_uc_uses_huc(struct intel_uc *uc)
 	return intel_huc_is_enabled(&uc->huc);
 }
 
+#define intel_uc_ops_function(_NAME, _OPS, _TYPE, _RET) \
+static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \
+{ \
+	if (uc->ops->_OPS) \
+		return uc->ops->_OPS(uc); \
+	return _RET; \
+}
+intel_uc_ops_function(sanitize, sanitize, int, 0);
+intel_uc_ops_function(fetch_firmwares, init_fw, void, );
+intel_uc_ops_function(cleanup_firmwares, fini_fw, void, );
+intel_uc_ops_function(init, init, void, );
+intel_uc_ops_function(fini, fini, void, );
+intel_uc_ops_function(init_hw, init_hw, int, 0);
+intel_uc_ops_function(fini_hw, fini_hw, void, );
+#undef intel_uc_ops_function
+
 #endif
diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c
index e451298d11c3..2477a1e5a166 100644
--- a/drivers/gpu/drm/i915/gvt/dmabuf.c
+++ b/drivers/gpu/drm/i915/gvt/dmabuf.c
@@ -36,13 +36,32 @@
 
 #define GEN8_DECODE_PTE(pte) (pte & GENMASK_ULL(63, 12))
 
+/* Pin the guest page behind @dma_addr; @size is accepted but not consulted. */
+static int vgpu_pin_dma_address(struct intel_vgpu *vgpu,
+				unsigned long size,
+				dma_addr_t dma_addr)
+{
+	/* Any hypervisor failure is reported to the caller as -EINVAL. */
+	if (intel_gvt_hypervisor_dma_pin_guest_page(vgpu, dma_addr))
+		return -EINVAL;
+
+	return 0;
+}
+
+static void vgpu_unpin_dma_address(struct intel_vgpu *vgpu,
+				   dma_addr_t dma_addr)
+{
+	intel_gvt_hypervisor_dma_unmap_guest_page(vgpu, dma_addr);
+}
+
 static int vgpu_gem_get_pages(
 		struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+	struct intel_vgpu *vgpu;
 	struct sg_table *st;
 	struct scatterlist *sg;
-	int i, ret;
+	int i, j, ret;
 	gen8_pte_t __iomem *gtt_entries;
 	struct intel_vgpu_fb_info *fb_info;
 	u32 page_num;
@@ -51,6 +70,10 @@ static int vgpu_gem_get_pages(
 	if (WARN_ON(!fb_info))
 		return -ENODEV;
 
+	vgpu = fb_info->obj->vgpu;
+	if (WARN_ON(!vgpu))
+		return -ENODEV;
+
 	st = kmalloc(sizeof(*st), GFP_KERNEL);
 	if (unlikely(!st))
 		return -ENOMEM;
@@ -64,21 +87,53 @@ static int vgpu_gem_get_pages(
 	gtt_entries = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm +
 		(fb_info->start >> PAGE_SHIFT);
 	for_each_sg(st->sgl, sg, page_num, i) {
+		dma_addr_t dma_addr =
+			GEN8_DECODE_PTE(readq(&gtt_entries[i]));
+		if (vgpu_pin_dma_address(vgpu, PAGE_SIZE, dma_addr)) {
+			ret = -EINVAL;
+			goto out;
+		}
+
 		sg->offset = 0;
 		sg->length = PAGE_SIZE;
-		sg_dma_address(sg) =
-			GEN8_DECODE_PTE(readq(&gtt_entries[i]));
 		sg_dma_len(sg) = PAGE_SIZE;
+		sg_dma_address(sg) = dma_addr;
 	}
 
 	__i915_gem_object_set_pages(obj, st, PAGE_SIZE);
+out:
+	if (ret) {
+		dma_addr_t dma_addr;
+
+		for_each_sg(st->sgl, sg, i, j) {
+			dma_addr = sg_dma_address(sg);
+			if (dma_addr)
+				vgpu_unpin_dma_address(vgpu, dma_addr);
+		}
+		sg_free_table(st);
+		kfree(st);
+	}
+
+	return ret;
 
-	return 0;
 }
 
 static void vgpu_gem_put_pages(struct drm_i915_gem_object *obj,
 		struct sg_table *pages)
 {
+	/* Drop the per-page pins, but only when a dma-buf is attached. */
+	if (obj->base.dma_buf) {
+		struct intel_vgpu_fb_info *fb_info = obj->gvt_info;
+		/* No local "obj" here: it would shadow the parameter. */
+		struct intel_vgpu *vgpu = fb_info->obj->vgpu;
+		struct scatterlist *sg;
+		int i;
+
+		for_each_sg(pages->sgl, sg, fb_info->size, i)
+			vgpu_unpin_dma_address(vgpu,
+					       sg_dma_address(sg));
+	}
+
 	sg_free_table(pages);
 	kfree(pages);
 }
@@ -163,6 +218,7 @@ static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev,
 	drm_gem_private_object_init(dev, &obj->base,
 		roundup(info->size, PAGE_SIZE));
 	i915_gem_object_init(obj, &intel_vgpu_gem_ops, &lock_class);
+	i915_gem_object_set_readonly(obj);
 
 	obj->read_domains = I915_GEM_DOMAIN_GTT;
 	obj->write_domain = 0;
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index bb9fe6bf5275..6d28d72e6c7e 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -341,6 +341,10 @@ static int gdrst_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
 			gvt_dbg_mmio("vgpu%d: request VCS2 Reset\n", vgpu->id);
 			engine_mask |= BIT(VCS1);
 		}
+		if (data & GEN9_GRDOM_GUC) {
+			gvt_dbg_mmio("vgpu%d: request GUC Reset\n", vgpu->id);
+			vgpu_vreg_t(vgpu, GUC_STATUS) |= GS_MIA_IN_RESET;
+		}
 		engine_mask &= INTEL_INFO(vgpu->gvt->dev_priv)->engine_mask;
 	}
 
@@ -1636,6 +1640,16 @@ static int edp_psr_imr_iir_write(struct intel_vgpu *vgpu,
 	return 0;
 }
 
+static int guc_status_read(struct intel_vgpu *vgpu,
+			   unsigned int offset, void *p_data,
+			   unsigned int bytes)
+{
+	/* Read first so this access still sees MIA_IN_RESET, then clear it. */
+	read_vreg(vgpu, offset, p_data, bytes);
+	vgpu_vreg(vgpu, offset) &= ~GS_MIA_IN_RESET;
+	return 0;
+}
+
 static int mmio_read_from_hw(struct intel_vgpu *vgpu,
 		unsigned int offset, void *p_data, unsigned int bytes)
 {
@@ -2672,10 +2686,12 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 
 	MMIO_DH(EDP_PSR_IMR, D_BDW_PLUS, NULL, edp_psr_imr_iir_write);
 	MMIO_DH(EDP_PSR_IIR, D_BDW_PLUS, NULL, edp_psr_imr_iir_write);
+	MMIO_DH(GUC_STATUS, D_ALL, guc_status_read, NULL);
+
 	return 0;
 }
 
-static int init_broadwell_mmio_info(struct intel_gvt *gvt)
+static int init_bdw_mmio_info(struct intel_gvt *gvt)
 {
 	struct drm_i915_private *dev_priv = gvt->dev_priv;
 	int ret;
@@ -3364,20 +3380,20 @@ int intel_gvt_setup_mmio_info(struct intel_gvt *gvt)
 		goto err;
 
 	if (IS_BROADWELL(dev_priv)) {
-		ret = init_broadwell_mmio_info(gvt);
+		ret = init_bdw_mmio_info(gvt);
 		if (ret)
 			goto err;
 	} else if (IS_SKYLAKE(dev_priv)
 		|| IS_KABYLAKE(dev_priv)
 		|| IS_COFFEELAKE(dev_priv)) {
-		ret = init_broadwell_mmio_info(gvt);
+		ret = init_bdw_mmio_info(gvt);
 		if (ret)
 			goto err;
 		ret = init_skl_mmio_info(gvt);
 		if (ret)
 			goto err;
 	} else if (IS_BROXTON(dev_priv)) {
-		ret = init_broadwell_mmio_info(gvt);
+		ret = init_bdw_mmio_info(gvt);
 		if (ret)
 			goto err;
 		ret = init_skl_mmio_info(gvt);
diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h
index 9599c0a762b2..b17c4a1599cd 100644
--- a/drivers/gpu/drm/i915/gvt/hypercall.h
+++ b/drivers/gpu/drm/i915/gvt/hypercall.h
@@ -66,6 +66,8 @@ struct intel_gvt_mpt {
 				  unsigned long size, dma_addr_t *dma_addr);
 	void (*dma_unmap_guest_page)(unsigned long handle, dma_addr_t dma_addr);
 
+	int (*dma_pin_guest_page)(unsigned long handle, dma_addr_t dma_addr);
+
 	int (*map_gfn_to_mfn)(unsigned long handle, unsigned long gfn,
 			      unsigned long mfn, unsigned int nr, bool map);
 	int (*set_trap_area)(unsigned long handle, u64 start, u64 end,
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 04a5a0d90823..3259a1fa69e1 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -1916,6 +1916,28 @@ err_unlock:
 	return ret;
 }
 
+static int kvmgt_dma_pin_guest_page(unsigned long handle, dma_addr_t dma_addr)
+{
+	struct kvmgt_guest_info *guest;
+	struct gvt_dma *dma;
+	int err = -ENOMEM; /* reported when no cached mapping matches */
+
+	if (!handle_valid(handle))
+		return -ENODEV;
+
+	guest = (struct kvmgt_guest_info *)handle;
+
+	mutex_lock(&guest->vgpu->vdev.cache_lock);
+	dma = __gvt_cache_find_dma_addr(guest->vgpu, dma_addr);
+	if (dma) {
+		kref_get(&dma->ref); /* the pin is an extra reference */
+		err = 0;
+	}
+	mutex_unlock(&guest->vgpu->vdev.cache_lock);
+
+	return err;
+}
+
 static void __gvt_dma_release(struct kref *ref)
 {
 	struct gvt_dma *entry = container_of(ref, typeof(*entry), ref);
@@ -2027,6 +2049,7 @@ static struct intel_gvt_mpt kvmgt_mpt = {
 	.gfn_to_mfn = kvmgt_gfn_to_pfn,
 	.dma_map_guest_page = kvmgt_dma_map_guest_page,
 	.dma_unmap_guest_page = kvmgt_dma_unmap_guest_page,
+	.dma_pin_guest_page = kvmgt_dma_pin_guest_page,
 	.set_opregion = kvmgt_set_opregion,
 	.set_edid = kvmgt_set_edid,
 	.get_vfio_device = kvmgt_get_vfio_device,
diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h
index 0f9440128123..9ad224df9c68 100644
--- a/drivers/gpu/drm/i915/gvt/mpt.h
+++ b/drivers/gpu/drm/i915/gvt/mpt.h
@@ -255,6 +255,21 @@ static inline void intel_gvt_hypervisor_dma_unmap_guest_page(
 }
 
 /**
+ * intel_gvt_hypervisor_dma_pin_guest_page - pin guest dma buf
+ * @vgpu: a vGPU
+ * @dma_addr: guest dma addr
+ *
+ * Returns:
+ * 0 on success, negative error code if failed.
+ */
+static inline int
+intel_gvt_hypervisor_dma_pin_guest_page(struct intel_vgpu *vgpu,
+					dma_addr_t dma_addr)
+{
+	return intel_gvt_host.mpt->dma_pin_guest_page(vgpu->handle, dma_addr);
+}
+
+/**
  * intel_gvt_hypervisor_map_gfn_to_mfn - map a GFN region to MFN
  * @vgpu: a vGPU
  * @gfn: guest PFN
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index b3299f88e24e..685d1e04a5ff 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -1224,7 +1224,7 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 	enum intel_engine_id i;
 	int ret;
 
-	ppgtt = i915_ppgtt_create(i915);
+	ppgtt = i915_ppgtt_create(&i915->gt);
 	if (IS_ERR(ppgtt))
 		return PTR_ERR(ppgtt);
 
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index d5a6e4e3d0fd..85bd9bf4f6ee 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -212,9 +212,9 @@ static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt)
  */
 void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu)
 {
-	mutex_lock(&vgpu->gvt->lock);
+	mutex_lock(&vgpu->vgpu_lock);
 	vgpu->active = true;
-	mutex_unlock(&vgpu->gvt->lock);
+	mutex_unlock(&vgpu->vgpu_lock);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index cfe09964622b..f3da5c06f331 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -605,12 +605,15 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 					    struct intel_engine_cs *engine)
 {
 	intel_engine_mask_t tmp, mask = engine->mask;
+	struct llist_node *pos = NULL, *next = NULL; /* head, tail */
 	struct intel_gt *gt = engine->gt;
-	struct llist_node *pos, *next;
 	int err;
 
 	GEM_BUG_ON(i915_active_is_idle(ref));
-	GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));
+
+	/* Wait until the previous preallocation is completed */
+	while (!llist_empty(&ref->preallocated_barriers))
+		cond_resched();
 
 	/*
 	 * Preallocate a node for each physical engine supporting the target
@@ -653,16 +656,24 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 		GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN));
 
 		GEM_BUG_ON(barrier_to_engine(node) != engine);
-		llist_add(barrier_to_ll(node), &ref->preallocated_barriers);
+		barrier_to_ll(node)->next = pos; /* push onto private chain */
+		pos = barrier_to_ll(node);
+		if (!next)
+			next = pos; /* first node pushed stays the tail */
 		intel_engine_pm_get(engine);
 	}
 
+	GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));
+	llist_add_batch(pos, next, &ref->preallocated_barriers);
+
 	return 0;
 
 unwind:
-	llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
+	while (pos) {
 		struct active_node *node = barrier_from_ll(pos);
 
+		pos = pos->next;
+
 		atomic_dec(&ref->count);
 		intel_engine_pm_put(barrier_to_engine(node));
 
diff --git a/drivers/gpu/drm/i915/i915_buddy.c b/drivers/gpu/drm/i915/i915_buddy.c
index e9d4200ce3bc..66883af64ca1 100644
--- a/drivers/gpu/drm/i915/i915_buddy.c
+++ b/drivers/gpu/drm/i915/i915_buddy.c
@@ -262,8 +262,10 @@ void i915_buddy_free_list(struct i915_buddy_mm *mm, struct list_head *objects)
 {
 	struct i915_buddy_block *block, *on;
 
-	list_for_each_entry_safe(block, on, objects, link)
+	list_for_each_entry_safe(block, on, objects, link) {
 		i915_buddy_free(mm, block);
+		cond_resched();
+	}
 	INIT_LIST_HEAD(objects);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index d28468eaed57..d5a9b8a964c2 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -321,16 +321,15 @@ static void print_context_stats(struct seq_file *m,
 
 		for_each_gem_engine(ce,
 				    i915_gem_context_lock_engines(ctx), it) {
-			intel_context_lock_pinned(ce);
-			if (intel_context_is_pinned(ce)) {
+			if (intel_context_pin_if_active(ce)) {
 				rcu_read_lock();
 				if (ce->state)
 					per_file_stats(0,
 						       ce->state->obj, &kstats);
 				per_file_stats(0, ce->ring->vma->obj, &kstats);
 				rcu_read_unlock();
+				intel_context_unpin(ce);
 			}
-			intel_context_unlock_pinned(ce);
 		}
 		i915_gem_context_unlock_engines(ctx);
 
@@ -367,12 +366,16 @@ static void print_context_stats(struct seq_file *m,
 static int i915_gem_object_info(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *i915 = node_to_i915(m->private);
+	struct intel_memory_region *mr;
+	enum intel_region_id id;
 
 	seq_printf(m, "%u shrinkable [%u free] objects, %llu bytes\n",
 		   i915->mm.shrink_count,
 		   atomic_read(&i915->mm.free_count),
 		   i915->mm.shrink_memory);
-
+	for_each_memory_region(mr, i915, id)
+		seq_printf(m, "%s: total:%pa, available:%pa bytes\n",
+			   mr->name, &mr->total, &mr->avail);
 	seq_putc(m, '\n');
 
 	print_context_stats(m, i915);
@@ -682,7 +685,7 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
 static ssize_t gpu_state_read(struct file *file, char __user *ubuf,
 			      size_t count, loff_t *pos)
 {
-	struct i915_gpu_state *error;
+	struct i915_gpu_coredump *error;
 	ssize_t ret;
 	void *buf;
 
@@ -695,7 +698,7 @@ static ssize_t gpu_state_read(struct file *file, char __user *ubuf,
 	if (!buf)
 		return -ENOMEM;
 
-	ret = i915_gpu_state_copy_to_buffer(error, buf, *pos, count);
+	ret = i915_gpu_coredump_copy_to_buffer(error, buf, *pos, count);
 	if (ret <= 0)
 		goto out;
 
@@ -711,19 +714,19 @@ out:
 
 static int gpu_state_release(struct inode *inode, struct file *file)
 {
-	i915_gpu_state_put(file->private_data);
+	i915_gpu_coredump_put(file->private_data);
 	return 0;
 }
 
 static int i915_gpu_info_open(struct inode *inode, struct file *file)
 {
 	struct drm_i915_private *i915 = inode->i_private;
-	struct i915_gpu_state *gpu;
+	struct i915_gpu_coredump *gpu;
 	intel_wakeref_t wakeref;
 
 	gpu = NULL;
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
-		gpu = i915_capture_gpu_state(i915);
+		gpu = i915_gpu_coredump(i915);
 	if (IS_ERR(gpu))
 		return PTR_ERR(gpu);
 
@@ -745,7 +748,7 @@ i915_error_state_write(struct file *filp,
 		       size_t cnt,
 		       loff_t *ppos)
 {
-	struct i915_gpu_state *error = filp->private_data;
+	struct i915_gpu_coredump *error = filp->private_data;
 
 	if (!error)
 		return 0;
@@ -758,7 +761,7 @@ i915_error_state_write(struct file *filp,
 
 static int i915_error_state_open(struct inode *inode, struct file *file)
 {
-	struct i915_gpu_state *error;
+	struct i915_gpu_coredump *error;
 
 	error = i915_first_error_state(inode->i_private);
 	if (IS_ERR(error))
@@ -1001,7 +1004,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 	return ret;
 }
 
-static int ironlake_drpc_info(struct seq_file *m)
+static int ilk_drpc_info(struct seq_file *m)
 {
 	struct drm_i915_private *i915 = node_to_i915(m->private);
 	struct intel_uncore *uncore = &i915->uncore;
@@ -1209,7 +1212,7 @@ static int i915_drpc_info(struct seq_file *m, void *unused)
 		else if (INTEL_GEN(dev_priv) >= 6)
 			err = gen6_drpc_info(m);
 		else
-			err = ironlake_drpc_info(m);
+			err = ilk_drpc_info(m);
 	}
 
 	return err;
@@ -1509,15 +1512,14 @@ static int i915_context_status(struct seq_file *m, void *unused)
 
 		for_each_gem_engine(ce,
 				    i915_gem_context_lock_engines(ctx), it) {
-			intel_context_lock_pinned(ce);
-			if (intel_context_is_pinned(ce)) {
+			if (intel_context_pin_if_active(ce)) {
 				seq_printf(m, "%s: ", ce->engine->name);
 				if (ce->state)
 					describe_obj(m, ce->state->obj);
 				describe_ctx_ring(m, ce->ring);
 				seq_putc(m, '\n');
+				intel_context_unpin(ce);
 			}
-			intel_context_unlock_pinned(ce);
 		}
 		i915_gem_context_unlock_engines(ctx);
 
@@ -1977,7 +1979,7 @@ static int i915_psr_sink_status_show(struct seq_file *m, void *data)
 	struct drm_connector *connector = m->private;
 	struct drm_i915_private *dev_priv = to_i915(connector->dev);
 	struct intel_dp *intel_dp =
-		enc_to_intel_dp(&intel_attached_encoder(connector)->base);
+		enc_to_intel_dp(intel_attached_encoder(to_intel_connector(connector)));
 	int ret;
 
 	if (!CAN_PSR(dev_priv)) {
@@ -2389,7 +2391,7 @@ static void intel_dp_info(struct seq_file *m,
 			  struct intel_connector *intel_connector)
 {
 	struct intel_encoder *intel_encoder = intel_connector->encoder;
-	struct intel_dp *intel_dp = enc_to_intel_dp(&intel_encoder->base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(intel_encoder);
 
 	seq_printf(m, "\tDPCD rev: %x\n", intel_dp->dpcd[DP_DPCD_REV]);
 	seq_printf(m, "\taudio support: %s\n", yesno(intel_dp->has_audio));
@@ -2409,7 +2411,7 @@ static void intel_dp_mst_info(struct seq_file *m,
 {
 	struct intel_encoder *intel_encoder = intel_connector->encoder;
 	struct intel_dp_mst_encoder *intel_mst =
-		enc_to_mst(&intel_encoder->base);
+		enc_to_mst(intel_encoder);
 	struct intel_digital_port *intel_dig_port = intel_mst->primary;
 	struct intel_dp *intel_dp = &intel_dig_port->dp;
 	bool has_audio = drm_dp_mst_port_has_audio(&intel_dp->mst_mgr,
@@ -2422,7 +2424,7 @@ static void intel_hdmi_info(struct seq_file *m,
 			    struct intel_connector *intel_connector)
 {
 	struct intel_encoder *intel_encoder = intel_connector->encoder;
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&intel_encoder->base);
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(intel_encoder);
 
 	seq_printf(m, "\taudio support: %s\n", yesno(intel_hdmi->has_audio));
 	if (intel_connector->hdcp.shim) {
@@ -3012,11 +3014,11 @@ static int i915_dp_mst_info(struct seq_file *m, void *unused)
 		if (connector->connector_type != DRM_MODE_CONNECTOR_DisplayPort)
 			continue;
 
-		intel_encoder = intel_attached_encoder(connector);
+		intel_encoder = intel_attached_encoder(to_intel_connector(connector));
 		if (!intel_encoder || intel_encoder->type == INTEL_OUTPUT_DP_MST)
 			continue;
 
-		intel_dig_port = enc_to_dig_port(&intel_encoder->base);
+		intel_dig_port = enc_to_dig_port(intel_encoder);
 		if (!intel_dig_port->dp.can_mst)
 			continue;
 
@@ -3066,7 +3068,7 @@ static ssize_t i915_displayport_test_active_write(struct file *file,
 			continue;
 
 		if (encoder && connector->status == connector_status_connected) {
-			intel_dp = enc_to_intel_dp(&encoder->base);
+			intel_dp = enc_to_intel_dp(encoder);
 			status = kstrtoint(input_buffer, 10, &val);
 			if (status < 0)
 				break;
@@ -3075,9 +3077,9 @@ static ssize_t i915_displayport_test_active_write(struct file *file,
 			 * testing code, only accept an actual value of 1 here
 			 */
 			if (val == 1)
-				intel_dp->compliance.test_active = 1;
+				intel_dp->compliance.test_active = true;
 			else
-				intel_dp->compliance.test_active = 0;
+				intel_dp->compliance.test_active = false;
 		}
 	}
 	drm_connector_list_iter_end(&conn_iter);
@@ -3110,7 +3112,7 @@ static int i915_displayport_test_active_show(struct seq_file *m, void *data)
 			continue;
 
 		if (encoder && connector->status == connector_status_connected) {
-			intel_dp = enc_to_intel_dp(&encoder->base);
+			intel_dp = enc_to_intel_dp(encoder);
 			if (intel_dp->compliance.test_active)
 				seq_puts(m, "1");
 			else
@@ -3160,7 +3162,7 @@ static int i915_displayport_test_data_show(struct seq_file *m, void *data)
 			continue;
 
 		if (encoder && connector->status == connector_status_connected) {
-			intel_dp = enc_to_intel_dp(&encoder->base);
+			intel_dp = enc_to_intel_dp(encoder);
 			if (intel_dp->compliance.test_type ==
 			    DP_TEST_LINK_EDID_READ)
 				seq_printf(m, "%lx",
@@ -3204,7 +3206,7 @@ static int i915_displayport_test_type_show(struct seq_file *m, void *data)
 			continue;
 
 		if (encoder && connector->status == connector_status_connected) {
-			intel_dp = enc_to_intel_dp(&encoder->base);
+			intel_dp = enc_to_intel_dp(encoder);
 			seq_printf(m, "%02lx", intel_dp->compliance.test_type);
 		} else
 			seq_puts(m, "0");
@@ -3815,8 +3817,8 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
 #undef SS_MAX
 }
 
-static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
-					 struct sseu_dev_info *sseu)
+static void bdw_sseu_device_status(struct drm_i915_private *dev_priv,
+				   struct sseu_dev_info *sseu)
 {
 	const struct intel_runtime_info *info = RUNTIME_INFO(dev_priv);
 	u32 slice_info = I915_READ(GEN8_GT_SLICE_INFO);
@@ -3901,7 +3903,7 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
 		if (IS_CHERRYVIEW(dev_priv))
 			cherryview_sseu_device_status(dev_priv, &sseu);
 		else if (IS_BROADWELL(dev_priv))
-			broadwell_sseu_device_status(dev_priv, &sseu);
+			bdw_sseu_device_status(dev_priv, &sseu);
 		else if (IS_GEN(dev_priv, 9))
 			gen9_sseu_device_status(dev_priv, &sseu);
 		else if (INTEL_GEN(dev_priv) >= 10)
@@ -4142,14 +4144,14 @@ static int i915_drrs_ctl_set(void *data, u64 val)
 			      drm_connector_mask(connector)))
 				continue;
 
-			encoder = intel_attached_encoder(connector);
+			encoder = intel_attached_encoder(to_intel_connector(connector));
 			if (encoder->type != INTEL_OUTPUT_EDP)
 				continue;
 
 			DRM_DEBUG_DRIVER("Manually %sabling DRRS. %llu\n",
 						val ? "en" : "dis", val);
 
-			intel_dp = enc_to_intel_dp(&encoder->base);
+			intel_dp = enc_to_intel_dp(encoder);
 			if (val)
 				intel_edp_drrs_enable(intel_dp,
 						      crtc_state);
@@ -4353,7 +4355,7 @@ static int i915_dpcd_show(struct seq_file *m, void *data)
 {
 	struct drm_connector *connector = m->private;
 	struct intel_dp *intel_dp =
-		enc_to_intel_dp(&intel_attached_encoder(connector)->base);
+		enc_to_intel_dp(intel_attached_encoder(to_intel_connector(connector)));
 	u8 buf[16];
 	ssize_t err;
 	int i;
@@ -4388,7 +4390,7 @@ static int i915_panel_show(struct seq_file *m, void *data)
 {
 	struct drm_connector *connector = m->private;
 	struct intel_dp *intel_dp =
-		enc_to_intel_dp(&intel_attached_encoder(connector)->base);
+		enc_to_intel_dp(intel_attached_encoder(to_intel_connector(connector)));
 
 	if (connector->status != connector_status_connected)
 		return -ENODEV;
@@ -4466,7 +4468,7 @@ static int i915_dsc_fec_support_show(struct seq_file *m, void *data)
 		} else if (ret) {
 			break;
 		}
-		intel_dp = enc_to_intel_dp(&intel_attached_encoder(connector)->base);
+		intel_dp = enc_to_intel_dp(intel_attached_encoder(to_intel_connector(connector)));
 		crtc_state = to_intel_crtc_state(crtc->state);
 		seq_printf(m, "DSC_Enabled: %s\n",
 			   yesno(crtc_state->dsc.compression_enable));
@@ -4493,8 +4495,8 @@ static ssize_t i915_dsc_fec_support_write(struct file *file,
 	int ret;
 	struct drm_connector *connector =
 		((struct seq_file *)file->private_data)->private;
-	struct intel_encoder *encoder = intel_attached_encoder(connector);
-	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector));
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
 	if (len == 0)
 		return 0;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 59525094d0e3..f7385abdd74b 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -469,6 +469,12 @@ static void vlv_free_s0ix_state(struct drm_i915_private *i915)
 	i915->vlv_s0ix_state = NULL;
 }
 
+static void sanitize_gpu(struct drm_i915_private *i915)
+{
+	if (!INTEL_INFO(i915)->gpu_reset_clobbers_display)
+		__intel_gt_reset(&i915->gt, ALL_ENGINES);
+}
+
 /**
  * i915_driver_early_probe - setup state not requiring device access
  * @dev_priv: device private
@@ -602,6 +608,9 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv)
 	if (ret)
 		goto err_uncore;
 
+	/* As early as possible, scrub existing GPU state before clobbering */
+	sanitize_gpu(dev_priv);
+
 	return 0;
 
 err_uncore:
@@ -1817,7 +1826,7 @@ static int i915_drm_resume(struct drm_device *dev)
 
 	disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 
-	intel_gt_sanitize(&dev_priv->gt, true);
+	sanitize_gpu(dev_priv);
 
 	ret = i915_ggtt_enable_hw(dev_priv);
 	if (ret)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d05a968227f7..077af22b8340 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -46,6 +46,7 @@
 #include <linux/dma-resv.h>
 #include <linux/shmem_fs.h>
 #include <linux/stackdepot.h>
+#include <linux/xarray.h>
 
 #include <drm/intel-gtt.h>
 #include <drm/drm_legacy.h> /* for struct drm_dma_handle */
@@ -110,8 +111,8 @@
 
 #define DRIVER_NAME		"i915"
 #define DRIVER_DESC		"Intel Graphics"
-#define DRIVER_DATE		"20191223"
-#define DRIVER_TIMESTAMP	1577120893
+#define DRIVER_DATE		"20200114"
+#define DRIVER_TIMESTAMP	1579001978
 
 struct drm_i915_gem_object;
 
@@ -201,8 +202,7 @@ struct drm_i915_file_private {
 		struct list_head request_list;
 	} mm;
 
-	struct idr context_idr;
-	struct mutex context_idr_lock; /* guards context_idr */
+	struct xarray context_xa;
 
 	struct idr vm_idr;
 	struct mutex vm_idr_lock; /* guards vm_idr */
@@ -505,6 +505,7 @@ struct i915_psr {
 	bool dc3co_enabled;
 	u32 dc3co_exit_delay;
 	struct delayed_work idle_work;
+	bool initially_probed;
 };
 
 #define QUIRK_LVDS_SSC_DISABLE (1<<1)
@@ -1252,6 +1253,16 @@ struct drm_i915_private {
 			struct llist_head free_list;
 			struct work_struct free_work;
 		} contexts;
+
+		/*
+		 * We replace the local file with a global mapping as the
+		 * backing storage for the mmap is on the device and not
+		 * on the struct file, and we do not want to prolong the
+		 * lifetime of the local fd. To minimise the number of
+		 * anonymous inodes we create, we use a global singleton to
+		 * share the global mapping.
+		 */
+		struct file *mmap_singleton;
 	} gem;
 
 	u8 pch_ssc_use;
@@ -1657,8 +1668,10 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 	(IS_BROADWELL(dev_priv) || IS_GEN(dev_priv, 9))
 
 /* WaRsDisableCoarsePowerGating:skl,cnl */
-#define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \
-	IS_GEN_RANGE(dev_priv, 9, 10)
+#define NEEDS_WaRsDisableCoarsePowerGating(dev_priv)			\
+	(IS_CANNONLAKE(dev_priv) ||					\
+	 IS_SKL_GT3(dev_priv) ||					\
+	 IS_SKL_GT4(dev_priv))
 
 #define HAS_GMBUS_IRQ(dev_priv) (INTEL_GEN(dev_priv) >= 4)
 #define HAS_GMBUS_BURST_READ(dev_priv) (INTEL_GEN(dev_priv) >= 10 || \
@@ -1861,7 +1874,7 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
 }
 
 static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
-					  struct intel_engine_cs *engine)
+					  const struct intel_engine_cs *engine)
 {
 	return atomic_read(&error->reset_engine_count[engine->uabi_class]);
 }
@@ -1889,7 +1902,7 @@ struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags)
 static inline struct i915_gem_context *
 __i915_gem_context_lookup_rcu(struct drm_i915_file_private *file_priv, u32 id)
 {
-	return idr_find(&file_priv->context_idr, id);
+	return xa_load(&file_priv->context_xa, id);
 }
 
 static inline struct i915_gem_context *
@@ -2015,6 +2028,9 @@ int i915_reg_read_ioctl(struct drm_device *dev, void *data,
 int remap_io_mapping(struct vm_area_struct *vma,
 		     unsigned long addr, unsigned long pfn, unsigned long size,
 		     struct io_mapping *iomap);
+int remap_io_sg(struct vm_area_struct *vma,
+		unsigned long addr, unsigned long size,
+		struct scatterlist *sgl, resource_size_t iobase);
 
 static inline int intel_hws_csb_write_index(struct drm_i915_private *i915)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9ddcf17230e6..94f993e4c12f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -45,6 +45,7 @@
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_ioctls.h"
 #include "gem/i915_gem_mman.h"
+#include "gem/i915_gem_region.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
@@ -200,7 +201,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 
 static int
 i915_gem_create(struct drm_file *file,
-		struct drm_i915_private *dev_priv,
+		struct intel_memory_region *mr,
 		u64 *size_p,
 		u32 *handle_p)
 {
@@ -209,12 +210,16 @@ i915_gem_create(struct drm_file *file,
 	u64 size;
 	int ret;
 
-	size = round_up(*size_p, PAGE_SIZE);
+	GEM_BUG_ON(!is_power_of_2(mr->min_page_size));
+	size = round_up(*size_p, mr->min_page_size);
 	if (size == 0)
 		return -EINVAL;
 
+	/* For most of the ABI (e.g. mmap) we think in system pages */
+	GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
+
 	/* Allocate the new object */
-	obj = i915_gem_object_create_shmem(dev_priv, size);
+	obj = i915_gem_object_create_region(mr, size, 0);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -234,6 +239,7 @@ i915_gem_dumb_create(struct drm_file *file,
 		     struct drm_device *dev,
 		     struct drm_mode_create_dumb *args)
 {
+	enum intel_memory_type mem_type;
 	int cpp = DIV_ROUND_UP(args->bpp, 8);
 	u32 format;
 
@@ -260,7 +266,14 @@ i915_gem_dumb_create(struct drm_file *file,
 		args->pitch = ALIGN(args->pitch, 4096);
 
 	args->size = args->pitch * args->height;
-	return i915_gem_create(file, to_i915(dev),
+
+	mem_type = INTEL_MEMORY_SYSTEM;
+	if (HAS_LMEM(to_i915(dev)))
+		mem_type = INTEL_MEMORY_LOCAL;
+
+	return i915_gem_create(file,
+			       intel_memory_region_by_type(to_i915(dev),
+							   mem_type),
 			       &args->size, &args->handle);
 }
 
@@ -274,12 +287,14 @@ int
 i915_gem_create_ioctl(struct drm_device *dev, void *data,
 		      struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *i915 = to_i915(dev);
 	struct drm_i915_gem_create *args = data;
 
-	i915_gem_flush_free_objects(dev_priv);
+	i915_gem_flush_free_objects(i915);
 
-	return i915_gem_create(file, dev_priv,
+	return i915_gem_create(file,
+			       intel_memory_region_by_type(i915,
+							   INTEL_MEMORY_SYSTEM),
 			       &args->size, &args->handle);
 }
 
@@ -1172,6 +1187,8 @@ void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
 
 void i915_gem_driver_release(struct drm_i915_private *dev_priv)
 {
+	i915_gem_driver_release__contexts(dev_priv);
+
 	intel_gt_driver_release(&dev_priv->gt);
 
 	intel_wa_list_free(&dev_priv->gt_wa_list);
@@ -1179,8 +1196,6 @@ void i915_gem_driver_release(struct drm_i915_private *dev_priv)
 	intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
 	i915_gem_cleanup_userptr(dev_priv);
 
-	i915_gem_driver_release__contexts(dev_priv);
-
 	i915_gem_drain_freed_objects(dev_priv);
 
 	WARN_ON(!list_empty(&dev_priv->gem.contexts.list));
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index 71efccfde122..d9c34a23cd67 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -412,6 +412,9 @@ int i915_vma_pin_fence(struct i915_vma *vma)
 {
 	int err;
 
+	if (!vma->fence && !i915_gem_object_is_tiled(vma->obj))
+		return 0;
+
 	/*
 	 * Note that we revoke fences on runtime suspend. Therefore the user
 	 * must keep the device awake whilst using the fence.
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 1efe58ad0ce9..e039eb56900f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1,26 +1,7 @@
+// SPDX-License-Identifier: MIT
 /*
  * Copyright © 2010 Daniel Vetter
- * Copyright © 2011-2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
+ * Copyright © 2020 Intel Corporation
  */
 
 #include <linux/slab.h> /* fault-inject.h is not standalone! */
@@ -45,2116 +26,6 @@
 #include "i915_trace.h"
 #include "i915_vgpu.h"
 
-#define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
-
-#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT)
-#define DBG(...) trace_printk(__VA_ARGS__)
-#else
-#define DBG(...)
-#endif
-
-#define NALLOC 3 /* 1 normal, 1 for concurrent threads, 1 for preallocation */
-
-/**
- * DOC: Global GTT views
- *
- * Background and previous state
- *
- * Historically objects could exists (be bound) in global GTT space only as
- * singular instances with a view representing all of the object's backing pages
- * in a linear fashion. This view will be called a normal view.
- *
- * To support multiple views of the same object, where the number of mapped
- * pages is not equal to the backing store, or where the layout of the pages
- * is not linear, concept of a GGTT view was added.
- *
- * One example of an alternative view is a stereo display driven by a single
- * image. In this case we would have a framebuffer looking like this
- * (2x2 pages):
- *
- *    12
- *    34
- *
- * Above would represent a normal GGTT view as normally mapped for GPU or CPU
- * rendering. In contrast, fed to the display engine would be an alternative
- * view which could look something like this:
- *
- *   1212
- *   3434
- *
- * In this example both the size and layout of pages in the alternative view is
- * different from the normal view.
- *
- * Implementation and usage
- *
- * GGTT views are implemented using VMAs and are distinguished via enum
- * i915_ggtt_view_type and struct i915_ggtt_view.
- *
- * A new flavour of core GEM functions which work with GGTT bound objects were
- * added with the _ggtt_ infix, and sometimes with _view postfix to avoid
- * renaming  in large amounts of code. They take the struct i915_ggtt_view
- * parameter encapsulating all metadata required to implement a view.
- *
- * As a helper for callers which are only interested in the normal view,
- * globally const i915_ggtt_view_normal singleton instance exists. All old core
- * GEM API functions, the ones not taking the view parameter, are operating on,
- * or with the normal GGTT view.
- *
- * Code wanting to add or use a new GGTT view needs to:
- *
- * 1. Add a new enum with a suitable name.
- * 2. Extend the metadata in the i915_ggtt_view structure if required.
- * 3. Add support to i915_get_vma_pages().
- *
- * New views are required to build a scatter-gather table from within the
- * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and
- * exists for the lifetime of an VMA.
- *
- * Core API is designed to have copy semantics which means that passed in
- * struct i915_ggtt_view does not need to be persistent (left around after
- * calling the core API functions).
- *
- */
-
-#define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt)
-
-static int
-i915_get_ggtt_vma_pages(struct i915_vma *vma);
-
-static void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
-{
-	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
-
-	spin_lock_irq(&uncore->lock);
-	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
-	intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6);
-	spin_unlock_irq(&uncore->lock);
-}
-
-static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
-{
-	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
-
-	/*
-	 * Note that as an uncached mmio write, this will flush the
-	 * WCB of the writes into the GGTT before it triggers the invalidate.
-	 */
-	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
-}
-
-static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
-{
-	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
-	struct drm_i915_private *i915 = ggtt->vm.i915;
-
-	gen8_ggtt_invalidate(ggtt);
-
-	if (INTEL_GEN(i915) >= 12)
-		intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR,
-				      GEN12_GUC_TLB_INV_CR_INVALIDATE);
-	else
-		intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
-}
-
-static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
-{
-	intel_gtt_chipset_flush();
-}
-
-static int ppgtt_bind_vma(struct i915_vma *vma,
-			  enum i915_cache_level cache_level,
-			  u32 flags)
-{
-	u32 pte_flags;
-	int err;
-
-	if (flags & I915_VMA_ALLOC) {
-		err = vma->vm->allocate_va_range(vma->vm,
-						 vma->node.start, vma->size);
-		if (err)
-			return err;
-
-		set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
-	}
-
-	/* Applicable to VLV, and gen8+ */
-	pte_flags = 0;
-	if (i915_gem_object_is_readonly(vma->obj))
-		pte_flags |= PTE_READ_ONLY;
-
-	GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)));
-	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
-	wmb();
-
-	return 0;
-}
-
-static void ppgtt_unbind_vma(struct i915_vma *vma)
-{
-	if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)))
-		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
-}
-
-static int ppgtt_set_pages(struct i915_vma *vma)
-{
-	GEM_BUG_ON(vma->pages);
-
-	vma->pages = vma->obj->mm.pages;
-
-	vma->page_sizes = vma->obj->mm.page_sizes;
-
-	return 0;
-}
-
-static void clear_pages(struct i915_vma *vma)
-{
-	GEM_BUG_ON(!vma->pages);
-
-	if (vma->pages != vma->obj->mm.pages) {
-		sg_free_table(vma->pages);
-		kfree(vma->pages);
-	}
-	vma->pages = NULL;
-
-	memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
-}
-
-static u64 gen8_pte_encode(dma_addr_t addr,
-			   enum i915_cache_level level,
-			   u32 flags)
-{
-	gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;
-
-	if (unlikely(flags & PTE_READ_ONLY))
-		pte &= ~_PAGE_RW;
-
-	switch (level) {
-	case I915_CACHE_NONE:
-		pte |= PPAT_UNCACHED;
-		break;
-	case I915_CACHE_WT:
-		pte |= PPAT_DISPLAY_ELLC;
-		break;
-	default:
-		pte |= PPAT_CACHED;
-		break;
-	}
-
-	return pte;
-}
-
-static u64 gen8_pde_encode(const dma_addr_t addr,
-			   const enum i915_cache_level level)
-{
-	u64 pde = _PAGE_PRESENT | _PAGE_RW;
-	pde |= addr;
-	if (level != I915_CACHE_NONE)
-		pde |= PPAT_CACHED_PDE;
-	else
-		pde |= PPAT_UNCACHED;
-	return pde;
-}
-
-static u64 snb_pte_encode(dma_addr_t addr,
-			  enum i915_cache_level level,
-			  u32 flags)
-{
-	gen6_pte_t pte = GEN6_PTE_VALID;
-	pte |= GEN6_PTE_ADDR_ENCODE(addr);
-
-	switch (level) {
-	case I915_CACHE_L3_LLC:
-	case I915_CACHE_LLC:
-		pte |= GEN6_PTE_CACHE_LLC;
-		break;
-	case I915_CACHE_NONE:
-		pte |= GEN6_PTE_UNCACHED;
-		break;
-	default:
-		MISSING_CASE(level);
-	}
-
-	return pte;
-}
-
-static u64 ivb_pte_encode(dma_addr_t addr,
-			  enum i915_cache_level level,
-			  u32 flags)
-{
-	gen6_pte_t pte = GEN6_PTE_VALID;
-	pte |= GEN6_PTE_ADDR_ENCODE(addr);
-
-	switch (level) {
-	case I915_CACHE_L3_LLC:
-		pte |= GEN7_PTE_CACHE_L3_LLC;
-		break;
-	case I915_CACHE_LLC:
-		pte |= GEN6_PTE_CACHE_LLC;
-		break;
-	case I915_CACHE_NONE:
-		pte |= GEN6_PTE_UNCACHED;
-		break;
-	default:
-		MISSING_CASE(level);
-	}
-
-	return pte;
-}
-
-static u64 byt_pte_encode(dma_addr_t addr,
-			  enum i915_cache_level level,
-			  u32 flags)
-{
-	gen6_pte_t pte = GEN6_PTE_VALID;
-	pte |= GEN6_PTE_ADDR_ENCODE(addr);
-
-	if (!(flags & PTE_READ_ONLY))
-		pte |= BYT_PTE_WRITEABLE;
-
-	if (level != I915_CACHE_NONE)
-		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
-
-	return pte;
-}
-
-static u64 hsw_pte_encode(dma_addr_t addr,
-			  enum i915_cache_level level,
-			  u32 flags)
-{
-	gen6_pte_t pte = GEN6_PTE_VALID;
-	pte |= HSW_PTE_ADDR_ENCODE(addr);
-
-	if (level != I915_CACHE_NONE)
-		pte |= HSW_WB_LLC_AGE3;
-
-	return pte;
-}
-
-static u64 iris_pte_encode(dma_addr_t addr,
-			   enum i915_cache_level level,
-			   u32 flags)
-{
-	gen6_pte_t pte = GEN6_PTE_VALID;
-	pte |= HSW_PTE_ADDR_ENCODE(addr);
-
-	switch (level) {
-	case I915_CACHE_NONE:
-		break;
-	case I915_CACHE_WT:
-		pte |= HSW_WT_ELLC_LLC_AGE3;
-		break;
-	default:
-		pte |= HSW_WB_ELLC_LLC_AGE3;
-		break;
-	}
-
-	return pte;
-}
-
-static void stash_init(struct pagestash *stash)
-{
-	pagevec_init(&stash->pvec);
-	spin_lock_init(&stash->lock);
-}
-
-static struct page *stash_pop_page(struct pagestash *stash)
-{
-	struct page *page = NULL;
-
-	spin_lock(&stash->lock);
-	if (likely(stash->pvec.nr))
-		page = stash->pvec.pages[--stash->pvec.nr];
-	spin_unlock(&stash->lock);
-
-	return page;
-}
-
-static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec)
-{
-	unsigned int nr;
-
-	spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING);
-
-	nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec));
-	memcpy(stash->pvec.pages + stash->pvec.nr,
-	       pvec->pages + pvec->nr - nr,
-	       sizeof(pvec->pages[0]) * nr);
-	stash->pvec.nr += nr;
-
-	spin_unlock(&stash->lock);
-
-	pvec->nr -= nr;
-}
-
-static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
-{
-	struct pagevec stack;
-	struct page *page;
-
-	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
-		i915_gem_shrink_all(vm->i915);
-
-	page = stash_pop_page(&vm->free_pages);
-	if (page)
-		return page;
-
-	if (!vm->pt_kmap_wc)
-		return alloc_page(gfp);
-
-	/* Look in our global stash of WC pages... */
-	page = stash_pop_page(&vm->i915->mm.wc_stash);
-	if (page)
-		return page;
-
-	/*
-	 * Otherwise batch allocate pages to amortize cost of set_pages_wc.
-	 *
-	 * We have to be careful as page allocation may trigger the shrinker
-	 * (via direct reclaim) which will fill up the WC stash underneath us.
-	 * So we add our WB pages into a temporary pvec on the stack and merge
-	 * them into the WC stash after all the allocations are complete.
-	 */
-	pagevec_init(&stack);
-	do {
-		struct page *page;
-
-		page = alloc_page(gfp);
-		if (unlikely(!page))
-			break;
-
-		stack.pages[stack.nr++] = page;
-	} while (pagevec_space(&stack));
-
-	if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) {
-		page = stack.pages[--stack.nr];
-
-		/* Merge spare WC pages to the global stash */
-		if (stack.nr)
-			stash_push_pagevec(&vm->i915->mm.wc_stash, &stack);
-
-		/* Push any surplus WC pages onto the local VM stash */
-		if (stack.nr)
-			stash_push_pagevec(&vm->free_pages, &stack);
-	}
-
-	/* Return unwanted leftovers */
-	if (unlikely(stack.nr)) {
-		WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr));
-		__pagevec_release(&stack);
-	}
-
-	return page;
-}
-
-static void vm_free_pages_release(struct i915_address_space *vm,
-				  bool immediate)
-{
-	struct pagevec *pvec = &vm->free_pages.pvec;
-	struct pagevec stack;
-
-	lockdep_assert_held(&vm->free_pages.lock);
-	GEM_BUG_ON(!pagevec_count(pvec));
-
-	if (vm->pt_kmap_wc) {
-		/*
-		 * When we use WC, first fill up the global stash and then
-		 * only if full immediately free the overflow.
-		 */
-		stash_push_pagevec(&vm->i915->mm.wc_stash, pvec);
-
-		/*
-		 * As we have made some room in the VM's free_pages,
-		 * we can wait for it to fill again. Unless we are
-		 * inside i915_address_space_fini() and must
-		 * immediately release the pages!
-		 */
-		if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1))
-			return;
-
-		/*
-		 * We have to drop the lock to allow ourselves to sleep,
-		 * so take a copy of the pvec and clear the stash for
-		 * others to use it as we sleep.
-		 */
-		stack = *pvec;
-		pagevec_reinit(pvec);
-		spin_unlock(&vm->free_pages.lock);
-
-		pvec = &stack;
-		set_pages_array_wb(pvec->pages, pvec->nr);
-
-		spin_lock(&vm->free_pages.lock);
-	}
-
-	__pagevec_release(pvec);
-}
-
-static void vm_free_page(struct i915_address_space *vm, struct page *page)
-{
-	/*
-	 * On !llc, we need to change the pages back to WB. We only do so
-	 * in bulk, so we rarely need to change the page attributes here,
-	 * but doing so requires a stop_machine() from deep inside arch/x86/mm.
-	 * To make detection of the possible sleep more likely, use an
-	 * unconditional might_sleep() for everybody.
-	 */
-	might_sleep();
-	spin_lock(&vm->free_pages.lock);
-	while (!pagevec_space(&vm->free_pages.pvec))
-		vm_free_pages_release(vm, false);
-	GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE);
-	pagevec_add(&vm->free_pages.pvec, page);
-	spin_unlock(&vm->free_pages.lock);
-}
-
-static void i915_address_space_fini(struct i915_address_space *vm)
-{
-	spin_lock(&vm->free_pages.lock);
-	if (pagevec_count(&vm->free_pages.pvec))
-		vm_free_pages_release(vm, true);
-	GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec));
-	spin_unlock(&vm->free_pages.lock);
-
-	drm_mm_takedown(&vm->mm);
-
-	mutex_destroy(&vm->mutex);
-}
-
-void __i915_vm_close(struct i915_address_space *vm)
-{
-	struct i915_vma *vma, *vn;
-
-	mutex_lock(&vm->mutex);
-	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
-		struct drm_i915_gem_object *obj = vma->obj;
-
-		/* Keep the obj (and hence the vma) alive as _we_ destroy it */
-		if (!kref_get_unless_zero(&obj->base.refcount))
-			continue;
-
-		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
-		WARN_ON(__i915_vma_unbind(vma));
-		__i915_vma_put(vma);
-
-		i915_gem_object_put(obj);
-	}
-	GEM_BUG_ON(!list_empty(&vm->bound_list));
-	mutex_unlock(&vm->mutex);
-}
-
-static void __i915_vm_release(struct work_struct *work)
-{
-	struct i915_address_space *vm =
-		container_of(work, struct i915_address_space, rcu.work);
-
-	vm->cleanup(vm);
-	i915_address_space_fini(vm);
-
-	kfree(vm);
-}
-
-void i915_vm_release(struct kref *kref)
-{
-	struct i915_address_space *vm =
-		container_of(kref, struct i915_address_space, ref);
-
-	GEM_BUG_ON(i915_is_ggtt(vm));
-	trace_i915_ppgtt_release(vm);
-
-	queue_rcu_work(vm->i915->wq, &vm->rcu);
-}
-
-static void i915_address_space_init(struct i915_address_space *vm, int subclass)
-{
-	kref_init(&vm->ref);
-	INIT_RCU_WORK(&vm->rcu, __i915_vm_release);
-	atomic_set(&vm->open, 1);
-
-	/*
-	 * The vm->mutex must be reclaim safe (for use in the shrinker).
-	 * Do a dummy acquire now under fs_reclaim so that any allocation
-	 * attempt holding the lock is immediately reported by lockdep.
-	 */
-	mutex_init(&vm->mutex);
-	lockdep_set_subclass(&vm->mutex, subclass);
-	i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
-
-	GEM_BUG_ON(!vm->total);
-	drm_mm_init(&vm->mm, 0, vm->total);
-	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
-
-	stash_init(&vm->free_pages);
-
-	INIT_LIST_HEAD(&vm->bound_list);
-}
-
-static int __setup_page_dma(struct i915_address_space *vm,
-			    struct i915_page_dma *p,
-			    gfp_t gfp)
-{
-	p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL);
-	if (unlikely(!p->page))
-		return -ENOMEM;
-
-	p->daddr = dma_map_page_attrs(vm->dma,
-				      p->page, 0, PAGE_SIZE,
-				      PCI_DMA_BIDIRECTIONAL,
-				      DMA_ATTR_SKIP_CPU_SYNC |
-				      DMA_ATTR_NO_WARN);
-	if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
-		vm_free_page(vm, p->page);
-		return -ENOMEM;
-	}
-
-	return 0;
-}
-
-static int setup_page_dma(struct i915_address_space *vm,
-			  struct i915_page_dma *p)
-{
-	return __setup_page_dma(vm, p, __GFP_HIGHMEM);
-}
-
-static void cleanup_page_dma(struct i915_address_space *vm,
-			     struct i915_page_dma *p)
-{
-	dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-	vm_free_page(vm, p->page);
-}
-
-#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
-
-static void
-fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count)
-{
-	kunmap_atomic(memset64(kmap_atomic(p->page), val, count));
-}
-
-#define fill_px(px, v) fill_page_dma(px_base(px), (v), PAGE_SIZE / sizeof(u64))
-#define fill32_px(px, v) do {						\
-	u64 v__ = lower_32_bits(v);					\
-	fill_px((px), v__ << 32 | v__);					\
-} while (0)
-
-static int
-setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
-{
-	unsigned long size;
-
-	/*
-	 * In order to utilize 64K pages for an object with a size < 2M, we will
-	 * need to support a 64K scratch page, given that every 16th entry for a
-	 * page-table operating in 64K mode must point to a properly aligned 64K
-	 * region, including any PTEs which happen to point to scratch.
-	 *
-	 * This is only relevant for the 48b PPGTT where we support
-	 * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
-	 * scratch (read-only) between all vm, we create one 64k scratch page
-	 * for all.
-	 */
-	size = I915_GTT_PAGE_SIZE_4K;
-	if (i915_vm_is_4lvl(vm) &&
-	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
-		size = I915_GTT_PAGE_SIZE_64K;
-		gfp |= __GFP_NOWARN;
-	}
-	gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL;
-
-	do {
-		unsigned int order = get_order(size);
-		struct page *page;
-		dma_addr_t addr;
-
-		page = alloc_pages(gfp, order);
-		if (unlikely(!page))
-			goto skip;
-
-		addr = dma_map_page_attrs(vm->dma,
-					  page, 0, size,
-					  PCI_DMA_BIDIRECTIONAL,
-					  DMA_ATTR_SKIP_CPU_SYNC |
-					  DMA_ATTR_NO_WARN);
-		if (unlikely(dma_mapping_error(vm->dma, addr)))
-			goto free_page;
-
-		if (unlikely(!IS_ALIGNED(addr, size)))
-			goto unmap_page;
-
-		vm->scratch[0].base.page = page;
-		vm->scratch[0].base.daddr = addr;
-		vm->scratch_order = order;
-		return 0;
-
-unmap_page:
-		dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL);
-free_page:
-		__free_pages(page, order);
-skip:
-		if (size == I915_GTT_PAGE_SIZE_4K)
-			return -ENOMEM;
-
-		size = I915_GTT_PAGE_SIZE_4K;
-		gfp &= ~__GFP_NOWARN;
-	} while (1);
-}
-
-static void cleanup_scratch_page(struct i915_address_space *vm)
-{
-	struct i915_page_dma *p = px_base(&vm->scratch[0]);
-	unsigned int order = vm->scratch_order;
-
-	dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT,
-		       PCI_DMA_BIDIRECTIONAL);
-	__free_pages(p->page, order);
-}
-
-static void free_scratch(struct i915_address_space *vm)
-{
-	int i;
-
-	if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */
-		return;
-
-	for (i = 1; i <= vm->top; i++) {
-		if (!px_dma(&vm->scratch[i]))
-			break;
-		cleanup_page_dma(vm, px_base(&vm->scratch[i]));
-	}
-
-	cleanup_scratch_page(vm);
-}
-
-static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
-{
-	struct i915_page_table *pt;
-
-	pt = kmalloc(sizeof(*pt), I915_GFP_ALLOW_FAIL);
-	if (unlikely(!pt))
-		return ERR_PTR(-ENOMEM);
-
-	if (unlikely(setup_page_dma(vm, &pt->base))) {
-		kfree(pt);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	atomic_set(&pt->used, 0);
-	return pt;
-}
-
-static struct i915_page_directory *__alloc_pd(size_t sz)
-{
-	struct i915_page_directory *pd;
-
-	pd = kzalloc(sz, I915_GFP_ALLOW_FAIL);
-	if (unlikely(!pd))
-		return NULL;
-
-	spin_lock_init(&pd->lock);
-	return pd;
-}
-
-static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
-{
-	struct i915_page_directory *pd;
-
-	pd = __alloc_pd(sizeof(*pd));
-	if (unlikely(!pd))
-		return ERR_PTR(-ENOMEM);
-
-	if (unlikely(setup_page_dma(vm, px_base(pd)))) {
-		kfree(pd);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	return pd;
-}
-
-static void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd)
-{
-	cleanup_page_dma(vm, pd);
-	kfree(pd);
-}
-
-#define free_px(vm, px) free_pd(vm, px_base(px))
-
-static inline void
-write_dma_entry(struct i915_page_dma * const pdma,
-		const unsigned short idx,
-		const u64 encoded_entry)
-{
-	u64 * const vaddr = kmap_atomic(pdma->page);
-
-	vaddr[idx] = encoded_entry;
-	kunmap_atomic(vaddr);
-}
-
-static inline void
-__set_pd_entry(struct i915_page_directory * const pd,
-	       const unsigned short idx,
-	       struct i915_page_dma * const to,
-	       u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
-{
-	/* Each thread pre-pins the pd, and we may have a thread per pde. */
-	GEM_BUG_ON(atomic_read(px_used(pd)) > NALLOC * ARRAY_SIZE(pd->entry));
-
-	atomic_inc(px_used(pd));
-	pd->entry[idx] = to;
-	write_dma_entry(px_base(pd), idx, encode(to->daddr, I915_CACHE_LLC));
-}
-
-#define set_pd_entry(pd, idx, to) \
-	__set_pd_entry((pd), (idx), px_base(to), gen8_pde_encode)
-
-static inline void
-clear_pd_entry(struct i915_page_directory * const pd,
-	       const unsigned short idx,
-	       const struct i915_page_scratch * const scratch)
-{
-	GEM_BUG_ON(atomic_read(px_used(pd)) == 0);
-
-	write_dma_entry(px_base(pd), idx, scratch->encode);
-	pd->entry[idx] = NULL;
-	atomic_dec(px_used(pd));
-}
-
-static bool
-release_pd_entry(struct i915_page_directory * const pd,
-		 const unsigned short idx,
-		 struct i915_page_table * const pt,
-		 const struct i915_page_scratch * const scratch)
-{
-	bool free = false;
-
-	if (atomic_add_unless(&pt->used, -1, 1))
-		return false;
-
-	spin_lock(&pd->lock);
-	if (atomic_dec_and_test(&pt->used)) {
-		clear_pd_entry(pd, idx, scratch);
-		free = true;
-	}
-	spin_unlock(&pd->lock);
-
-	return free;
-}
-
-static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
-{
-	struct drm_i915_private *dev_priv = ppgtt->vm.i915;
-	enum vgt_g2v_type msg;
-	int i;
-
-	if (create)
-		atomic_inc(px_used(ppgtt->pd)); /* never remove */
-	else
-		atomic_dec(px_used(ppgtt->pd));
-
-	mutex_lock(&dev_priv->vgpu.lock);
-
-	if (i915_vm_is_4lvl(&ppgtt->vm)) {
-		const u64 daddr = px_dma(ppgtt->pd);
-
-		I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
-		I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
-
-		msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
-				VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
-	} else {
-		for (i = 0; i < GEN8_3LVL_PDPES; i++) {
-			const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
-
-			I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
-			I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
-		}
-
-		msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
-				VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
-	}
-
-	/* g2v_notify atomically (via hv trap) consumes the message packet. */
-	I915_WRITE(vgtif_reg(g2v_notify), msg);
-
-	mutex_unlock(&dev_priv->vgpu.lock);
-}
-
-/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */
-#define GEN8_PAGE_SIZE (SZ_4K) /* page and page-directory sizes are the same */
-#define GEN8_PTE_SHIFT (ilog2(GEN8_PAGE_SIZE))
-#define GEN8_PDES (GEN8_PAGE_SIZE / sizeof(u64))
-#define gen8_pd_shift(lvl) ((lvl) * ilog2(GEN8_PDES))
-#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl))
-#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl))
-#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl))
-
-static inline unsigned int
-gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx)
-{
-	const int shift = gen8_pd_shift(lvl);
-	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);
-
-	GEM_BUG_ON(start >= end);
-	end += ~mask >> gen8_pd_shift(1);
-
-	*idx = i915_pde_index(start, shift);
-	if ((start ^ end) & mask)
-		return GEN8_PDES - *idx;
-	else
-		return i915_pde_index(end, shift) - *idx;
-}
-
-static inline bool gen8_pd_contains(u64 start, u64 end, int lvl)
-{
-	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);
-
-	GEM_BUG_ON(start >= end);
-	return (start ^ end) & mask && (start & ~mask) == 0;
-}
-
-static inline unsigned int gen8_pt_count(u64 start, u64 end)
-{
-	GEM_BUG_ON(start >= end);
-	if ((start ^ end) >> gen8_pd_shift(1))
-		return GEN8_PDES - (start & (GEN8_PDES - 1));
-	else
-		return end - start;
-}
-
-static inline unsigned int gen8_pd_top_count(const struct i915_address_space *vm)
-{
-	unsigned int shift = __gen8_pte_shift(vm->top);
-	return (vm->total + (1ull << shift) - 1) >> shift;
-}
-
-static inline struct i915_page_directory *
-gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx)
-{
-	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
-
-	if (vm->top == 2)
-		return ppgtt->pd;
-	else
-		return i915_pd_entry(ppgtt->pd, gen8_pd_index(idx, vm->top));
-}
-
-static inline struct i915_page_directory *
-gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr)
-{
-	return gen8_pdp_for_page_index(vm, addr >> GEN8_PTE_SHIFT);
-}
-
-static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
-				 struct i915_page_directory *pd,
-				 int count, int lvl)
-{
-	if (lvl) {
-		void **pde = pd->entry;
-
-		do {
-			if (!*pde)
-				continue;
-
-			__gen8_ppgtt_cleanup(vm, *pde, GEN8_PDES, lvl - 1);
-		} while (pde++, --count);
-	}
-
-	free_px(vm, pd);
-}
-
-static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
-{
-	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-
-	if (intel_vgpu_active(vm->i915))
-		gen8_ppgtt_notify_vgt(ppgtt, false);
-
-	__gen8_ppgtt_cleanup(vm, ppgtt->pd, gen8_pd_top_count(vm), vm->top);
-	free_scratch(vm);
-}
-
-static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
-			      struct i915_page_directory * const pd,
-			      u64 start, const u64 end, int lvl)
-{
-	const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
-	unsigned int idx, len;
-
-	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
-
-	len = gen8_pd_range(start, end, lvl--, &idx);
-	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
-	    __func__, vm, lvl + 1, start, end,
-	    idx, len, atomic_read(px_used(pd)));
-	GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));
-
-	do {
-		struct i915_page_table *pt = pd->entry[idx];
-
-		if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) &&
-		    gen8_pd_contains(start, end, lvl)) {
-			DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
-			    __func__, vm, lvl + 1, idx, start, end);
-			clear_pd_entry(pd, idx, scratch);
-			__gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
-			start += (u64)I915_PDES << gen8_pd_shift(lvl);
-			continue;
-		}
-
-		if (lvl) {
-			start = __gen8_ppgtt_clear(vm, as_pd(pt),
-						   start, end, lvl);
-		} else {
-			unsigned int count;
-			u64 *vaddr;
-
-			count = gen8_pt_count(start, end);
-			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n",
-			    __func__, vm, lvl, start, end,
-			    gen8_pd_index(start, 0), count,
-			    atomic_read(&pt->used));
-			GEM_BUG_ON(!count || count >= atomic_read(&pt->used));
-
-			vaddr = kmap_atomic_px(pt);
-			memset64(vaddr + gen8_pd_index(start, 0),
-				 vm->scratch[0].encode,
-				 count);
-			kunmap_atomic(vaddr);
-
-			atomic_sub(count, &pt->used);
-			start += count;
-		}
-
-		if (release_pd_entry(pd, idx, pt, scratch))
-			free_px(vm, pt);
-	} while (idx++, --len);
-
-	return start;
-}
-
-static void gen8_ppgtt_clear(struct i915_address_space *vm,
-			     u64 start, u64 length)
-{
-	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
-	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
-	GEM_BUG_ON(range_overflows(start, length, vm->total));
-
-	start >>= GEN8_PTE_SHIFT;
-	length >>= GEN8_PTE_SHIFT;
-	GEM_BUG_ON(length == 0);
-
-	__gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
-			   start, start + length, vm->top);
-}
-
-static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
-			      struct i915_page_directory * const pd,
-			      u64 * const start, const u64 end, int lvl)
-{
-	const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
-	struct i915_page_table *alloc = NULL;
-	unsigned int idx, len;
-	int ret = 0;
-
-	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
-
-	len = gen8_pd_range(*start, end, lvl--, &idx);
-	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
-	    __func__, vm, lvl + 1, *start, end,
-	    idx, len, atomic_read(px_used(pd)));
-	GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1));
-
-	spin_lock(&pd->lock);
-	GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! */
-	do {
-		struct i915_page_table *pt = pd->entry[idx];
-
-		if (!pt) {
-			spin_unlock(&pd->lock);
-
-			DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
-			    __func__, vm, lvl + 1, idx);
-
-			pt = fetch_and_zero(&alloc);
-			if (lvl) {
-				if (!pt) {
-					pt = &alloc_pd(vm)->pt;
-					if (IS_ERR(pt)) {
-						ret = PTR_ERR(pt);
-						goto out;
-					}
-				}
-
-				fill_px(pt, vm->scratch[lvl].encode);
-			} else {
-				if (!pt) {
-					pt = alloc_pt(vm);
-					if (IS_ERR(pt)) {
-						ret = PTR_ERR(pt);
-						goto out;
-					}
-				}
-
-				if (intel_vgpu_active(vm->i915) ||
-				    gen8_pt_count(*start, end) < I915_PDES)
-					fill_px(pt, vm->scratch[lvl].encode);
-			}
-
-			spin_lock(&pd->lock);
-			if (likely(!pd->entry[idx]))
-				set_pd_entry(pd, idx, pt);
-			else
-				alloc = pt, pt = pd->entry[idx];
-		}
-
-		if (lvl) {
-			atomic_inc(&pt->used);
-			spin_unlock(&pd->lock);
-
-			ret = __gen8_ppgtt_alloc(vm, as_pd(pt),
-						 start, end, lvl);
-			if (unlikely(ret)) {
-				if (release_pd_entry(pd, idx, pt, scratch))
-					free_px(vm, pt);
-				goto out;
-			}
-
-			spin_lock(&pd->lock);
-			atomic_dec(&pt->used);
-			GEM_BUG_ON(!atomic_read(&pt->used));
-		} else {
-			unsigned int count = gen8_pt_count(*start, end);
-
-			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n",
-			    __func__, vm, lvl, *start, end,
-			    gen8_pd_index(*start, 0), count,
-			    atomic_read(&pt->used));
-
-			atomic_add(count, &pt->used);
-			/* All other pdes may be simultaneously removed */
-			GEM_BUG_ON(atomic_read(&pt->used) > NALLOC * I915_PDES);
-			*start += count;
-		}
-	} while (idx++, --len);
-	spin_unlock(&pd->lock);
-out:
-	if (alloc)
-		free_px(vm, alloc);
-	return ret;
-}
-
-static int gen8_ppgtt_alloc(struct i915_address_space *vm,
-			    u64 start, u64 length)
-{
-	u64 from;
-	int err;
-
-	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
-	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
-	GEM_BUG_ON(range_overflows(start, length, vm->total));
-
-	start >>= GEN8_PTE_SHIFT;
-	length >>= GEN8_PTE_SHIFT;
-	GEM_BUG_ON(length == 0);
-	from = start;
-
-	err = __gen8_ppgtt_alloc(vm, i915_vm_to_ppgtt(vm)->pd,
-				 &start, start + length, vm->top);
-	if (unlikely(err && from != start))
-		__gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
-				   from, start, vm->top);
-
-	return err;
-}
-
-static inline struct sgt_dma {
-	struct scatterlist *sg;
-	dma_addr_t dma, max;
-} sgt_dma(struct i915_vma *vma) {
-	struct scatterlist *sg = vma->pages->sgl;
-	dma_addr_t addr = sg_dma_address(sg);
-	return (struct sgt_dma) { sg, addr, addr + sg->length };
-}
-
-static __always_inline u64
-gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
-		      struct i915_page_directory *pdp,
-		      struct sgt_dma *iter,
-		      u64 idx,
-		      enum i915_cache_level cache_level,
-		      u32 flags)
-{
-	struct i915_page_directory *pd;
-	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
-	gen8_pte_t *vaddr;
-
-	pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
-	vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
-	do {
-		vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
-
-		iter->dma += I915_GTT_PAGE_SIZE;
-		if (iter->dma >= iter->max) {
-			iter->sg = __sg_next(iter->sg);
-			if (!iter->sg) {
-				idx = 0;
-				break;
-			}
-
-			iter->dma = sg_dma_address(iter->sg);
-			iter->max = iter->dma + iter->sg->length;
-		}
-
-		if (gen8_pd_index(++idx, 0) == 0) {
-			if (gen8_pd_index(idx, 1) == 0) {
-				/* Limited by sg length for 3lvl */
-				if (gen8_pd_index(idx, 2) == 0)
-					break;
-
-				pd = pdp->entry[gen8_pd_index(idx, 2)];
-			}
-
-			kunmap_atomic(vaddr);
-			vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
-		}
-	} while (1);
-	kunmap_atomic(vaddr);
-
-	return idx;
-}
-
-static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
-				   struct sgt_dma *iter,
-				   enum i915_cache_level cache_level,
-				   u32 flags)
-{
-	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
-	u64 start = vma->node.start;
-	dma_addr_t rem = iter->sg->length;
-
-	GEM_BUG_ON(!i915_vm_is_4lvl(vma->vm));
-
-	do {
-		struct i915_page_directory * const pdp =
-			gen8_pdp_for_page_address(vma->vm, start);
-		struct i915_page_directory * const pd =
-			i915_pd_entry(pdp, __gen8_pte_index(start, 2));
-		gen8_pte_t encode = pte_encode;
-		unsigned int maybe_64K = -1;
-		unsigned int page_size;
-		gen8_pte_t *vaddr;
-		u16 index;
-
-		if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
-		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
-		    rem >= I915_GTT_PAGE_SIZE_2M &&
-		    !__gen8_pte_index(start, 0)) {
-			index = __gen8_pte_index(start, 1);
-			encode |= GEN8_PDE_PS_2M;
-			page_size = I915_GTT_PAGE_SIZE_2M;
-
-			vaddr = kmap_atomic_px(pd);
-		} else {
-			struct i915_page_table *pt =
-				i915_pt_entry(pd, __gen8_pte_index(start, 1));
-
-			index = __gen8_pte_index(start, 0);
-			page_size = I915_GTT_PAGE_SIZE;
-
-			if (!index &&
-			    vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
-			    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
-			    (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
-			     rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))
-				maybe_64K = __gen8_pte_index(start, 1);
-
-			vaddr = kmap_atomic_px(pt);
-		}
-
-		do {
-			GEM_BUG_ON(iter->sg->length < page_size);
-			vaddr[index++] = encode | iter->dma;
-
-			start += page_size;
-			iter->dma += page_size;
-			rem -= page_size;
-			if (iter->dma >= iter->max) {
-				iter->sg = __sg_next(iter->sg);
-				if (!iter->sg)
-					break;
-
-				rem = iter->sg->length;
-				iter->dma = sg_dma_address(iter->sg);
-				iter->max = iter->dma + rem;
-
-				if (maybe_64K != -1 && index < I915_PDES &&
-				    !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
-				      (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
-				       rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)))
-					maybe_64K = -1;
-
-				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
-					break;
-			}
-		} while (rem >= page_size && index < I915_PDES);
-
-		kunmap_atomic(vaddr);
-
-		/*
-		 * Is it safe to mark the 2M block as 64K? -- Either we have
-		 * filled whole page-table with 64K entries, or filled part of
-		 * it and have reached the end of the sg table and we have
-		 * enough padding.
-		 */
-		if (maybe_64K != -1 &&
-		    (index == I915_PDES ||
-		     (i915_vm_has_scratch_64K(vma->vm) &&
-		      !iter->sg && IS_ALIGNED(vma->node.start +
-					      vma->node.size,
-					      I915_GTT_PAGE_SIZE_2M)))) {
-			vaddr = kmap_atomic_px(pd);
-			vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
-			kunmap_atomic(vaddr);
-			page_size = I915_GTT_PAGE_SIZE_64K;
-
-			/*
-			 * We write all 4K page entries, even when using 64K
-			 * pages. In order to verify that the HW isn't cheating
-			 * by using the 4K PTE instead of the 64K PTE, we want
-			 * to remove all the surplus entries. If the HW skipped
-			 * the 64K PTE, it will read/write into the scratch page
-			 * instead - which we detect as missing results during
-			 * selftests.
-			 */
-			if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) {
-				u16 i;
-
-				encode = vma->vm->scratch[0].encode;
-				vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K));
-
-				for (i = 1; i < index; i += 16)
-					memset64(vaddr + i, encode, 15);
-
-				kunmap_atomic(vaddr);
-			}
-		}
-
-		vma->page_sizes.gtt |= page_size;
-	} while (iter->sg);
-}
-
-static void gen8_ppgtt_insert(struct i915_address_space *vm,
-			      struct i915_vma *vma,
-			      enum i915_cache_level cache_level,
-			      u32 flags)
-{
-	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
-	struct sgt_dma iter = sgt_dma(vma);
-
-	if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
-		gen8_ppgtt_insert_huge(vma, &iter, cache_level, flags);
-	} else  {
-		u64 idx = vma->node.start >> GEN8_PTE_SHIFT;
-
-		do {
-			struct i915_page_directory * const pdp =
-				gen8_pdp_for_page_index(vm, idx);
-
-			idx = gen8_ppgtt_insert_pte(ppgtt, pdp, &iter, idx,
-						    cache_level, flags);
-		} while (idx);
-
-		vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
-	}
-}
-
-static int gen8_init_scratch(struct i915_address_space *vm)
-{
-	int ret;
-	int i;
-
-	/*
-	 * If everybody agrees to not to write into the scratch page,
-	 * we can reuse it for all vm, keeping contexts and processes separate.
-	 */
-	if (vm->has_read_only && vm->gt->vm && !i915_is_ggtt(vm->gt->vm)) {
-		struct i915_address_space *clone = vm->gt->vm;
-
-		GEM_BUG_ON(!clone->has_read_only);
-
-		vm->scratch_order = clone->scratch_order;
-		memcpy(vm->scratch, clone->scratch, sizeof(vm->scratch));
-		px_dma(&vm->scratch[0]) = 0; /* no xfer of ownership */
-		return 0;
-	}
-
-	ret = setup_scratch_page(vm, __GFP_HIGHMEM);
-	if (ret)
-		return ret;
-
-	vm->scratch[0].encode =
-		gen8_pte_encode(px_dma(&vm->scratch[0]),
-				I915_CACHE_LLC, vm->has_read_only);
-
-	for (i = 1; i <= vm->top; i++) {
-		if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[i]))))
-			goto free_scratch;
-
-		fill_px(&vm->scratch[i], vm->scratch[i - 1].encode);
-		vm->scratch[i].encode =
-			gen8_pde_encode(px_dma(&vm->scratch[i]),
-					I915_CACHE_LLC);
-	}
-
-	return 0;
-
-free_scratch:
-	free_scratch(vm);
-	return -ENOMEM;
-}
-
-static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
-{
-	struct i915_address_space *vm = &ppgtt->vm;
-	struct i915_page_directory *pd = ppgtt->pd;
-	unsigned int idx;
-
-	GEM_BUG_ON(vm->top != 2);
-	GEM_BUG_ON(gen8_pd_top_count(vm) != GEN8_3LVL_PDPES);
-
-	for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) {
-		struct i915_page_directory *pde;
-
-		pde = alloc_pd(vm);
-		if (IS_ERR(pde))
-			return PTR_ERR(pde);
-
-		fill_px(pde, vm->scratch[1].encode);
-		set_pd_entry(pd, idx, pde);
-		atomic_inc(px_used(pde)); /* keep pinned */
-	}
-	wmb();
-
-	return 0;
-}
-
-static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
-{
-	struct drm_i915_private *i915 = gt->i915;
-
-	ppgtt->vm.gt = gt;
-	ppgtt->vm.i915 = i915;
-	ppgtt->vm.dma = &i915->drm.pdev->dev;
-	ppgtt->vm.total = BIT_ULL(INTEL_INFO(i915)->ppgtt_size);
-
-	i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
-
-	ppgtt->vm.vma_ops.bind_vma    = ppgtt_bind_vma;
-	ppgtt->vm.vma_ops.unbind_vma  = ppgtt_unbind_vma;
-	ppgtt->vm.vma_ops.set_pages   = ppgtt_set_pages;
-	ppgtt->vm.vma_ops.clear_pages = clear_pages;
-}
-
-static struct i915_page_directory *
-gen8_alloc_top_pd(struct i915_address_space *vm)
-{
-	const unsigned int count = gen8_pd_top_count(vm);
-	struct i915_page_directory *pd;
-
-	GEM_BUG_ON(count > ARRAY_SIZE(pd->entry));
-
-	pd = __alloc_pd(offsetof(typeof(*pd), entry[count]));
-	if (unlikely(!pd))
-		return ERR_PTR(-ENOMEM);
-
-	if (unlikely(setup_page_dma(vm, px_base(pd)))) {
-		kfree(pd);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count);
-	atomic_inc(px_used(pd)); /* mark as pinned */
-	return pd;
-}
-
-/*
- * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
- * with a net effect resembling a 2-level page table in normal x86 terms. Each
- * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address
- * space.
- *
- */
-static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
-{
-	struct i915_ppgtt *ppgtt;
-	int err;
-
-	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
-	if (!ppgtt)
-		return ERR_PTR(-ENOMEM);
-
-	ppgtt_init(ppgtt, &i915->gt);
-	ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
-
-	/*
-	 * From bdw, there is hw support for read-only pages in the PPGTT.
-	 *
-	 * Gen11 has HSDES#:1807136187 unresolved. Disable ro support
-	 * for now.
-	 *
-	 * Gen12 has inherited the same read-only fault issue from gen11.
-	 */
-	ppgtt->vm.has_read_only = !IS_GEN_RANGE(i915, 11, 12);
-
-	/* There are only few exceptions for gen >=6. chv and bxt.
-	 * And we are not sure about the latter so play safe for now.
-	 */
-	if (IS_CHERRYVIEW(i915) || IS_BROXTON(i915))
-		ppgtt->vm.pt_kmap_wc = true;
-
-	err = gen8_init_scratch(&ppgtt->vm);
-	if (err)
-		goto err_free;
-
-	ppgtt->pd = gen8_alloc_top_pd(&ppgtt->vm);
-	if (IS_ERR(ppgtt->pd)) {
-		err = PTR_ERR(ppgtt->pd);
-		goto err_free_scratch;
-	}
-
-	if (!i915_vm_is_4lvl(&ppgtt->vm)) {
-		err = gen8_preallocate_top_level_pdp(ppgtt);
-		if (err)
-			goto err_free_pd;
-	}
-
-	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
-	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
-	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
-	ppgtt->vm.clear_range = gen8_ppgtt_clear;
-
-	if (intel_vgpu_active(i915))
-		gen8_ppgtt_notify_vgt(ppgtt, true);
-
-	ppgtt->vm.cleanup = gen8_ppgtt_cleanup;
-
-	return ppgtt;
-
-err_free_pd:
-	__gen8_ppgtt_cleanup(&ppgtt->vm, ppgtt->pd,
-			     gen8_pd_top_count(&ppgtt->vm), ppgtt->vm.top);
-err_free_scratch:
-	free_scratch(&ppgtt->vm);
-err_free:
-	kfree(ppgtt);
-	return ERR_PTR(err);
-}
-
-/* Write pde (index) from the page directory @pd to the page table @pt */
-static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt,
-				  const unsigned int pde,
-				  const struct i915_page_table *pt)
-{
-	/* Caller needs to make sure the write completes if necessary */
-	iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
-		  ppgtt->pd_addr + pde);
-}
-
-static void gen7_ppgtt_enable(struct intel_gt *gt)
-{
-	struct drm_i915_private *i915 = gt->i915;
-	struct intel_uncore *uncore = gt->uncore;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	u32 ecochk;
-
-	intel_uncore_rmw(uncore, GAC_ECO_BITS, 0, ECOBITS_PPGTT_CACHE64B);
-
-	ecochk = intel_uncore_read(uncore, GAM_ECOCHK);
-	if (IS_HASWELL(i915)) {
-		ecochk |= ECOCHK_PPGTT_WB_HSW;
-	} else {
-		ecochk |= ECOCHK_PPGTT_LLC_IVB;
-		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
-	}
-	intel_uncore_write(uncore, GAM_ECOCHK, ecochk);
-
-	for_each_engine(engine, gt, id) {
-		/* GFX_MODE is per-ring on gen7+ */
-		ENGINE_WRITE(engine,
-			     RING_MODE_GEN7,
-			     _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
-	}
-}
-
-static void gen6_ppgtt_enable(struct intel_gt *gt)
-{
-	struct intel_uncore *uncore = gt->uncore;
-
-	intel_uncore_rmw(uncore,
-			 GAC_ECO_BITS,
-			 0,
-			 ECOBITS_SNB_BIT | ECOBITS_PPGTT_CACHE64B);
-
-	intel_uncore_rmw(uncore,
-			 GAB_CTL,
-			 0,
-			 GAB_CTL_CONT_AFTER_PAGEFAULT);
-
-	intel_uncore_rmw(uncore,
-			 GAM_ECOCHK,
-			 0,
-			 ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
-
-	if (HAS_PPGTT(uncore->i915)) /* may be disabled for VT-d */
-		intel_uncore_write(uncore,
-				   GFX_MODE,
-				   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
-}
-
-/* PPGTT support for Sandybdrige/Gen6 and later */
-static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
-				   u64 start, u64 length)
-{
-	struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
-	const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
-	const gen6_pte_t scratch_pte = vm->scratch[0].encode;
-	unsigned int pde = first_entry / GEN6_PTES;
-	unsigned int pte = first_entry % GEN6_PTES;
-	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
-
-	while (num_entries) {
-		struct i915_page_table * const pt =
-			i915_pt_entry(ppgtt->base.pd, pde++);
-		const unsigned int count = min(num_entries, GEN6_PTES - pte);
-		gen6_pte_t *vaddr;
-
-		GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1]));
-
-		num_entries -= count;
-
-		GEM_BUG_ON(count > atomic_read(&pt->used));
-		if (!atomic_sub_return(count, &pt->used))
-			ppgtt->scan_for_unused_pt = true;
-
-		/*
-		 * Note that the hw doesn't support removing PDE on the fly
-		 * (they are cached inside the context with no means to
-		 * invalidate the cache), so we can only reset the PTE
-		 * entries back to scratch.
-		 */
-
-		vaddr = kmap_atomic_px(pt);
-		memset32(vaddr + pte, scratch_pte, count);
-		kunmap_atomic(vaddr);
-
-		pte = 0;
-	}
-}
-
-static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
-				      struct i915_vma *vma,
-				      enum i915_cache_level cache_level,
-				      u32 flags)
-{
-	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-	struct i915_page_directory * const pd = ppgtt->pd;
-	unsigned first_entry = vma->node.start / I915_GTT_PAGE_SIZE;
-	unsigned act_pt = first_entry / GEN6_PTES;
-	unsigned act_pte = first_entry % GEN6_PTES;
-	const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
-	struct sgt_dma iter = sgt_dma(vma);
-	gen6_pte_t *vaddr;
-
-	GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch[1]);
-
-	vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
-	do {
-		vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
-
-		iter.dma += I915_GTT_PAGE_SIZE;
-		if (iter.dma == iter.max) {
-			iter.sg = __sg_next(iter.sg);
-			if (!iter.sg)
-				break;
-
-			iter.dma = sg_dma_address(iter.sg);
-			iter.max = iter.dma + iter.sg->length;
-		}
-
-		if (++act_pte == GEN6_PTES) {
-			kunmap_atomic(vaddr);
-			vaddr = kmap_atomic_px(i915_pt_entry(pd, ++act_pt));
-			act_pte = 0;
-		}
-	} while (1);
-	kunmap_atomic(vaddr);
-
-	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
-}
-
-static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end)
-{
-	struct i915_page_directory * const pd = ppgtt->base.pd;
-	struct i915_page_table *pt;
-	unsigned int pde;
-
-	start = round_down(start, SZ_64K);
-	end = round_up(end, SZ_64K) - start;
-
-	mutex_lock(&ppgtt->flush);
-
-	gen6_for_each_pde(pt, pd, start, end, pde)
-		gen6_write_pde(ppgtt, pde, pt);
-
-	mb();
-	ioread32(ppgtt->pd_addr + pde - 1);
-	gen6_ggtt_invalidate(ppgtt->base.vm.gt->ggtt);
-	mb();
-
-	mutex_unlock(&ppgtt->flush);
-}
-
-static int gen6_alloc_va_range(struct i915_address_space *vm,
-			       u64 start, u64 length)
-{
-	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
-	struct i915_page_directory * const pd = ppgtt->base.pd;
-	struct i915_page_table *pt, *alloc = NULL;
-	intel_wakeref_t wakeref;
-	u64 from = start;
-	unsigned int pde;
-	int ret = 0;
-
-	wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
-
-	spin_lock(&pd->lock);
-	gen6_for_each_pde(pt, pd, start, length, pde) {
-		const unsigned int count = gen6_pte_count(start, length);
-
-		if (px_base(pt) == px_base(&vm->scratch[1])) {
-			spin_unlock(&pd->lock);
-
-			pt = fetch_and_zero(&alloc);
-			if (!pt)
-				pt = alloc_pt(vm);
-			if (IS_ERR(pt)) {
-				ret = PTR_ERR(pt);
-				goto unwind_out;
-			}
-
-			fill32_px(pt, vm->scratch[0].encode);
-
-			spin_lock(&pd->lock);
-			if (pd->entry[pde] == &vm->scratch[1]) {
-				pd->entry[pde] = pt;
-			} else {
-				alloc = pt;
-				pt = pd->entry[pde];
-			}
-		}
-
-		atomic_add(count, &pt->used);
-	}
-	spin_unlock(&pd->lock);
-
-	if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND))
-		gen6_flush_pd(ppgtt, from, start);
-
-	goto out;
-
-unwind_out:
-	gen6_ppgtt_clear_range(vm, from, start - from);
-out:
-	if (alloc)
-		free_px(vm, alloc);
-	intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
-	return ret;
-}
-
-static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
-{
-	struct i915_address_space * const vm = &ppgtt->base.vm;
-	struct i915_page_directory * const pd = ppgtt->base.pd;
-	int ret;
-
-	ret = setup_scratch_page(vm, __GFP_HIGHMEM);
-	if (ret)
-		return ret;
-
-	vm->scratch[0].encode =
-		vm->pte_encode(px_dma(&vm->scratch[0]),
-			       I915_CACHE_NONE, PTE_READ_ONLY);
-
-	if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[1])))) {
-		cleanup_scratch_page(vm);
-		return -ENOMEM;
-	}
-
-	fill32_px(&vm->scratch[1], vm->scratch[0].encode);
-	memset_p(pd->entry, &vm->scratch[1], I915_PDES);
-
-	return 0;
-}
-
-static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
-{
-	struct i915_page_directory * const pd = ppgtt->base.pd;
-	struct i915_page_dma * const scratch =
-		px_base(&ppgtt->base.vm.scratch[1]);
-	struct i915_page_table *pt;
-	u32 pde;
-
-	gen6_for_all_pdes(pt, pd, pde)
-		if (px_base(pt) != scratch)
-			free_px(&ppgtt->base.vm, pt);
-}
-
-static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
-{
-	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
-
-	__i915_vma_put(ppgtt->vma);
-
-	gen6_ppgtt_free_pd(ppgtt);
-	free_scratch(vm);
-
-	mutex_destroy(&ppgtt->flush);
-	mutex_destroy(&ppgtt->pin_mutex);
-	kfree(ppgtt->base.pd);
-}
-
-static int pd_vma_set_pages(struct i915_vma *vma)
-{
-	vma->pages = ERR_PTR(-ENODEV);
-	return 0;
-}
-
-static void pd_vma_clear_pages(struct i915_vma *vma)
-{
-	GEM_BUG_ON(!vma->pages);
-
-	vma->pages = NULL;
-}
-
-static int pd_vma_bind(struct i915_vma *vma,
-		       enum i915_cache_level cache_level,
-		       u32 unused)
-{
-	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
-	struct gen6_ppgtt *ppgtt = vma->private;
-	u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;
-
-	px_base(ppgtt->base.pd)->ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
-	ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
-
-	gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total);
-	return 0;
-}
-
-static void pd_vma_unbind(struct i915_vma *vma)
-{
-	struct gen6_ppgtt *ppgtt = vma->private;
-	struct i915_page_directory * const pd = ppgtt->base.pd;
-	struct i915_page_dma * const scratch =
-		px_base(&ppgtt->base.vm.scratch[1]);
-	struct i915_page_table *pt;
-	unsigned int pde;
-
-	if (!ppgtt->scan_for_unused_pt)
-		return;
-
-	/* Free all no longer used page tables */
-	gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
-		if (px_base(pt) == scratch || atomic_read(&pt->used))
-			continue;
-
-		free_px(&ppgtt->base.vm, pt);
-		pd->entry[pde] = scratch;
-	}
-
-	ppgtt->scan_for_unused_pt = false;
-}
-
-static const struct i915_vma_ops pd_vma_ops = {
-	.set_pages = pd_vma_set_pages,
-	.clear_pages = pd_vma_clear_pages,
-	.bind_vma = pd_vma_bind,
-	.unbind_vma = pd_vma_unbind,
-};
-
-static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
-{
-	struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt;
-	struct i915_vma *vma;
-
-	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
-	GEM_BUG_ON(size > ggtt->vm.total);
-
-	vma = i915_vma_alloc();
-	if (!vma)
-		return ERR_PTR(-ENOMEM);
-
-	i915_active_init(&vma->active, NULL, NULL);
-
-	kref_init(&vma->ref);
-	mutex_init(&vma->pages_mutex);
-	vma->vm = i915_vm_get(&ggtt->vm);
-	vma->ops = &pd_vma_ops;
-	vma->private = ppgtt;
-
-	vma->size = size;
-	vma->fence_size = size;
-	atomic_set(&vma->flags, I915_VMA_GGTT);
-	vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
-
-	INIT_LIST_HEAD(&vma->obj_link);
-	INIT_LIST_HEAD(&vma->closed_link);
-
-	return vma;
-}
-
-int gen6_ppgtt_pin(struct i915_ppgtt *base)
-{
-	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
-	int err = 0;
-
-	GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open));
-
-	/*
-	 * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
-	 * which will be pinned into every active context.
-	 * (When vma->pin_count becomes atomic, I expect we will naturally
-	 * need a larger, unpacked, type and kill this redundancy.)
-	 */
-	if (atomic_add_unless(&ppgtt->pin_count, 1, 0))
-		return 0;
-
-	if (mutex_lock_interruptible(&ppgtt->pin_mutex))
-		return -EINTR;
-
-	/*
-	 * PPGTT PDEs reside in the GGTT and consists of 512 entries. The
-	 * allocator works in address space sizes, so it's multiplied by page
-	 * size. We allocate at the top of the GTT to avoid fragmentation.
-	 */
-	if (!atomic_read(&ppgtt->pin_count)) {
-		err = i915_ggtt_pin(ppgtt->vma, GEN6_PD_ALIGN, PIN_HIGH);
-	}
-	if (!err)
-		atomic_inc(&ppgtt->pin_count);
-	mutex_unlock(&ppgtt->pin_mutex);
-
-	return err;
-}
-
-void gen6_ppgtt_unpin(struct i915_ppgtt *base)
-{
-	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
-
-	GEM_BUG_ON(!atomic_read(&ppgtt->pin_count));
-	if (atomic_dec_and_test(&ppgtt->pin_count))
-		i915_vma_unpin(ppgtt->vma);
-}
-
-void gen6_ppgtt_unpin_all(struct i915_ppgtt *base)
-{
-	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
-
-	if (!atomic_read(&ppgtt->pin_count))
-		return;
-
-	i915_vma_unpin(ppgtt->vma);
-	atomic_set(&ppgtt->pin_count, 0);
-}
-
-static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
-{
-	struct i915_ggtt * const ggtt = &i915->ggtt;
-	struct gen6_ppgtt *ppgtt;
-	int err;
-
-	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
-	if (!ppgtt)
-		return ERR_PTR(-ENOMEM);
-
-	mutex_init(&ppgtt->flush);
-	mutex_init(&ppgtt->pin_mutex);
-
-	ppgtt_init(&ppgtt->base, &i915->gt);
-	ppgtt->base.vm.top = 1;
-
-	ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
-	ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
-	ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
-	ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
-	ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;
-
-	ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
-
-	ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd));
-	if (!ppgtt->base.pd) {
-		err = -ENOMEM;
-		goto err_free;
-	}
-
-	err = gen6_ppgtt_init_scratch(ppgtt);
-	if (err)
-		goto err_pd;
-
-	ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
-	if (IS_ERR(ppgtt->vma)) {
-		err = PTR_ERR(ppgtt->vma);
-		goto err_scratch;
-	}
-
-	return &ppgtt->base;
-
-err_scratch:
-	free_scratch(&ppgtt->base.vm);
-err_pd:
-	kfree(ppgtt->base.pd);
-err_free:
-	mutex_destroy(&ppgtt->pin_mutex);
-	kfree(ppgtt);
-	return ERR_PTR(err);
-}
-
-static void gtt_write_workarounds(struct intel_gt *gt)
-{
-	struct drm_i915_private *i915 = gt->i915;
-	struct intel_uncore *uncore = gt->uncore;
-
-	/* This function is for gtt related workarounds. This function is
-	 * called on driver load and after a GPU reset, so you can place
-	 * workarounds here even if they get overwritten by GPU reset.
-	 */
-	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
-	if (IS_BROADWELL(i915))
-		intel_uncore_write(uncore,
-				   GEN8_L3_LRA_1_GPGPU,
-				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
-	else if (IS_CHERRYVIEW(i915))
-		intel_uncore_write(uncore,
-				   GEN8_L3_LRA_1_GPGPU,
-				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
-	else if (IS_GEN9_LP(i915))
-		intel_uncore_write(uncore,
-				   GEN8_L3_LRA_1_GPGPU,
-				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
-	else if (INTEL_GEN(i915) >= 9 && INTEL_GEN(i915) <= 11)
-		intel_uncore_write(uncore,
-				   GEN8_L3_LRA_1_GPGPU,
-				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
-
-	/*
-	 * To support 64K PTEs we need to first enable the use of the
-	 * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
-	 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
-	 * shouldn't be needed after GEN10.
-	 *
-	 * 64K pages were first introduced from BDW+, although technically they
-	 * only *work* from gen9+. For pre-BDW we instead have the option for
-	 * 32K pages, but we don't currently have any support for it in our
-	 * driver.
-	 */
-	if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
-	    INTEL_GEN(i915) <= 10)
-		intel_uncore_rmw(uncore,
-				 GEN8_GAMW_ECO_DEV_RW_IA,
-				 0,
-				 GAMW_ECO_ENABLE_64K_IPS_FIELD);
-
-	if (IS_GEN_RANGE(i915, 8, 11)) {
-		bool can_use_gtt_cache = true;
-
-		/*
-		 * According to the BSpec if we use 2M/1G pages then we also
-		 * need to disable the GTT cache. At least on BDW we can see
-		 * visual corruption when using 2M pages, and not disabling the
-		 * GTT cache.
-		 */
-		if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
-			can_use_gtt_cache = false;
-
-		/* WaGttCachingOffByDefault */
-		intel_uncore_write(uncore,
-				   HSW_GTT_CACHE_EN,
-				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
-		WARN_ON_ONCE(can_use_gtt_cache &&
-			     intel_uncore_read(uncore,
-					       HSW_GTT_CACHE_EN) == 0);
-	}
-}
-
-int i915_ppgtt_init_hw(struct intel_gt *gt)
-{
-	struct drm_i915_private *i915 = gt->i915;
-
-	gtt_write_workarounds(gt);
-
-	if (IS_GEN(i915, 6))
-		gen6_ppgtt_enable(gt);
-	else if (IS_GEN(i915, 7))
-		gen7_ppgtt_enable(gt);
-
-	return 0;
-}
-
-static struct i915_ppgtt *
-__ppgtt_create(struct drm_i915_private *i915)
-{
-	if (INTEL_GEN(i915) < 8)
-		return gen6_ppgtt_create(i915);
-	else
-		return gen8_ppgtt_create(i915);
-}
-
-struct i915_ppgtt *
-i915_ppgtt_create(struct drm_i915_private *i915)
-{
-	struct i915_ppgtt *ppgtt;
-
-	ppgtt = __ppgtt_create(i915);
-	if (IS_ERR(ppgtt))
-		return ppgtt;
-
-	trace_i915_ppgtt_create(&ppgtt->vm);
-
-	return ppgtt;
-}
-
-/* Certain Gen5 chipsets require require idling the GPU before
- * unmapping anything from the GTT when VT-d is enabled.
- */
-static bool needs_idle_maps(struct drm_i915_private *dev_priv)
-{
-	/* Query intel_iommu to see if we need the workaround. Presumably that
-	 * was loaded first.
-	 */
-	return IS_GEN(dev_priv, 5) && IS_MOBILE(dev_priv) && intel_vtd_active();
-}
-
-static void ggtt_suspend_mappings(struct i915_ggtt *ggtt)
-{
-	struct drm_i915_private *i915 = ggtt->vm.i915;
-
-	/* Don't bother messing with faults pre GEN6 as we have little
-	 * documentation supporting that it's a good idea.
-	 */
-	if (INTEL_GEN(i915) < 6)
-		return;
-
-	intel_gt_check_and_clear_faults(ggtt->vm.gt);
-
-	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
-
-	ggtt->invalidate(ggtt);
-}
-
-void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915)
-{
-	ggtt_suspend_mappings(&i915->ggtt);
-}
-
 int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
 			       struct sg_table *pages)
 {
@@ -2181,368 +52,6 @@ int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
 	return -ENOSPC;
 }
 
-static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
-{
-	writeq(pte, addr);
-}
-
-static void gen8_ggtt_insert_page(struct i915_address_space *vm,
-				  dma_addr_t addr,
-				  u64 offset,
-				  enum i915_cache_level level,
-				  u32 unused)
-{
-	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-	gen8_pte_t __iomem *pte =
-		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
-
-	gen8_set_pte(pte, gen8_pte_encode(addr, level, 0));
-
-	ggtt->invalidate(ggtt);
-}
-
-static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
-				     struct i915_vma *vma,
-				     enum i915_cache_level level,
-				     u32 flags)
-{
-	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-	struct sgt_iter sgt_iter;
-	gen8_pte_t __iomem *gtt_entries;
-	const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
-	dma_addr_t addr;
-
-	/*
-	 * Note that we ignore PTE_READ_ONLY here. The caller must be careful
-	 * not to allow the user to override access to a read only page.
-	 */
-
-	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
-	gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE;
-	for_each_sgt_daddr(addr, sgt_iter, vma->pages)
-		gen8_set_pte(gtt_entries++, pte_encode | addr);
-
-	/*
-	 * We want to flush the TLBs only after we're certain all the PTE
-	 * updates have finished.
-	 */
-	ggtt->invalidate(ggtt);
-}
-
-static void gen6_ggtt_insert_page(struct i915_address_space *vm,
-				  dma_addr_t addr,
-				  u64 offset,
-				  enum i915_cache_level level,
-				  u32 flags)
-{
-	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-	gen6_pte_t __iomem *pte =
-		(gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
-
-	iowrite32(vm->pte_encode(addr, level, flags), pte);
-
-	ggtt->invalidate(ggtt);
-}
-
-/*
- * Binds an object into the global gtt with the specified cache level. The object
- * will be accessible to the GPU via commands whose operands reference offsets
- * within the global GTT as well as accessible by the GPU through the GMADR
- * mapped BAR (dev_priv->mm.gtt->gtt).
- */
-static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
-				     struct i915_vma *vma,
-				     enum i915_cache_level level,
-				     u32 flags)
-{
-	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-	gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
-	unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE;
-	struct sgt_iter iter;
-	dma_addr_t addr;
-	for_each_sgt_daddr(addr, iter, vma->pages)
-		iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
-
-	/*
-	 * We want to flush the TLBs only after we're certain all the PTE
-	 * updates have finished.
-	 */
-	ggtt->invalidate(ggtt);
-}
-
-static void nop_clear_range(struct i915_address_space *vm,
-			    u64 start, u64 length)
-{
-}
-
-static void gen8_ggtt_clear_range(struct i915_address_space *vm,
-				  u64 start, u64 length)
-{
-	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-	unsigned first_entry = start / I915_GTT_PAGE_SIZE;
-	unsigned num_entries = length / I915_GTT_PAGE_SIZE;
-	const gen8_pte_t scratch_pte = vm->scratch[0].encode;
-	gen8_pte_t __iomem *gtt_base =
-		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
-	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
-	int i;
-
-	if (WARN(num_entries > max_entries,
-		 "First entry = %d; Num entries = %d (max=%d)\n",
-		 first_entry, num_entries, max_entries))
-		num_entries = max_entries;
-
-	for (i = 0; i < num_entries; i++)
-		gen8_set_pte(&gtt_base[i], scratch_pte);
-}
-
-static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
-{
-	struct drm_i915_private *dev_priv = vm->i915;
-
-	/*
-	 * Make sure the internal GAM fifo has been cleared of all GTT
-	 * writes before exiting stop_machine(). This guarantees that
-	 * any aperture accesses waiting to start in another process
-	 * cannot back up behind the GTT writes causing a hang.
-	 * The register can be any arbitrary GAM register.
-	 */
-	POSTING_READ(GFX_FLSH_CNTL_GEN6);
-}
-
-struct insert_page {
-	struct i915_address_space *vm;
-	dma_addr_t addr;
-	u64 offset;
-	enum i915_cache_level level;
-};
-
-static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
-{
-	struct insert_page *arg = _arg;
-
-	gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
-	bxt_vtd_ggtt_wa(arg->vm);
-
-	return 0;
-}
-
-static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
-					  dma_addr_t addr,
-					  u64 offset,
-					  enum i915_cache_level level,
-					  u32 unused)
-{
-	struct insert_page arg = { vm, addr, offset, level };
-
-	stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
-}
-
-struct insert_entries {
-	struct i915_address_space *vm;
-	struct i915_vma *vma;
-	enum i915_cache_level level;
-	u32 flags;
-};
-
-static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
-{
-	struct insert_entries *arg = _arg;
-
-	gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags);
-	bxt_vtd_ggtt_wa(arg->vm);
-
-	return 0;
-}
-
-static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
-					     struct i915_vma *vma,
-					     enum i915_cache_level level,
-					     u32 flags)
-{
-	struct insert_entries arg = { vm, vma, level, flags };
-
-	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
-}
-
-struct clear_range {
-	struct i915_address_space *vm;
-	u64 start;
-	u64 length;
-};
-
-static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
-{
-	struct clear_range *arg = _arg;
-
-	gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
-	bxt_vtd_ggtt_wa(arg->vm);
-
-	return 0;
-}
-
-static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
-					  u64 start,
-					  u64 length)
-{
-	struct clear_range arg = { vm, start, length };
-
-	stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
-}
-
-static void gen6_ggtt_clear_range(struct i915_address_space *vm,
-				  u64 start, u64 length)
-{
-	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-	unsigned first_entry = start / I915_GTT_PAGE_SIZE;
-	unsigned num_entries = length / I915_GTT_PAGE_SIZE;
-	gen6_pte_t scratch_pte, __iomem *gtt_base =
-		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
-	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
-	int i;
-
-	if (WARN(num_entries > max_entries,
-		 "First entry = %d; Num entries = %d (max=%d)\n",
-		 first_entry, num_entries, max_entries))
-		num_entries = max_entries;
-
-	scratch_pte = vm->scratch[0].encode;
-	for (i = 0; i < num_entries; i++)
-		iowrite32(scratch_pte, &gtt_base[i]);
-}
-
-static void i915_ggtt_insert_page(struct i915_address_space *vm,
-				  dma_addr_t addr,
-				  u64 offset,
-				  enum i915_cache_level cache_level,
-				  u32 unused)
-{
-	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
-		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
-
-	intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
-}
-
-static void i915_ggtt_insert_entries(struct i915_address_space *vm,
-				     struct i915_vma *vma,
-				     enum i915_cache_level cache_level,
-				     u32 unused)
-{
-	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
-		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
-
-	intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
-				    flags);
-}
-
-static void i915_ggtt_clear_range(struct i915_address_space *vm,
-				  u64 start, u64 length)
-{
-	intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
-}
-
-static int ggtt_bind_vma(struct i915_vma *vma,
-			 enum i915_cache_level cache_level,
-			 u32 flags)
-{
-	struct drm_i915_private *i915 = vma->vm->i915;
-	struct drm_i915_gem_object *obj = vma->obj;
-	intel_wakeref_t wakeref;
-	u32 pte_flags;
-
-	/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
-	pte_flags = 0;
-	if (i915_gem_object_is_readonly(obj))
-		pte_flags |= PTE_READ_ONLY;
-
-	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
-		vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
-
-	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
-
-	/*
-	 * Without aliasing PPGTT there's no difference between
-	 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
-	 * upgrade to both bound if we bind either to avoid double-binding.
-	 */
-	atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags);
-
-	return 0;
-}
-
-static void ggtt_unbind_vma(struct i915_vma *vma)
-{
-	struct drm_i915_private *i915 = vma->vm->i915;
-	intel_wakeref_t wakeref;
-
-	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
-		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
-}
-
-static int aliasing_gtt_bind_vma(struct i915_vma *vma,
-				 enum i915_cache_level cache_level,
-				 u32 flags)
-{
-	struct drm_i915_private *i915 = vma->vm->i915;
-	u32 pte_flags;
-	int ret;
-
-	/* Currently applicable only to VLV */
-	pte_flags = 0;
-	if (i915_gem_object_is_readonly(vma->obj))
-		pte_flags |= PTE_READ_ONLY;
-
-	if (flags & I915_VMA_LOCAL_BIND) {
-		struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias;
-
-		if (flags & I915_VMA_ALLOC) {
-			ret = alias->vm.allocate_va_range(&alias->vm,
-							  vma->node.start,
-							  vma->size);
-			if (ret)
-				return ret;
-
-			set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
-		}
-
-		GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT,
-				     __i915_vma_flags(vma)));
-		alias->vm.insert_entries(&alias->vm, vma,
-					 cache_level, pte_flags);
-	}
-
-	if (flags & I915_VMA_GLOBAL_BIND) {
-		intel_wakeref_t wakeref;
-
-		with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
-			vma->vm->insert_entries(vma->vm, vma,
-						cache_level, pte_flags);
-		}
-	}
-
-	return 0;
-}
-
-static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
-{
-	struct drm_i915_private *i915 = vma->vm->i915;
-
-	if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
-		struct i915_address_space *vm = vma->vm;
-		intel_wakeref_t wakeref;
-
-		with_intel_runtime_pm(&i915->runtime_pm, wakeref)
-			vm->clear_range(vm, vma->node.start, vma->size);
-	}
-
-	if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) {
-		struct i915_address_space *vm =
-			&i915_vm_to_ggtt(vma->vm)->alias->vm;
-
-		vm->clear_range(vm, vma->node.start, vma->size);
-	}
-}
-
 void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
 			       struct sg_table *pages)
 {
@@ -2563,1070 +72,6 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
 	dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL);
 }
 
-static int ggtt_set_pages(struct i915_vma *vma)
-{
-	int ret;
-
-	GEM_BUG_ON(vma->pages);
-
-	ret = i915_get_ggtt_vma_pages(vma);
-	if (ret)
-		return ret;
-
-	vma->page_sizes = vma->obj->mm.page_sizes;
-
-	return 0;
-}
-
-static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
-				   unsigned long color,
-				   u64 *start,
-				   u64 *end)
-{
-	if (i915_node_color_differs(node, color))
-		*start += I915_GTT_PAGE_SIZE;
-
-	/* Also leave a space between the unallocated reserved node after the
-	 * GTT and any objects within the GTT, i.e. we use the color adjustment
-	 * to insert a guard page to prevent prefetches crossing over the
-	 * GTT boundary.
-	 */
-	node = list_next_entry(node, node_list);
-	if (node->color != color)
-		*end -= I915_GTT_PAGE_SIZE;
-}
-
-static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
-{
-	struct i915_ppgtt *ppgtt;
-	int err;
-
-	ppgtt = i915_ppgtt_create(ggtt->vm.i915);
-	if (IS_ERR(ppgtt))
-		return PTR_ERR(ppgtt);
-
-	if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
-		err = -ENODEV;
-		goto err_ppgtt;
-	}
-
-	/*
-	 * Note we only pre-allocate as far as the end of the global
-	 * GTT. On 48b / 4-level page-tables, the difference is very,
-	 * very significant! We have to preallocate as GVT/vgpu does
-	 * not like the page directory disappearing.
-	 */
-	err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total);
-	if (err)
-		goto err_ppgtt;
-
-	ggtt->alias = ppgtt;
-	ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
-
-	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
-	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
-
-	GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
-	ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
-
-	return 0;
-
-err_ppgtt:
-	i915_vm_put(&ppgtt->vm);
-	return err;
-}
-
-static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
-{
-	struct i915_ppgtt *ppgtt;
-
-	ppgtt = fetch_and_zero(&ggtt->alias);
-	if (!ppgtt)
-		return;
-
-	i915_vm_put(&ppgtt->vm);
-
-	ggtt->vm.vma_ops.bind_vma   = ggtt_bind_vma;
-	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
-}
-
-static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
-{
-	u64 size;
-	int ret;
-
-	if (!USES_GUC(ggtt->vm.i915))
-		return 0;
-
-	GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
-	size = ggtt->vm.total - GUC_GGTT_TOP;
-
-	ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size,
-				   GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
-				   PIN_NOEVICT);
-	if (ret)
-		DRM_DEBUG_DRIVER("Failed to reserve top of GGTT for GuC\n");
-
-	return ret;
-}
-
-static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
-{
-	if (drm_mm_node_allocated(&ggtt->uc_fw))
-		drm_mm_remove_node(&ggtt->uc_fw);
-}
-
-static void cleanup_init_ggtt(struct i915_ggtt *ggtt)
-{
-	ggtt_release_guc_top(ggtt);
-	if (drm_mm_node_allocated(&ggtt->error_capture))
-		drm_mm_remove_node(&ggtt->error_capture);
-}
-
-static int init_ggtt(struct i915_ggtt *ggtt)
-{
-	/* Let GEM Manage all of the aperture.
-	 *
-	 * However, leave one page at the end still bound to the scratch page.
-	 * There are a number of places where the hardware apparently prefetches
-	 * past the end of the object, and we've seen multiple hangs with the
-	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
-	 * aperture.  One page should be enough to keep any prefetching inside
-	 * of the aperture.
-	 */
-	unsigned long hole_start, hole_end;
-	struct drm_mm_node *entry;
-	int ret;
-
-	/*
-	 * GuC requires all resources that we're sharing with it to be placed in
-	 * non-WOPCM memory. If GuC is not present or not in use we still need a
-	 * small bias as ring wraparound at offset 0 sometimes hangs. No idea
-	 * why.
-	 */
-	ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
-			       intel_wopcm_guc_size(&ggtt->vm.i915->wopcm));
-
-	ret = intel_vgt_balloon(ggtt);
-	if (ret)
-		return ret;
-
-	if (ggtt->mappable_end) {
-		/* Reserve a mappable slot for our lockless error capture */
-		ret = drm_mm_insert_node_in_range(&ggtt->vm.mm, &ggtt->error_capture,
-						  PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
-						  0, ggtt->mappable_end,
-						  DRM_MM_INSERT_LOW);
-		if (ret)
-			return ret;
-	}
-
-	/*
-	 * The upper portion of the GuC address space has a sizeable hole
-	 * (several MB) that is inaccessible by GuC. Reserve this range within
-	 * GGTT as it can comfortably hold GuC/HuC firmware images.
-	 */
-	ret = ggtt_reserve_guc_top(ggtt);
-	if (ret)
-		goto err;
-
-	/* Clear any non-preallocated blocks */
-	drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
-		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
-			      hole_start, hole_end);
-		ggtt->vm.clear_range(&ggtt->vm, hole_start,
-				     hole_end - hole_start);
-	}
-
-	/* And finally clear the reserved guard page */
-	ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);
-
-	return 0;
-
-err:
-	cleanup_init_ggtt(ggtt);
-	return ret;
-}
-
-int i915_init_ggtt(struct drm_i915_private *i915)
-{
-	int ret;
-
-	ret = init_ggtt(&i915->ggtt);
-	if (ret)
-		return ret;
-
-	if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
-		ret = init_aliasing_ppgtt(&i915->ggtt);
-		if (ret)
-			cleanup_init_ggtt(&i915->ggtt);
-	}
-
-	return 0;
-}
-
-static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
-{
-	struct i915_vma *vma, *vn;
-
-	atomic_set(&ggtt->vm.open, 0);
-
-	rcu_barrier(); /* flush the RCU'ed__i915_vm_release */
-	flush_workqueue(ggtt->vm.i915->wq);
-
-	mutex_lock(&ggtt->vm.mutex);
-
-	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
-		WARN_ON(__i915_vma_unbind(vma));
-
-	if (drm_mm_node_allocated(&ggtt->error_capture))
-		drm_mm_remove_node(&ggtt->error_capture);
-
-	ggtt_release_guc_top(ggtt);
-	intel_vgt_deballoon(ggtt);
-
-	ggtt->vm.cleanup(&ggtt->vm);
-
-	mutex_unlock(&ggtt->vm.mutex);
-	i915_address_space_fini(&ggtt->vm);
-
-	arch_phys_wc_del(ggtt->mtrr);
-
-	if (ggtt->iomap.size)
-		io_mapping_fini(&ggtt->iomap);
-}
-
-/**
- * i915_ggtt_driver_release - Clean up GGTT hardware initialization
- * @i915: i915 device
- */
-void i915_ggtt_driver_release(struct drm_i915_private *i915)
-{
-	struct pagevec *pvec;
-
-	fini_aliasing_ppgtt(&i915->ggtt);
-
-	ggtt_cleanup_hw(&i915->ggtt);
-
-	pvec = &i915->mm.wc_stash.pvec;
-	if (pvec->nr) {
-		set_pages_array_wb(pvec->pages, pvec->nr);
-		__pagevec_release(pvec);
-	}
-}
-
-static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
-{
-	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
-	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
-	return snb_gmch_ctl << 20;
-}
-
-static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
-{
-	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
-	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
-	if (bdw_gmch_ctl)
-		bdw_gmch_ctl = 1 << bdw_gmch_ctl;
-
-#ifdef CONFIG_X86_32
-	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
-	if (bdw_gmch_ctl > 4)
-		bdw_gmch_ctl = 4;
-#endif
-
-	return bdw_gmch_ctl << 20;
-}
-
-static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
-{
-	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
-	gmch_ctrl &= SNB_GMCH_GGMS_MASK;
-
-	if (gmch_ctrl)
-		return 1 << (20 + gmch_ctrl);
-
-	return 0;
-}
-
-static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
-{
-	struct drm_i915_private *dev_priv = ggtt->vm.i915;
-	struct pci_dev *pdev = dev_priv->drm.pdev;
-	phys_addr_t phys_addr;
-	int ret;
-
-	/* For Modern GENs the PTEs and register space are split in the BAR */
-	phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;
-
-	/*
-	 * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range
-	 * will be dropped. For WC mappings in general we have 64 byte burst
-	 * writes when the WC buffer is flushed, so we can't use it, but have to
-	 * resort to an uncached mapping. The WC issue is easily caught by the
-	 * readback check when writing GTT PTE entries.
-	 */
-	if (IS_GEN9_LP(dev_priv) || INTEL_GEN(dev_priv) >= 10)
-		ggtt->gsm = ioremap_nocache(phys_addr, size);
-	else
-		ggtt->gsm = ioremap_wc(phys_addr, size);
-	if (!ggtt->gsm) {
-		DRM_ERROR("Failed to map the ggtt page table\n");
-		return -ENOMEM;
-	}
-
-	ret = setup_scratch_page(&ggtt->vm, GFP_DMA32);
-	if (ret) {
-		DRM_ERROR("Scratch setup failed\n");
-		/* iounmap will also get called at remove, but meh */
-		iounmap(ggtt->gsm);
-		return ret;
-	}
-
-	ggtt->vm.scratch[0].encode =
-		ggtt->vm.pte_encode(px_dma(&ggtt->vm.scratch[0]),
-				    I915_CACHE_NONE, 0);
-
-	return 0;
-}
-
-static void tgl_setup_private_ppat(struct intel_uncore *uncore)
-{
-	/* TGL doesn't support LLC or AGE settings */
-	intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
-	intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
-	intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
-	intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
-	intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
-	intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
-	intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
-	intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
-}
-
-static void cnl_setup_private_ppat(struct intel_uncore *uncore)
-{
-	intel_uncore_write(uncore,
-			   GEN10_PAT_INDEX(0),
-			   GEN8_PPAT_WB | GEN8_PPAT_LLC);
-	intel_uncore_write(uncore,
-			   GEN10_PAT_INDEX(1),
-			   GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
-	intel_uncore_write(uncore,
-			   GEN10_PAT_INDEX(2),
-			   GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
-	intel_uncore_write(uncore,
-			   GEN10_PAT_INDEX(3),
-			   GEN8_PPAT_UC);
-	intel_uncore_write(uncore,
-			   GEN10_PAT_INDEX(4),
-			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
-	intel_uncore_write(uncore,
-			   GEN10_PAT_INDEX(5),
-			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
-	intel_uncore_write(uncore,
-			   GEN10_PAT_INDEX(6),
-			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
-	intel_uncore_write(uncore,
-			   GEN10_PAT_INDEX(7),
-			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
-}
-
-/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
- * bits. When using advanced contexts each context stores its own PAT, but
- * writing this data shouldn't be harmful even in those cases. */
-static void bdw_setup_private_ppat(struct intel_uncore *uncore)
-{
-	u64 pat;
-
-	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |	/* for normal objects, no eLLC */
-	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |	/* for something pointing to ptes? */
-	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) |	/* for scanout with eLLC */
-	      GEN8_PPAT(3, GEN8_PPAT_UC) |			/* Uncached objects, mostly for scanout */
-	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
-	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
-	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
-	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
-
-	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
-	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
-}
-
-static void chv_setup_private_ppat(struct intel_uncore *uncore)
-{
-	u64 pat;
-
-	/*
-	 * Map WB on BDW to snooped on CHV.
-	 *
-	 * Only the snoop bit has meaning for CHV, the rest is
-	 * ignored.
-	 *
-	 * The hardware will never snoop for certain types of accesses:
-	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
-	 * - PPGTT page tables
-	 * - some other special cycles
-	 *
-	 * As with BDW, we also need to consider the following for GT accesses:
-	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
-	 * so RTL will always use the value corresponding to
-	 * pat_sel = 000".
-	 * Which means we must set the snoop bit in PAT entry 0
-	 * in order to keep the global status page working.
-	 */
-
-	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
-	      GEN8_PPAT(1, 0) |
-	      GEN8_PPAT(2, 0) |
-	      GEN8_PPAT(3, 0) |
-	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
-	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
-	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
-	      GEN8_PPAT(7, CHV_PPAT_SNOOP);
-
-	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
-	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
-}
-
-static void gen6_gmch_remove(struct i915_address_space *vm)
-{
-	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-
-	iounmap(ggtt->gsm);
-	cleanup_scratch_page(vm);
-}
-
-static void setup_private_pat(struct intel_uncore *uncore)
-{
-	struct drm_i915_private *i915 = uncore->i915;
-
-	GEM_BUG_ON(INTEL_GEN(i915) < 8);
-
-	if (INTEL_GEN(i915) >= 12)
-		tgl_setup_private_ppat(uncore);
-	else if (INTEL_GEN(i915) >= 10)
-		cnl_setup_private_ppat(uncore);
-	else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
-		chv_setup_private_ppat(uncore);
-	else
-		bdw_setup_private_ppat(uncore);
-}
-
-static struct resource pci_resource(struct pci_dev *pdev, int bar)
-{
-	return (struct resource)DEFINE_RES_MEM(pci_resource_start(pdev, bar),
-					       pci_resource_len(pdev, bar));
-}
-
-static int gen8_gmch_probe(struct i915_ggtt *ggtt)
-{
-	struct drm_i915_private *dev_priv = ggtt->vm.i915;
-	struct pci_dev *pdev = dev_priv->drm.pdev;
-	unsigned int size;
-	u16 snb_gmch_ctl;
-	int err;
-
-	/* TODO: We're not aware of mappable constraints on gen8 yet */
-	if (!IS_DGFX(dev_priv)) {
-		ggtt->gmadr = pci_resource(pdev, 2);
-		ggtt->mappable_end = resource_size(&ggtt->gmadr);
-	}
-
-	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
-	if (!err)
-		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
-	if (err)
-		DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
-
-	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
-	if (IS_CHERRYVIEW(dev_priv))
-		size = chv_get_total_gtt_size(snb_gmch_ctl);
-	else
-		size = gen8_get_total_gtt_size(snb_gmch_ctl);
-
-	ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
-	ggtt->vm.cleanup = gen6_gmch_remove;
-	ggtt->vm.insert_page = gen8_ggtt_insert_page;
-	ggtt->vm.clear_range = nop_clear_range;
-	if (intel_scanout_needs_vtd_wa(dev_priv))
-		ggtt->vm.clear_range = gen8_ggtt_clear_range;
-
-	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
-
-	/* Serialize GTT updates with aperture access on BXT if VT-d is on. */
-	if (intel_ggtt_update_needs_vtd_wa(dev_priv) ||
-	    IS_CHERRYVIEW(dev_priv) /* fails with concurrent use/update */) {
-		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
-		ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
-		if (ggtt->vm.clear_range != nop_clear_range)
-			ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
-	}
-
-	ggtt->invalidate = gen8_ggtt_invalidate;
-
-	ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
-	ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
-	ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
-	ggtt->vm.vma_ops.clear_pages = clear_pages;
-
-	ggtt->vm.pte_encode = gen8_pte_encode;
-
-	setup_private_pat(ggtt->vm.gt->uncore);
-
-	return ggtt_probe_common(ggtt, size);
-}
-
-static int gen6_gmch_probe(struct i915_ggtt *ggtt)
-{
-	struct drm_i915_private *dev_priv = ggtt->vm.i915;
-	struct pci_dev *pdev = dev_priv->drm.pdev;
-	unsigned int size;
-	u16 snb_gmch_ctl;
-	int err;
-
-	ggtt->gmadr =
-		(struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2),
-						 pci_resource_len(pdev, 2));
-	ggtt->mappable_end = resource_size(&ggtt->gmadr);
-
-	/* 64/512MB is the current min/max we actually know of, but this is just
-	 * a coarse sanity check.
-	 */
-	if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
-		DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end);
-		return -ENXIO;
-	}
-
-	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
-	if (!err)
-		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
-	if (err)
-		DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
-	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
-
-	size = gen6_get_total_gtt_size(snb_gmch_ctl);
-	ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
-
-	ggtt->vm.clear_range = nop_clear_range;
-	if (!HAS_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv))
-		ggtt->vm.clear_range = gen6_ggtt_clear_range;
-	ggtt->vm.insert_page = gen6_ggtt_insert_page;
-	ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
-	ggtt->vm.cleanup = gen6_gmch_remove;
-
-	ggtt->invalidate = gen6_ggtt_invalidate;
-
-	if (HAS_EDRAM(dev_priv))
-		ggtt->vm.pte_encode = iris_pte_encode;
-	else if (IS_HASWELL(dev_priv))
-		ggtt->vm.pte_encode = hsw_pte_encode;
-	else if (IS_VALLEYVIEW(dev_priv))
-		ggtt->vm.pte_encode = byt_pte_encode;
-	else if (INTEL_GEN(dev_priv) >= 7)
-		ggtt->vm.pte_encode = ivb_pte_encode;
-	else
-		ggtt->vm.pte_encode = snb_pte_encode;
-
-	ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
-	ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
-	ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
-	ggtt->vm.vma_ops.clear_pages = clear_pages;
-
-	return ggtt_probe_common(ggtt, size);
-}
-
-static void i915_gmch_remove(struct i915_address_space *vm)
-{
-	intel_gmch_remove();
-}
-
-static int i915_gmch_probe(struct i915_ggtt *ggtt)
-{
-	struct drm_i915_private *dev_priv = ggtt->vm.i915;
-	phys_addr_t gmadr_base;
-	int ret;
-
-	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL);
-	if (!ret) {
-		DRM_ERROR("failed to set up gmch\n");
-		return -EIO;
-	}
-
-	intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end);
-
-	ggtt->gmadr =
-		(struct resource) DEFINE_RES_MEM(gmadr_base,
-						 ggtt->mappable_end);
-
-	ggtt->do_idle_maps = needs_idle_maps(dev_priv);
-	ggtt->vm.insert_page = i915_ggtt_insert_page;
-	ggtt->vm.insert_entries = i915_ggtt_insert_entries;
-	ggtt->vm.clear_range = i915_ggtt_clear_range;
-	ggtt->vm.cleanup = i915_gmch_remove;
-
-	ggtt->invalidate = gmch_ggtt_invalidate;
-
-	ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
-	ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
-	ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
-	ggtt->vm.vma_ops.clear_pages = clear_pages;
-
-	if (unlikely(ggtt->do_idle_maps))
-		dev_notice(dev_priv->drm.dev,
-			   "Applying Ironlake quirks for intel_iommu\n");
-
-	return 0;
-}
-
-static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
-{
-	struct drm_i915_private *i915 = gt->i915;
-	int ret;
-
-	ggtt->vm.gt = gt;
-	ggtt->vm.i915 = i915;
-	ggtt->vm.dma = &i915->drm.pdev->dev;
-
-	if (INTEL_GEN(i915) <= 5)
-		ret = i915_gmch_probe(ggtt);
-	else if (INTEL_GEN(i915) < 8)
-		ret = gen6_gmch_probe(ggtt);
-	else
-		ret = gen8_gmch_probe(ggtt);
-	if (ret)
-		return ret;
-
-	if ((ggtt->vm.total - 1) >> 32) {
-		DRM_ERROR("We never expected a Global GTT with more than 32bits"
-			  " of address space! Found %lldM!\n",
-			  ggtt->vm.total >> 20);
-		ggtt->vm.total = 1ULL << 32;
-		ggtt->mappable_end =
-			min_t(u64, ggtt->mappable_end, ggtt->vm.total);
-	}
-
-	if (ggtt->mappable_end > ggtt->vm.total) {
-		DRM_ERROR("mappable aperture extends past end of GGTT,"
-			  " aperture=%pa, total=%llx\n",
-			  &ggtt->mappable_end, ggtt->vm.total);
-		ggtt->mappable_end = ggtt->vm.total;
-	}
-
-	/* GMADR is the PCI mmio aperture into the global GTT. */
-	DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->vm.total >> 20);
-	DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20);
-	DRM_DEBUG_DRIVER("DSM size = %lluM\n",
-			 (u64)resource_size(&intel_graphics_stolen_res) >> 20);
-
-	return 0;
-}
-
-/**
- * i915_ggtt_probe_hw - Probe GGTT hardware location
- * @i915: i915 device
- */
-int i915_ggtt_probe_hw(struct drm_i915_private *i915)
-{
-	int ret;
-
-	ret = ggtt_probe_hw(&i915->ggtt, &i915->gt);
-	if (ret)
-		return ret;
-
-	if (intel_vtd_active())
-		dev_info(i915->drm.dev, "VT-d active for gfx access\n");
-
-	return 0;
-}
-
-static int ggtt_init_hw(struct i915_ggtt *ggtt)
-{
-	struct drm_i915_private *i915 = ggtt->vm.i915;
-
-	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
-
-	ggtt->vm.is_ggtt = true;
-
-	/* Only VLV supports read-only GGTT mappings */
-	ggtt->vm.has_read_only = IS_VALLEYVIEW(i915);
-
-	if (!HAS_LLC(i915) && !HAS_PPGTT(i915))
-		ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust;
-
-	if (ggtt->mappable_end) {
-		if (!io_mapping_init_wc(&ggtt->iomap,
-					ggtt->gmadr.start,
-					ggtt->mappable_end)) {
-			ggtt->vm.cleanup(&ggtt->vm);
-			return -EIO;
-		}
-
-		ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start,
-					      ggtt->mappable_end);
-	}
-
-	i915_ggtt_init_fences(ggtt);
-
-	return 0;
-}
-
-/**
- * i915_ggtt_init_hw - Initialize GGTT hardware
- * @dev_priv: i915 device
- */
-int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
-{
-	int ret;
-
-	stash_init(&dev_priv->mm.wc_stash);
-
-	/* Note that we use page colouring to enforce a guard page at the
-	 * end of the address space. This is required as the CS may prefetch
-	 * beyond the end of the batch buffer, across the page boundary,
-	 * and beyond the end of the GTT if we do not provide a guard.
-	 */
-	ret = ggtt_init_hw(&dev_priv->ggtt);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv)
-{
-	if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt())
-		return -EIO;
-
-	return 0;
-}
-
-void i915_ggtt_enable_guc(struct i915_ggtt *ggtt)
-{
-	GEM_BUG_ON(ggtt->invalidate != gen8_ggtt_invalidate);
-
-	ggtt->invalidate = guc_ggtt_invalidate;
-
-	ggtt->invalidate(ggtt);
-}
-
-void i915_ggtt_disable_guc(struct i915_ggtt *ggtt)
-{
-	/* XXX Temporary pardon for error unload */
-	if (ggtt->invalidate == gen8_ggtt_invalidate)
-		return;
-
-	/* We should only be called after i915_ggtt_enable_guc() */
-	GEM_BUG_ON(ggtt->invalidate != guc_ggtt_invalidate);
-
-	ggtt->invalidate = gen8_ggtt_invalidate;
-
-	ggtt->invalidate(ggtt);
-}
-
-static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
-{
-	struct i915_vma *vma, *vn;
-	bool flush = false;
-	int open;
-
-	intel_gt_check_and_clear_faults(ggtt->vm.gt);
-
-	mutex_lock(&ggtt->vm.mutex);
-
-	/* First fill our portion of the GTT with scratch pages */
-	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
-
-	/* Skip rewriting PTE on VMA unbind. */
-	open = atomic_xchg(&ggtt->vm.open, 0);
-
-	/* clflush objects bound into the GGTT and rebind them. */
-	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
-		struct drm_i915_gem_object *obj = vma->obj;
-
-		if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
-			continue;
-
-		if (!__i915_vma_unbind(vma))
-			continue;
-
-		clear_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma));
-		WARN_ON(i915_vma_bind(vma,
-				      obj ? obj->cache_level : 0,
-				      PIN_GLOBAL, NULL));
-		if (obj) { /* only used during resume => exclusive access */
-			flush |= fetch_and_zero(&obj->write_domain);
-			obj->read_domains |= I915_GEM_DOMAIN_GTT;
-		}
-	}
-
-	atomic_set(&ggtt->vm.open, open);
-	ggtt->invalidate(ggtt);
-
-	mutex_unlock(&ggtt->vm.mutex);
-
-	if (flush)
-		wbinvd_on_all_cpus();
-}
-
-void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915)
-{
-	struct i915_ggtt *ggtt = &i915->ggtt;
-
-	ggtt_restore_mappings(ggtt);
-
-	if (INTEL_GEN(i915) >= 8)
-		setup_private_pat(ggtt->vm.gt->uncore);
-}
-
-static struct scatterlist *
-rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset,
-	     unsigned int width, unsigned int height,
-	     unsigned int stride,
-	     struct sg_table *st, struct scatterlist *sg)
-{
-	unsigned int column, row;
-	unsigned int src_idx;
-
-	for (column = 0; column < width; column++) {
-		src_idx = stride * (height - 1) + column + offset;
-		for (row = 0; row < height; row++) {
-			st->nents++;
-			/* We don't need the pages, but need to initialize
-			 * the entries so the sg list can be happily traversed.
-			 * The only thing we need are DMA addresses.
-			 */
-			sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0);
-			sg_dma_address(sg) =
-				i915_gem_object_get_dma_address(obj, src_idx);
-			sg_dma_len(sg) = I915_GTT_PAGE_SIZE;
-			sg = sg_next(sg);
-			src_idx -= stride;
-		}
-	}
-
-	return sg;
-}
-
-static noinline struct sg_table *
-intel_rotate_pages(struct intel_rotation_info *rot_info,
-		   struct drm_i915_gem_object *obj)
-{
-	unsigned int size = intel_rotation_info_size(rot_info);
-	struct sg_table *st;
-	struct scatterlist *sg;
-	int ret = -ENOMEM;
-	int i;
-
-	/* Allocate target SG list. */
-	st = kmalloc(sizeof(*st), GFP_KERNEL);
-	if (!st)
-		goto err_st_alloc;
-
-	ret = sg_alloc_table(st, size, GFP_KERNEL);
-	if (ret)
-		goto err_sg_alloc;
-
-	st->nents = 0;
-	sg = st->sgl;
-
-	for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
-		sg = rotate_pages(obj, rot_info->plane[i].offset,
-				  rot_info->plane[i].width, rot_info->plane[i].height,
-				  rot_info->plane[i].stride, st, sg);
-	}
-
-	return st;
-
-err_sg_alloc:
-	kfree(st);
-err_st_alloc:
-
-	DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
-			 obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
-
-	return ERR_PTR(ret);
-}
-
-static struct scatterlist *
-remap_pages(struct drm_i915_gem_object *obj, unsigned int offset,
-	    unsigned int width, unsigned int height,
-	    unsigned int stride,
-	    struct sg_table *st, struct scatterlist *sg)
-{
-	unsigned int row;
-
-	for (row = 0; row < height; row++) {
-		unsigned int left = width * I915_GTT_PAGE_SIZE;
-
-		while (left) {
-			dma_addr_t addr;
-			unsigned int length;
-
-			/* We don't need the pages, but need to initialize
-			 * the entries so the sg list can be happily traversed.
-			 * The only thing we need are DMA addresses.
-			 */
-
-			addr = i915_gem_object_get_dma_address_len(obj, offset, &length);
-
-			length = min(left, length);
-
-			st->nents++;
-
-			sg_set_page(sg, NULL, length, 0);
-			sg_dma_address(sg) = addr;
-			sg_dma_len(sg) = length;
-			sg = sg_next(sg);
-
-			offset += length / I915_GTT_PAGE_SIZE;
-			left -= length;
-		}
-
-		offset += stride - width;
-	}
-
-	return sg;
-}
-
-static noinline struct sg_table *
-intel_remap_pages(struct intel_remapped_info *rem_info,
-		  struct drm_i915_gem_object *obj)
-{
-	unsigned int size = intel_remapped_info_size(rem_info);
-	struct sg_table *st;
-	struct scatterlist *sg;
-	int ret = -ENOMEM;
-	int i;
-
-	/* Allocate target SG list. */
-	st = kmalloc(sizeof(*st), GFP_KERNEL);
-	if (!st)
-		goto err_st_alloc;
-
-	ret = sg_alloc_table(st, size, GFP_KERNEL);
-	if (ret)
-		goto err_sg_alloc;
-
-	st->nents = 0;
-	sg = st->sgl;
-
-	for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
-		sg = remap_pages(obj, rem_info->plane[i].offset,
-				 rem_info->plane[i].width, rem_info->plane[i].height,
-				 rem_info->plane[i].stride, st, sg);
-	}
-
-	i915_sg_trim(st);
-
-	return st;
-
-err_sg_alloc:
-	kfree(st);
-err_st_alloc:
-
-	DRM_DEBUG_DRIVER("Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
-			 obj->base.size, rem_info->plane[0].width, rem_info->plane[0].height, size);
-
-	return ERR_PTR(ret);
-}
-
-static noinline struct sg_table *
-intel_partial_pages(const struct i915_ggtt_view *view,
-		    struct drm_i915_gem_object *obj)
-{
-	struct sg_table *st;
-	struct scatterlist *sg, *iter;
-	unsigned int count = view->partial.size;
-	unsigned int offset;
-	int ret = -ENOMEM;
-
-	st = kmalloc(sizeof(*st), GFP_KERNEL);
-	if (!st)
-		goto err_st_alloc;
-
-	ret = sg_alloc_table(st, count, GFP_KERNEL);
-	if (ret)
-		goto err_sg_alloc;
-
-	iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
-	GEM_BUG_ON(!iter);
-
-	sg = st->sgl;
-	st->nents = 0;
-	do {
-		unsigned int len;
-
-		len = min(iter->length - (offset << PAGE_SHIFT),
-			  count << PAGE_SHIFT);
-		sg_set_page(sg, NULL, len, 0);
-		sg_dma_address(sg) =
-			sg_dma_address(iter) + (offset << PAGE_SHIFT);
-		sg_dma_len(sg) = len;
-
-		st->nents++;
-		count -= len >> PAGE_SHIFT;
-		if (count == 0) {
-			sg_mark_end(sg);
-			i915_sg_trim(st); /* Drop any unused tail entries. */
-
-			return st;
-		}
-
-		sg = __sg_next(sg);
-		iter = __sg_next(iter);
-		offset = 0;
-	} while (1);
-
-err_sg_alloc:
-	kfree(st);
-err_st_alloc:
-	return ERR_PTR(ret);
-}
-
-static int
-i915_get_ggtt_vma_pages(struct i915_vma *vma)
-{
-	int ret;
-
-	/* The vma->pages are only valid within the lifespan of the borrowed
-	 * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
-	 * must be the vma->pages. A simple rule is that vma->pages must only
-	 * be accessed when the obj->mm.pages are pinned.
-	 */
-	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));
-
-	switch (vma->ggtt_view.type) {
-	default:
-		GEM_BUG_ON(vma->ggtt_view.type);
-		/* fall through */
-	case I915_GGTT_VIEW_NORMAL:
-		vma->pages = vma->obj->mm.pages;
-		return 0;
-
-	case I915_GGTT_VIEW_ROTATED:
-		vma->pages =
-			intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
-		break;
-
-	case I915_GGTT_VIEW_REMAPPED:
-		vma->pages =
-			intel_remap_pages(&vma->ggtt_view.remapped, vma->obj);
-		break;
-
-	case I915_GGTT_VIEW_PARTIAL:
-		vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
-		break;
-	}
-
-	ret = 0;
-	if (IS_ERR(vma->pages)) {
-		ret = PTR_ERR(vma->pages);
-		vma->pages = NULL;
-		DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
-			  vma->ggtt_view.type, ret);
-	}
-	return ret;
-}
-
 /**
  * i915_gem_gtt_reserve - reserve a node in an address_space (GTT)
  * @vm: the &struct i915_address_space
@@ -3848,6 +293,5 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/mock_gtt.c"
 #include "selftests/i915_gem_gtt.c"
 #endif
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 31a4a96ddd0d..f6226df9f972 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -1,639 +1,21 @@
+/* SPDX-License-Identifier: MIT */
 /*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Please try to maintain the following order within this file unless it makes
- * sense to do otherwise. From top to bottom:
- * 1. typedefs
- * 2. #defines, and macros
- * 3. structure definitions
- * 4. function prototypes
- *
- * Within each section, please try to order by generation in ascending order,
- * from top to bottom (ie. gen6 on the top, gen8 on the bottom).
+ * Copyright © 2020 Intel Corporation
  */
 
 #ifndef __I915_GEM_GTT_H__
 #define __I915_GEM_GTT_H__
 
 #include <linux/io-mapping.h>
-#include <linux/kref.h>
-#include <linux/mm.h>
-#include <linux/pagevec.h>
-#include <linux/workqueue.h>
+#include <linux/types.h>
 
 #include <drm/drm_mm.h>
 
-#include "gt/intel_reset.h"
-#include "i915_gem_fence_reg.h"
-#include "i915_request.h"
+#include "gt/intel_gtt.h"
 #include "i915_scatterlist.h"
-#include "i915_selftest.h"
-#include "gt/intel_timeline.h"
 
-#define I915_GTT_PAGE_SIZE_4K	BIT_ULL(12)
-#define I915_GTT_PAGE_SIZE_64K	BIT_ULL(16)
-#define I915_GTT_PAGE_SIZE_2M	BIT_ULL(21)
-
-#define I915_GTT_PAGE_SIZE I915_GTT_PAGE_SIZE_4K
-#define I915_GTT_MAX_PAGE_SIZE I915_GTT_PAGE_SIZE_2M
-
-#define I915_GTT_PAGE_MASK -I915_GTT_PAGE_SIZE
-
-#define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE
-
-#define I915_FENCE_REG_NONE -1
-#define I915_MAX_NUM_FENCES 32
-/* 32 fences + sign bit for FENCE_REG_NONE */
-#define I915_MAX_NUM_FENCE_BITS 6
-
-struct drm_i915_file_private;
 struct drm_i915_gem_object;
-struct i915_vma;
-struct intel_gt;
-
-typedef u32 gen6_pte_t;
-typedef u64 gen8_pte_t;
-
-#define ggtt_total_entries(ggtt) ((ggtt)->vm.total >> PAGE_SHIFT)
-
-/* gen6-hsw has bit 11-4 for physical addr bit 39-32 */
-#define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))
-#define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
-#define GEN6_PDE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
-#define GEN6_PTE_CACHE_LLC		(2 << 1)
-#define GEN6_PTE_UNCACHED		(1 << 1)
-#define GEN6_PTE_VALID			(1 << 0)
-
-#define I915_PTES(pte_len)		((unsigned int)(PAGE_SIZE / (pte_len)))
-#define I915_PTE_MASK(pte_len)		(I915_PTES(pte_len) - 1)
-#define I915_PDES			512
-#define I915_PDE_MASK			(I915_PDES - 1)
-#define NUM_PTE(pde_shift)     (1 << (pde_shift - PAGE_SHIFT))
-
-#define GEN6_PTES			I915_PTES(sizeof(gen6_pte_t))
-#define GEN6_PD_SIZE		        (I915_PDES * PAGE_SIZE)
-#define GEN6_PD_ALIGN			(PAGE_SIZE * 16)
-#define GEN6_PDE_SHIFT			22
-#define GEN6_PDE_VALID			(1 << 0)
-
-#define GEN7_PTE_CACHE_L3_LLC		(3 << 1)
-
-#define BYT_PTE_SNOOPED_BY_CPU_CACHES	(1 << 2)
-#define BYT_PTE_WRITEABLE		(1 << 1)
-
-/* Cacheability Control is a 4-bit value. The low three bits are stored in bits
- * 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
- */
-#define HSW_CACHEABILITY_CONTROL(bits)	((((bits) & 0x7) << 1) | \
-					 (((bits) & 0x8) << (11 - 3)))
-#define HSW_WB_LLC_AGE3			HSW_CACHEABILITY_CONTROL(0x2)
-#define HSW_WB_LLC_AGE0			HSW_CACHEABILITY_CONTROL(0x3)
-#define HSW_WB_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x8)
-#define HSW_WB_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0xb)
-#define HSW_WT_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x7)
-#define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
-#define HSW_PTE_UNCACHED		(0)
-#define HSW_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0x7f0))
-#define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)
-
-/*
- * GEN8 32b style address is defined as a 3 level page table:
- * 31:30 | 29:21 | 20:12 |  11:0
- * PDPE  |  PDE  |  PTE  | offset
- * The difference as compared to normal x86 3 level page table is the PDPEs are
- * programmed via register.
- *
- * GEN8 48b style address is defined as a 4 level page table:
- * 47:39 | 38:30 | 29:21 | 20:12 |  11:0
- * PML4E | PDPE  |  PDE  |  PTE  | offset
- */
-#define GEN8_3LVL_PDPES			4
-
-#define PPAT_UNCACHED			(_PAGE_PWT | _PAGE_PCD)
-#define PPAT_CACHED_PDE			0 /* WB LLC */
-#define PPAT_CACHED			_PAGE_PAT /* WB LLCeLLC */
-#define PPAT_DISPLAY_ELLC		_PAGE_PCD /* WT eLLC */
-
-#define CHV_PPAT_SNOOP			(1<<6)
-#define GEN8_PPAT_AGE(x)		((x)<<4)
-#define GEN8_PPAT_LLCeLLC		(3<<2)
-#define GEN8_PPAT_LLCELLC		(2<<2)
-#define GEN8_PPAT_LLC			(1<<2)
-#define GEN8_PPAT_WB			(3<<0)
-#define GEN8_PPAT_WT			(2<<0)
-#define GEN8_PPAT_WC			(1<<0)
-#define GEN8_PPAT_UC			(0<<0)
-#define GEN8_PPAT_ELLC_OVERRIDE		(0<<2)
-#define GEN8_PPAT(i, x)			((u64)(x) << ((i) * 8))
-
-#define GEN8_PDE_IPS_64K BIT(11)
-#define GEN8_PDE_PS_2M   BIT(7)
-
-#define for_each_sgt_daddr(__dp, __iter, __sgt) \
-	__for_each_sgt_daddr(__dp, __iter, __sgt, I915_GTT_PAGE_SIZE)
-
-struct intel_remapped_plane_info {
-	/* in gtt pages */
-	unsigned int width, height, stride, offset;
-} __packed;
-
-struct intel_remapped_info {
-	struct intel_remapped_plane_info plane[2];
-	unsigned int unused_mbz;
-} __packed;
-
-struct intel_rotation_info {
-	struct intel_remapped_plane_info plane[2];
-} __packed;
-
-struct intel_partial_info {
-	u64 offset;
-	unsigned int size;
-} __packed;
-
-enum i915_ggtt_view_type {
-	I915_GGTT_VIEW_NORMAL = 0,
-	I915_GGTT_VIEW_ROTATED = sizeof(struct intel_rotation_info),
-	I915_GGTT_VIEW_PARTIAL = sizeof(struct intel_partial_info),
-	I915_GGTT_VIEW_REMAPPED = sizeof(struct intel_remapped_info),
-};
-
-static inline void assert_i915_gem_gtt_types(void)
-{
-	BUILD_BUG_ON(sizeof(struct intel_rotation_info) != 8*sizeof(unsigned int));
-	BUILD_BUG_ON(sizeof(struct intel_partial_info) != sizeof(u64) + sizeof(unsigned int));
-	BUILD_BUG_ON(sizeof(struct intel_remapped_info) != 9*sizeof(unsigned int));
-
-	/* Check that rotation/remapped shares offsets for simplicity */
-	BUILD_BUG_ON(offsetof(struct intel_remapped_info, plane[0]) !=
-		     offsetof(struct intel_rotation_info, plane[0]));
-	BUILD_BUG_ON(offsetofend(struct intel_remapped_info, plane[1]) !=
-		     offsetofend(struct intel_rotation_info, plane[1]));
-
-	/* As we encode the size of each branch inside the union into its type,
-	 * we have to be careful that each branch has a unique size.
-	 */
-	switch ((enum i915_ggtt_view_type)0) {
-	case I915_GGTT_VIEW_NORMAL:
-	case I915_GGTT_VIEW_PARTIAL:
-	case I915_GGTT_VIEW_ROTATED:
-	case I915_GGTT_VIEW_REMAPPED:
-		/* gcc complains if these are identical cases */
-		break;
-	}
-}
-
-struct i915_ggtt_view {
-	enum i915_ggtt_view_type type;
-	union {
-		/* Members need to contain no holes/padding */
-		struct intel_partial_info partial;
-		struct intel_rotation_info rotated;
-		struct intel_remapped_info remapped;
-	};
-};
-
-enum i915_cache_level;
-
-struct i915_vma;
-
-struct i915_page_dma {
-	struct page *page;
-	union {
-		dma_addr_t daddr;
-
-		/* For gen6/gen7 only. This is the offset in the GGTT
-		 * where the page directory entries for PPGTT begin
-		 */
-		u32 ggtt_offset;
-	};
-};
-
-struct i915_page_scratch {
-	struct i915_page_dma base;
-	u64 encode;
-};
-
-struct i915_page_table {
-	struct i915_page_dma base;
-	atomic_t used;
-};
-
-struct i915_page_directory {
-	struct i915_page_table pt;
-	spinlock_t lock;
-	void *entry[512];
-};
-
-#define __px_choose_expr(x, type, expr, other) \
-	__builtin_choose_expr( \
-	__builtin_types_compatible_p(typeof(x), type) || \
-	__builtin_types_compatible_p(typeof(x), const type), \
-	({ type __x = (type)(x); expr; }), \
-	other)
-
-#define px_base(px) \
-	__px_choose_expr(px, struct i915_page_dma *, __x, \
-	__px_choose_expr(px, struct i915_page_scratch *, &__x->base, \
-	__px_choose_expr(px, struct i915_page_table *, &__x->base, \
-	__px_choose_expr(px, struct i915_page_directory *, &__x->pt.base, \
-	(void)0))))
-#define px_dma(px) (px_base(px)->daddr)
-
-#define px_pt(px) \
-	__px_choose_expr(px, struct i915_page_table *, __x, \
-	__px_choose_expr(px, struct i915_page_directory *, &__x->pt, \
-	(void)0))
-#define px_used(px) (&px_pt(px)->used)
-
-struct i915_vma_ops {
-	/* Map an object into an address space with the given cache flags. */
-	int (*bind_vma)(struct i915_vma *vma,
-			enum i915_cache_level cache_level,
-			u32 flags);
-	/*
-	 * Unmap an object from an address space. This usually consists of
-	 * setting the valid PTE entries to a reserved scratch page.
-	 */
-	void (*unbind_vma)(struct i915_vma *vma);
-
-	int (*set_pages)(struct i915_vma *vma);
-	void (*clear_pages)(struct i915_vma *vma);
-};
-
-struct pagestash {
-	spinlock_t lock;
-	struct pagevec pvec;
-};
-
-struct i915_address_space {
-	struct kref ref;
-	struct rcu_work rcu;
-
-	struct drm_mm mm;
-	struct intel_gt *gt;
-	struct drm_i915_private *i915;
-	struct device *dma;
-	/* Every address space belongs to a struct file - except for the global
-	 * GTT that is owned by the driver (and so @file is set to NULL). In
-	 * principle, no information should leak from one context to another
-	 * (or between files/processes etc) unless explicitly shared by the
-	 * owner. Tracking the owner is important in order to free up per-file
-	 * objects along with the file, to aide resource tracking, and to
-	 * assign blame.
-	 */
-	struct drm_i915_file_private *file;
-	u64 total;		/* size addr space maps (ex. 2GB for ggtt) */
-	u64 reserved;		/* size addr space reserved */
-
-	unsigned int bind_async_flags;
-
-	/*
-	 * Each active user context has its own address space (in full-ppgtt).
-	 * Since the vm may be shared between multiple contexts, we count how
-	 * many contexts keep us "open". Once open hits zero, we are closed
-	 * and do not allow any new attachments, and proceed to shutdown our
-	 * vma and page directories.
-	 */
-	atomic_t open;
-
-	struct mutex mutex; /* protects vma and our lists */
-#define VM_CLASS_GGTT 0
-#define VM_CLASS_PPGTT 1
-
-	struct i915_page_scratch scratch[4];
-	unsigned int scratch_order;
-	unsigned int top;
-
-	/**
-	 * List of vma currently bound.
-	 */
-	struct list_head bound_list;
-
-	struct pagestash free_pages;
-
-	/* Global GTT */
-	bool is_ggtt:1;
-
-	/* Some systems require uncached updates of the page directories */
-	bool pt_kmap_wc:1;
-
-	/* Some systems support read-only mappings for GGTT and/or PPGTT */
-	bool has_read_only:1;
-
-	u64 (*pte_encode)(dma_addr_t addr,
-			  enum i915_cache_level level,
-			  u32 flags); /* Create a valid PTE */
-#define PTE_READ_ONLY	(1<<0)
-
-	int (*allocate_va_range)(struct i915_address_space *vm,
-				 u64 start, u64 length);
-	void (*clear_range)(struct i915_address_space *vm,
-			    u64 start, u64 length);
-	void (*insert_page)(struct i915_address_space *vm,
-			    dma_addr_t addr,
-			    u64 offset,
-			    enum i915_cache_level cache_level,
-			    u32 flags);
-	void (*insert_entries)(struct i915_address_space *vm,
-			       struct i915_vma *vma,
-			       enum i915_cache_level cache_level,
-			       u32 flags);
-	void (*cleanup)(struct i915_address_space *vm);
-
-	struct i915_vma_ops vma_ops;
-
-	I915_SELFTEST_DECLARE(struct fault_attr fault_attr);
-	I915_SELFTEST_DECLARE(bool scrub_64K);
-};
-
-#define i915_is_ggtt(vm) ((vm)->is_ggtt)
-
-static inline bool
-i915_vm_is_4lvl(const struct i915_address_space *vm)
-{
-	return (vm->total - 1) >> 32;
-}
-
-static inline bool
-i915_vm_has_scratch_64K(struct i915_address_space *vm)
-{
-	return vm->scratch_order == get_order(I915_GTT_PAGE_SIZE_64K);
-}
-
-static inline bool
-i915_vm_has_cache_coloring(struct i915_address_space *vm)
-{
-	return i915_is_ggtt(vm) && vm->mm.color_adjust;
-}
-
-/* The Graphics Translation Table is the way in which GEN hardware translates a
- * Graphics Virtual Address into a Physical Address. In addition to the normal
- * collateral associated with any va->pa translations GEN hardware also has a
- * portion of the GTT which can be mapped by the CPU and remain both coherent
- * and correct (in cases like swizzling). That region is referred to as GMADR in
- * the spec.
- */
-struct i915_ggtt {
-	struct i915_address_space vm;
-
-	struct io_mapping iomap;	/* Mapping to our CPU mappable region */
-	struct resource gmadr;          /* GMADR resource */
-	resource_size_t mappable_end;	/* End offset that we can CPU map */
-
-	/** "Graphics Stolen Memory" holds the global PTEs */
-	void __iomem *gsm;
-	void (*invalidate)(struct i915_ggtt *ggtt);
-
-	/** PPGTT used for aliasing the PPGTT with the GTT */
-	struct i915_ppgtt *alias;
-
-	bool do_idle_maps;
-
-	int mtrr;
-
-	/** Bit 6 swizzling required for X tiling */
-	u32 bit_6_swizzle_x;
-	/** Bit 6 swizzling required for Y tiling */
-	u32 bit_6_swizzle_y;
-
-	u32 pin_bias;
-
-	unsigned int num_fences;
-	struct i915_fence_reg fence_regs[I915_MAX_NUM_FENCES];
-	struct list_head fence_list;
-
-	/** List of all objects in gtt_space, currently mmaped by userspace.
-	 * All objects within this list must also be on bound_list.
-	 */
-	struct list_head userfault_list;
-
-	/* Manual runtime pm autosuspend delay for user GGTT mmaps */
-	struct intel_wakeref_auto userfault_wakeref;
-
-	struct drm_mm_node error_capture;
-	struct drm_mm_node uc_fw;
-};
-
-struct i915_ppgtt {
-	struct i915_address_space vm;
-
-	struct i915_page_directory *pd;
-};
-
-struct gen6_ppgtt {
-	struct i915_ppgtt base;
-
-	struct mutex flush;
-	struct i915_vma *vma;
-	gen6_pte_t __iomem *pd_addr;
-
-	atomic_t pin_count;
-	struct mutex pin_mutex;
-
-	bool scan_for_unused_pt;
-};
-
-#define __to_gen6_ppgtt(base) container_of(base, struct gen6_ppgtt, base)
-
-static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base)
-{
-	BUILD_BUG_ON(offsetof(struct gen6_ppgtt, base));
-	return __to_gen6_ppgtt(base);
-}
-
-/*
- * gen6_for_each_pde() iterates over every pde from start until start+length.
- * If start and start+length are not perfectly divisible, the macro will round
- * down and up as needed. Start=0 and length=2G effectively iterates over
- * every PDE in the system. The macro modifies ALL its parameters except 'pd',
- * so each of the other parameters should preferably be a simple variable, or
- * at most an lvalue with no side-effects!
- */
-#define gen6_for_each_pde(pt, pd, start, length, iter)			\
-	for (iter = gen6_pde_index(start);				\
-	     length > 0 && iter < I915_PDES &&				\
-		     (pt = i915_pt_entry(pd, iter), true);		\
-	     ({ u32 temp = ALIGN(start+1, 1 << GEN6_PDE_SHIFT);		\
-		    temp = min(temp - start, length);			\
-		    start += temp, length -= temp; }), ++iter)
-
-#define gen6_for_all_pdes(pt, pd, iter)					\
-	for (iter = 0;							\
-	     iter < I915_PDES &&					\
-		     (pt = i915_pt_entry(pd, iter), true);		\
-	     ++iter)
-
-static inline u32 i915_pte_index(u64 address, unsigned int pde_shift)
-{
-	const u32 mask = NUM_PTE(pde_shift) - 1;
-
-	return (address >> PAGE_SHIFT) & mask;
-}
-
-/* Helper to counts the number of PTEs within the given length. This count
- * does not cross a page table boundary, so the max value would be
- * GEN6_PTES for GEN6, and GEN8_PTES for GEN8.
-*/
-static inline u32 i915_pte_count(u64 addr, u64 length, unsigned int pde_shift)
-{
-	const u64 mask = ~((1ULL << pde_shift) - 1);
-	u64 end;
-
-	GEM_BUG_ON(length == 0);
-	GEM_BUG_ON(offset_in_page(addr | length));
-
-	end = addr + length;
-
-	if ((addr & mask) != (end & mask))
-		return NUM_PTE(pde_shift) - i915_pte_index(addr, pde_shift);
-
-	return i915_pte_index(end, pde_shift) - i915_pte_index(addr, pde_shift);
-}
-
-static inline u32 i915_pde_index(u64 addr, u32 shift)
-{
-	return (addr >> shift) & I915_PDE_MASK;
-}
-
-static inline u32 gen6_pte_index(u32 addr)
-{
-	return i915_pte_index(addr, GEN6_PDE_SHIFT);
-}
-
-static inline u32 gen6_pte_count(u32 addr, u32 length)
-{
-	return i915_pte_count(addr, length, GEN6_PDE_SHIFT);
-}
-
-static inline u32 gen6_pde_index(u32 addr)
-{
-	return i915_pde_index(addr, GEN6_PDE_SHIFT);
-}
-
-static inline struct i915_page_table *
-i915_pt_entry(const struct i915_page_directory * const pd,
-	      const unsigned short n)
-{
-	return pd->entry[n];
-}
-
-static inline struct i915_page_directory *
-i915_pd_entry(const struct i915_page_directory * const pdp,
-	      const unsigned short n)
-{
-	return pdp->entry[n];
-}
-
-static inline dma_addr_t
-i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
-{
-	struct i915_page_dma *pt = ppgtt->pd->entry[n];
-
-	return px_dma(pt ?: px_base(&ppgtt->vm.scratch[ppgtt->vm.top]));
-}
-
-static inline struct i915_ggtt *
-i915_vm_to_ggtt(struct i915_address_space *vm)
-{
-	BUILD_BUG_ON(offsetof(struct i915_ggtt, vm));
-	GEM_BUG_ON(!i915_is_ggtt(vm));
-	return container_of(vm, struct i915_ggtt, vm);
-}
-
-static inline struct i915_ppgtt *
-i915_vm_to_ppgtt(struct i915_address_space *vm)
-{
-	BUILD_BUG_ON(offsetof(struct i915_ppgtt, vm));
-	GEM_BUG_ON(i915_is_ggtt(vm));
-	return container_of(vm, struct i915_ppgtt, vm);
-}
-
-int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv);
-int i915_ggtt_init_hw(struct drm_i915_private *dev_priv);
-int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv);
-void i915_ggtt_enable_guc(struct i915_ggtt *ggtt);
-void i915_ggtt_disable_guc(struct i915_ggtt *ggtt);
-int i915_init_ggtt(struct drm_i915_private *dev_priv);
-void i915_ggtt_driver_release(struct drm_i915_private *dev_priv);
-
-static inline bool i915_ggtt_has_aperture(const struct i915_ggtt *ggtt)
-{
-	return ggtt->mappable_end > 0;
-}
-
-int i915_ppgtt_init_hw(struct intel_gt *gt);
-
-struct i915_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv);
-
-static inline struct i915_address_space *
-i915_vm_get(struct i915_address_space *vm)
-{
-	kref_get(&vm->ref);
-	return vm;
-}
-
-void i915_vm_release(struct kref *kref);
-
-static inline void i915_vm_put(struct i915_address_space *vm)
-{
-	kref_put(&vm->ref, i915_vm_release);
-}
-
-static inline struct i915_address_space *
-i915_vm_open(struct i915_address_space *vm)
-{
-	GEM_BUG_ON(!atomic_read(&vm->open));
-	atomic_inc(&vm->open);
-	return i915_vm_get(vm);
-}
-
-static inline bool
-i915_vm_tryopen(struct i915_address_space *vm)
-{
-	if (atomic_add_unless(&vm->open, 1, 0))
-		return i915_vm_get(vm);
-
-	return false;
-}
-
-void __i915_vm_close(struct i915_address_space *vm);
-
-static inline void
-i915_vm_close(struct i915_address_space *vm)
-{
-	GEM_BUG_ON(!atomic_read(&vm->open));
-	if (atomic_dec_and_test(&vm->open))
-		__i915_vm_close(vm);
-
-	i915_vm_put(vm);
-}
-
-int gen6_ppgtt_pin(struct i915_ppgtt *base);
-void gen6_ppgtt_unpin(struct i915_ppgtt *base);
-void gen6_ppgtt_unpin_all(struct i915_ppgtt *base);
-
-void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv);
-void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv);
+struct i915_address_space;
 
 int __must_check i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
 					    struct sg_table *pages);
@@ -664,6 +46,6 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 #define PIN_GLOBAL		BIT_ULL(10) /* I915_VMA_GLOBAL_BIND */
 #define PIN_USER		BIT_ULL(11) /* I915_VMA_LOCAL_BIND */
 
-#define PIN_OFFSET_MASK		(-I915_GTT_PAGE_SIZE)
+#define PIN_OFFSET_MASK		I915_GTT_PAGE_MASK
 
 #endif
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index fda0977d2059..4c1836f0a991 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -41,6 +41,7 @@
 
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_lmem.h"
+#include "gt/intel_gt_pm.h"
 
 #include "i915_drv.h"
 #include "i915_gpu_error.h"
@@ -232,14 +233,13 @@ static void pool_free(struct pagevec *pv, void *addr)
 
 #ifdef CONFIG_DRM_I915_COMPRESS_ERROR
 
-struct compress {
+struct i915_vma_compress {
 	struct pagevec pool;
 	struct z_stream_s zstream;
 	void *tmp;
-	bool wc;
 };
 
-static bool compress_init(struct compress *c)
+static bool compress_init(struct i915_vma_compress *c)
 {
 	struct z_stream_s *zstream = &c->zstream;
 
@@ -261,7 +261,7 @@ static bool compress_init(struct compress *c)
 	return true;
 }
 
-static bool compress_start(struct compress *c)
+static bool compress_start(struct i915_vma_compress *c)
 {
 	struct z_stream_s *zstream = &c->zstream;
 	void *workspace = zstream->workspace;
@@ -272,8 +272,8 @@ static bool compress_start(struct compress *c)
 	return zlib_deflateInit(zstream, Z_DEFAULT_COMPRESSION) == Z_OK;
 }
 
-static void *compress_next_page(struct compress *c,
-				struct drm_i915_error_object *dst)
+static void *compress_next_page(struct i915_vma_compress *c,
+				struct i915_vma_coredump *dst)
 {
 	void *page;
 
@@ -287,14 +287,15 @@ static void *compress_next_page(struct compress *c,
 	return dst->pages[dst->page_count++] = page;
 }
 
-static int compress_page(struct compress *c,
+static int compress_page(struct i915_vma_compress *c,
 			 void *src,
-			 struct drm_i915_error_object *dst)
+			 struct i915_vma_coredump *dst,
+			 bool wc)
 {
 	struct z_stream_s *zstream = &c->zstream;
 
 	zstream->next_in = src;
-	if (c->wc && c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE))
+	if (wc && c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE))
 		zstream->next_in = c->tmp;
 	zstream->avail_in = PAGE_SIZE;
 
@@ -318,8 +319,8 @@ static int compress_page(struct compress *c,
 	return 0;
 }
 
-static int compress_flush(struct compress *c,
-			  struct drm_i915_error_object *dst)
+static int compress_flush(struct i915_vma_compress *c,
+			  struct i915_vma_coredump *dst)
 {
 	struct z_stream_s *zstream = &c->zstream;
 
@@ -347,12 +348,12 @@ end:
 	return 0;
 }
 
-static void compress_finish(struct compress *c)
+static void compress_finish(struct i915_vma_compress *c)
 {
 	zlib_deflateEnd(&c->zstream);
 }
 
-static void compress_fini(struct compress *c)
+static void compress_fini(struct i915_vma_compress *c)
 {
 	kfree(c->zstream.workspace);
 	if (c->tmp)
@@ -367,24 +368,24 @@ static void err_compression_marker(struct drm_i915_error_state_buf *m)
 
 #else
 
-struct compress {
+struct i915_vma_compress {
 	struct pagevec pool;
-	bool wc;
 };
 
-static bool compress_init(struct compress *c)
+static bool compress_init(struct i915_vma_compress *c)
 {
 	return pool_init(&c->pool, ALLOW_FAIL) == 0;
 }
 
-static bool compress_start(struct compress *c)
+static bool compress_start(struct i915_vma_compress *c)
 {
 	return true;
 }
 
-static int compress_page(struct compress *c,
+static int compress_page(struct i915_vma_compress *c,
 			 void *src,
-			 struct drm_i915_error_object *dst)
+			 struct i915_vma_coredump *dst,
+			 bool wc)
 {
 	void *ptr;
 
@@ -392,24 +393,24 @@ static int compress_page(struct compress *c,
 	if (!ptr)
 		return -ENOMEM;
 
-	if (!(c->wc && i915_memcpy_from_wc(ptr, src, PAGE_SIZE)))
+	if (!(wc && i915_memcpy_from_wc(ptr, src, PAGE_SIZE)))
 		memcpy(ptr, src, PAGE_SIZE);
 	dst->pages[dst->page_count++] = ptr;
 
 	return 0;
 }
 
-static int compress_flush(struct compress *c,
-			  struct drm_i915_error_object *dst)
+static int compress_flush(struct i915_vma_compress *c,
+			  struct i915_vma_coredump *dst)
 {
 	return 0;
 }
 
-static void compress_finish(struct compress *c)
+static void compress_finish(struct i915_vma_compress *c)
 {
 }
 
-static void compress_fini(struct compress *c)
+static void compress_fini(struct i915_vma_compress *c)
 {
 	pool_fini(&c->pool);
 }
@@ -422,7 +423,7 @@ static void err_compression_marker(struct drm_i915_error_state_buf *m)
 #endif
 
 static void error_print_instdone(struct drm_i915_error_state_buf *m,
-				 const struct drm_i915_error_engine *ee)
+				 const struct intel_engine_coredump *ee)
 {
 	const struct sseu_dev_info *sseu = &RUNTIME_INFO(m->i915)->sseu;
 	int slice;
@@ -453,40 +454,56 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m,
 
 static void error_print_request(struct drm_i915_error_state_buf *m,
 				const char *prefix,
-				const struct drm_i915_error_request *erq,
-				const unsigned long epoch)
+				const struct i915_request_coredump *erq)
 {
 	if (!erq->seqno)
 		return;
 
-	err_printf(m, "%s pid %d, seqno %8x:%08x%s%s, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n",
+	err_printf(m, "%s pid %d, seqno %8x:%08x%s%s, prio %d, start %08x, head %08x, tail %08x\n",
 		   prefix, erq->pid, erq->context, erq->seqno,
 		   test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
 			    &erq->flags) ? "!" : "",
 		   test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
 			    &erq->flags) ? "+" : "",
 		   erq->sched_attr.priority,
-		   jiffies_to_msecs(erq->jiffies - epoch),
 		   erq->start, erq->head, erq->tail);
 }
 
 static void error_print_context(struct drm_i915_error_state_buf *m,
 				const char *header,
-				const struct drm_i915_error_context *ctx)
+				const struct i915_gem_context_coredump *ctx)
 {
 	err_printf(m, "%s%s[%d] prio %d, guilty %d active %d\n",
 		   header, ctx->comm, ctx->pid, ctx->sched_attr.priority,
 		   ctx->guilty, ctx->active);
 }
 
+static struct i915_vma_coredump *
+__find_vma(struct i915_vma_coredump *vma, const char *name)
+{
+	while (vma) {
+		if (strcmp(vma->name, name) == 0)
+			return vma;
+		vma = vma->next;
+	}
+
+	return NULL;
+}
+
+static struct i915_vma_coredump *
+find_batch(const struct intel_engine_coredump *ee)
+{
+	return __find_vma(ee->vma, "batch");
+}
+
 static void error_print_engine(struct drm_i915_error_state_buf *m,
-			       const struct drm_i915_error_engine *ee,
-			       const unsigned long epoch)
+			       const struct intel_engine_coredump *ee)
 {
+	struct i915_vma_coredump *batch;
 	int n;
 
 	err_printf(m, "%s command stream:\n", ee->engine->name);
-	err_printf(m, "  IDLE?: %s\n", yesno(ee->idle));
+	err_printf(m, "  CCID:  0x%08x\n", ee->ccid);
 	err_printf(m, "  START: 0x%08x\n", ee->start);
 	err_printf(m, "  HEAD:  0x%08x [0x%08x]\n", ee->head, ee->rq_head);
 	err_printf(m, "  TAIL:  0x%08x [0x%08x, 0x%08x]\n",
@@ -501,9 +518,10 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
 
 	error_print_instdone(m, ee);
 
-	if (ee->batchbuffer) {
-		u64 start = ee->batchbuffer->gtt_offset;
-		u64 end = start + ee->batchbuffer->gtt_size;
+	batch = find_batch(ee);
+	if (batch) {
+		u64 start = batch->gtt_offset;
+		u64 end = start + batch->gtt_size;
 
 		err_printf(m, "  batch: [0x%08x_%08x, 0x%08x_%08x]\n",
 			   upper_32_bits(start), lower_32_bits(start),
@@ -535,13 +553,11 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
 				   ee->vm_info.pp_dir_base);
 		}
 	}
-	err_printf(m, "  ring->head: 0x%08x\n", ee->cpu_ring_head);
-	err_printf(m, "  ring->tail: 0x%08x\n", ee->cpu_ring_tail);
 	err_printf(m, "  engine reset count: %u\n", ee->reset_count);
 
 	for (n = 0; n < ee->num_ports; n++) {
 		err_printf(m, "  ELSP[%d]:", n);
-		error_print_request(m, " ", &ee->execlist[n], epoch);
+		error_print_request(m, " ", &ee->execlist[n]);
 	}
 
 	error_print_context(m, "  Active context: ", &ee->context);
@@ -556,38 +572,35 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
 	va_end(args);
 }
 
-static void print_error_obj(struct drm_i915_error_state_buf *m,
+static void print_error_vma(struct drm_i915_error_state_buf *m,
 			    const struct intel_engine_cs *engine,
-			    const char *name,
-			    const struct drm_i915_error_object *obj)
+			    const struct i915_vma_coredump *vma)
 {
 	char out[ASCII85_BUFSZ];
 	int page;
 
-	if (!obj)
+	if (!vma)
 		return;
 
-	if (name) {
-		err_printf(m, "%s --- %s = 0x%08x %08x\n",
-			   engine ? engine->name : "global", name,
-			   upper_32_bits(obj->gtt_offset),
-			   lower_32_bits(obj->gtt_offset));
-	}
+	err_printf(m, "%s --- %s = 0x%08x %08x\n",
+		   engine ? engine->name : "global", vma->name,
+		   upper_32_bits(vma->gtt_offset),
+		   lower_32_bits(vma->gtt_offset));
 
-	if (obj->gtt_page_sizes > I915_GTT_PAGE_SIZE_4K)
-		err_printf(m, "gtt_page_sizes = 0x%08x\n", obj->gtt_page_sizes);
+	if (vma->gtt_page_sizes > I915_GTT_PAGE_SIZE_4K)
+		err_printf(m, "gtt_page_sizes = 0x%08x\n", vma->gtt_page_sizes);
 
 	err_compression_marker(m);
-	for (page = 0; page < obj->page_count; page++) {
+	for (page = 0; page < vma->page_count; page++) {
 		int i, len;
 
 		len = PAGE_SIZE;
-		if (page == obj->page_count - 1)
-			len -= obj->unused;
+		if (page == vma->page_count - 1)
+			len -= vma->unused;
 		len = ascii85_encode_len(len);
 
 		for (i = 0; i < len; i++)
-			err_puts(m, ascii85_encode(obj->pages[page][i], out));
+			err_puts(m, ascii85_encode(vma->pages[page][i], out));
 	}
 	err_puts(m, "\n");
 }
@@ -626,18 +639,13 @@ static void err_print_pciid(struct drm_i915_error_state_buf *m,
 }
 
 static void err_print_uc(struct drm_i915_error_state_buf *m,
-			 const struct i915_error_uc *error_uc)
+			 const struct intel_uc_coredump *error_uc)
 {
 	struct drm_printer p = i915_error_printer(m);
-	const struct i915_gpu_state *error =
-		container_of(error_uc, typeof(*error), uc);
-
-	if (!error->device_info.has_gt_uc)
-		return;
 
 	intel_uc_fw_dump(&error_uc->guc_fw, &p);
 	intel_uc_fw_dump(&error_uc->huc_fw, &p);
-	print_error_obj(m, NULL, "GuC log buffer", error_uc->guc_log);
+	print_error_vma(m, NULL, error_uc->guc_log);
 }
 
 static void err_free_sgl(struct scatterlist *sgl)
@@ -657,12 +665,69 @@ static void err_free_sgl(struct scatterlist *sgl)
 	}
 }
 
+static void err_print_gt(struct drm_i915_error_state_buf *m,
+			 struct intel_gt_coredump *gt)
+{
+	const struct intel_engine_coredump *ee;
+	int i;
+
+	err_printf(m, "GT awake: %s\n", yesno(gt->awake));
+	err_printf(m, "EIR: 0x%08x\n", gt->eir);
+	err_printf(m, "IER: 0x%08x\n", gt->ier);
+	for (i = 0; i < gt->ngtier; i++)
+		err_printf(m, "GTIER[%d]: 0x%08x\n", i, gt->gtier[i]);
+	err_printf(m, "PGTBL_ER: 0x%08x\n", gt->pgtbl_er);
+	err_printf(m, "FORCEWAKE: 0x%08x\n", gt->forcewake);
+	err_printf(m, "DERRMR: 0x%08x\n", gt->derrmr);
+
+	for (i = 0; i < gt->nfence; i++)
+		err_printf(m, "  fence[%d] = %08llx\n", i, gt->fence[i]);
+
+	if (IS_GEN_RANGE(m->i915, 6, 11)) {
+		err_printf(m, "ERROR: 0x%08x\n", gt->error);
+		err_printf(m, "DONE_REG: 0x%08x\n", gt->done_reg);
+	}
+
+	if (INTEL_GEN(m->i915) >= 8)
+		err_printf(m, "FAULT_TLB_DATA: 0x%08x 0x%08x\n",
+			   gt->fault_data1, gt->fault_data0);
+
+	if (IS_GEN(m->i915, 7))
+		err_printf(m, "ERR_INT: 0x%08x\n", gt->err_int);
+
+	if (IS_GEN_RANGE(m->i915, 8, 11))
+		err_printf(m, "GTT_CACHE_EN: 0x%08x\n", gt->gtt_cache);
+
+	if (IS_GEN(m->i915, 12))
+		err_printf(m, "AUX_ERR_DBG: 0x%08x\n", gt->aux_err);
+
+	if (INTEL_GEN(m->i915) >= 12) {
+		int i;
+
+		for (i = 0; i < GEN12_SFC_DONE_MAX; i++)
+			err_printf(m, "  SFC_DONE[%d]: 0x%08x\n", i,
+				   gt->sfc_done[i]);
+
+		err_printf(m, "  GAM_DONE: 0x%08x\n", gt->gam_done);
+	}
+
+	for (ee = gt->engine; ee; ee = ee->next) {
+		const struct i915_vma_coredump *vma;
+
+		error_print_engine(m, ee);
+		for (vma = ee->vma; vma; vma = vma->next)
+			print_error_vma(m, ee->engine, vma);
+	}
+
+	if (gt->uc)
+		err_print_uc(m, gt->uc);
+}
+
 static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
-			       struct i915_gpu_state *error)
+			       struct i915_gpu_coredump *error)
 {
-	const struct drm_i915_error_engine *ee;
+	const struct intel_engine_coredump *ee;
 	struct timespec64 ts;
-	int i, j;
 
 	if (*error->error_msg)
 		err_printf(m, "%s\n", error->error_msg);
@@ -682,7 +747,7 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 	err_printf(m, "Capture: %lu jiffies; %d ms ago\n",
 		   error->capture, jiffies_to_msecs(jiffies - error->capture));
 
-	for (ee = error->engine; ee; ee = ee->next)
+	for (ee = error->gt ? error->gt->engine : NULL; ee; ee = ee->next)
 		err_printf(m, "Active process (on ring %s): %s [%d]\n",
 			   ee->engine->name,
 			   ee->context.comm,
@@ -708,90 +773,11 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 			   CSR_VERSION_MINOR(csr->version));
 	}
 
-	err_printf(m, "GT awake: %s\n", yesno(error->awake));
 	err_printf(m, "RPM wakelock: %s\n", yesno(error->wakelock));
 	err_printf(m, "PM suspended: %s\n", yesno(error->suspended));
-	err_printf(m, "EIR: 0x%08x\n", error->eir);
-	err_printf(m, "IER: 0x%08x\n", error->ier);
-	for (i = 0; i < error->ngtier; i++)
-		err_printf(m, "GTIER[%d]: 0x%08x\n", i, error->gtier[i]);
-	err_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er);
-	err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake);
-	err_printf(m, "DERRMR: 0x%08x\n", error->derrmr);
-	err_printf(m, "CCID: 0x%08x\n", error->ccid);
-
-	for (i = 0; i < error->nfence; i++)
-		err_printf(m, "  fence[%d] = %08llx\n", i, error->fence[i]);
-
-	if (IS_GEN_RANGE(m->i915, 6, 11)) {
-		err_printf(m, "ERROR: 0x%08x\n", error->error);
-		err_printf(m, "DONE_REG: 0x%08x\n", error->done_reg);
-	}
 
-	if (INTEL_GEN(m->i915) >= 8)
-		err_printf(m, "FAULT_TLB_DATA: 0x%08x 0x%08x\n",
-			   error->fault_data1, error->fault_data0);
-
-	if (IS_GEN(m->i915, 7))
-		err_printf(m, "ERR_INT: 0x%08x\n", error->err_int);
-
-	if (IS_GEN_RANGE(m->i915, 8, 11))
-		err_printf(m, "GTT_CACHE_EN: 0x%08x\n", error->gtt_cache);
-
-	if (IS_GEN(m->i915, 12))
-		err_printf(m, "AUX_ERR_DBG: 0x%08x\n", error->aux_err);
-
-	if (INTEL_GEN(m->i915) >= 12) {
-		int i;
-
-		for (i = 0; i < GEN12_SFC_DONE_MAX; i++)
-			err_printf(m, "  SFC_DONE[%d]: 0x%08x\n", i,
-				   error->sfc_done[i]);
-
-		err_printf(m, "  GAM_DONE: 0x%08x\n", error->gam_done);
-	}
-
-	for (ee = error->engine; ee; ee = ee->next)
-		error_print_engine(m, ee, error->capture);
-
-	for (ee = error->engine; ee; ee = ee->next) {
-		const struct drm_i915_error_object *obj;
-
-		obj = ee->batchbuffer;
-		if (obj) {
-			err_puts(m, ee->engine->name);
-			if (ee->context.pid)
-				err_printf(m, " (submitted by %s [%d])",
-					   ee->context.comm,
-					   ee->context.pid);
-			err_printf(m, " --- gtt_offset = 0x%08x %08x\n",
-				   upper_32_bits(obj->gtt_offset),
-				   lower_32_bits(obj->gtt_offset));
-			print_error_obj(m, ee->engine, NULL, obj);
-		}
-
-		for (j = 0; j < ee->user_bo_count; j++)
-			print_error_obj(m, ee->engine, "user", ee->user_bo[j]);
-
-		if (ee->num_requests) {
-			err_printf(m, "%s --- %d requests\n",
-				   ee->engine->name,
-				   ee->num_requests);
-			for (j = 0; j < ee->num_requests; j++)
-				error_print_request(m, " ",
-						    &ee->requests[j],
-						    error->capture);
-		}
-
-		print_error_obj(m, ee->engine, "ringbuffer", ee->ringbuffer);
-		print_error_obj(m, ee->engine, "HW Status", ee->hws_page);
-		print_error_obj(m, ee->engine, "HW context", ee->ctx);
-		print_error_obj(m, ee->engine, "WA context", ee->wa_ctx);
-		print_error_obj(m, ee->engine,
-				"WA batchbuffer", ee->wa_batchbuffer);
-		print_error_obj(m, ee->engine,
-				"NULL context", ee->default_state);
-	}
+	if (error->gt)
+		err_print_gt(m, error->gt);
 
 	if (error->overlay)
 		intel_overlay_print_error_state(m, error->overlay);
@@ -802,10 +788,9 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 	err_print_capabilities(m, &error->device_info, &error->runtime_info,
 			       &error->driver_caps);
 	err_print_params(m, &error->params);
-	err_print_uc(m, &error->uc);
 }
 
-static int err_print_to_sgl(struct i915_gpu_state *error)
+static int err_print_to_sgl(struct i915_gpu_coredump *error)
 {
 	struct drm_i915_error_state_buf m;
 
@@ -842,8 +827,8 @@ static int err_print_to_sgl(struct i915_gpu_state *error)
 	return 0;
 }
 
-ssize_t i915_gpu_state_copy_to_buffer(struct i915_gpu_state *error,
-				      char *buf, loff_t off, size_t rem)
+ssize_t i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error,
+					 char *buf, loff_t off, size_t rem)
 {
 	struct scatterlist *sg;
 	size_t count;
@@ -906,85 +891,88 @@ ssize_t i915_gpu_state_copy_to_buffer(struct i915_gpu_state *error,
 	return count;
 }
 
-static void i915_error_object_free(struct drm_i915_error_object *obj)
+static void i915_vma_coredump_free(struct i915_vma_coredump *vma)
 {
-	int page;
+	while (vma) {
+		struct i915_vma_coredump *next = vma->next;
+		int page;
 
-	if (obj == NULL)
-		return;
+		for (page = 0; page < vma->page_count; page++)
+			free_page((unsigned long)vma->pages[page]);
 
-	for (page = 0; page < obj->page_count; page++)
-		free_page((unsigned long)obj->pages[page]);
-
-	kfree(obj);
+		kfree(vma);
+		vma = next;
+	}
 }
 
-
-static void cleanup_params(struct i915_gpu_state *error)
+static void cleanup_params(struct i915_gpu_coredump *error)
 {
 	i915_params_free(&error->params);
 }
 
-static void cleanup_uc_state(struct i915_gpu_state *error)
+static void cleanup_uc(struct intel_uc_coredump *uc)
 {
-	struct i915_error_uc *error_uc = &error->uc;
+	kfree(uc->guc_fw.path);
+	kfree(uc->huc_fw.path);
+	i915_vma_coredump_free(uc->guc_log);
 
-	kfree(error_uc->guc_fw.path);
-	kfree(error_uc->huc_fw.path);
-	i915_error_object_free(error_uc->guc_log);
+	kfree(uc);
 }
 
-void __i915_gpu_state_free(struct kref *error_ref)
+static void cleanup_gt(struct intel_gt_coredump *gt)
 {
-	struct i915_gpu_state *error =
-		container_of(error_ref, typeof(*error), ref);
-	long i;
+	while (gt->engine) {
+		struct intel_engine_coredump *ee = gt->engine;
+
+		gt->engine = ee->next;
+
+		i915_vma_coredump_free(ee->vma);
+		kfree(ee);
+	}
 
-	while (error->engine) {
-		struct drm_i915_error_engine *ee = error->engine;
+	if (gt->uc)
+		cleanup_uc(gt->uc);
 
-		error->engine = ee->next;
+	kfree(gt);
+}
 
-		for (i = 0; i < ee->user_bo_count; i++)
-			i915_error_object_free(ee->user_bo[i]);
-		kfree(ee->user_bo);
+void __i915_gpu_coredump_free(struct kref *error_ref)
+{
+	struct i915_gpu_coredump *error =
+		container_of(error_ref, typeof(*error), ref);
 
-		i915_error_object_free(ee->batchbuffer);
-		i915_error_object_free(ee->wa_batchbuffer);
-		i915_error_object_free(ee->ringbuffer);
-		i915_error_object_free(ee->hws_page);
-		i915_error_object_free(ee->ctx);
-		i915_error_object_free(ee->wa_ctx);
+	while (error->gt) {
+		struct intel_gt_coredump *gt = error->gt;
 
-		kfree(ee->requests);
-		kfree(ee);
+		error->gt = gt->next;
+		cleanup_gt(gt);
 	}
 
 	kfree(error->overlay);
 	kfree(error->display);
 
 	cleanup_params(error);
-	cleanup_uc_state(error);
 
 	err_free_sgl(error->sgl);
 	kfree(error);
 }
 
-static struct drm_i915_error_object *
-i915_error_object_create(struct drm_i915_private *i915,
-			 struct i915_vma *vma,
-			 struct compress *compress)
+static struct i915_vma_coredump *
+i915_vma_coredump_create(const struct intel_gt *gt,
+			 const struct i915_vma *vma,
+			 const char *name,
+			 struct i915_vma_compress *compress)
 {
-	struct i915_ggtt *ggtt = &i915->ggtt;
+	struct i915_ggtt *ggtt = gt->ggtt;
 	const u64 slot = ggtt->error_capture.start;
-	struct drm_i915_error_object *dst;
+	struct i915_vma_coredump *dst;
 	unsigned long num_pages;
 	struct sgt_iter iter;
 	int ret;
 
 	might_sleep();
 
-	if (!vma || !vma->pages)
+	if (!vma || !vma->pages || !compress)
 		return NULL;
 
 	num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT;
@@ -998,6 +986,9 @@ i915_error_object_create(struct drm_i915_private *i915,
 		return NULL;
 	}
 
+	strcpy(dst->name, name);
+	dst->next = NULL;
+
 	dst->gtt_offset = vma->node.start;
 	dst->gtt_size = vma->node.size;
 	dst->gtt_page_sizes = vma->page_sizes.gtt;
@@ -1005,9 +996,6 @@ i915_error_object_create(struct drm_i915_private *i915,
 	dst->page_count = 0;
 	dst->unused = 0;
 
-	compress->wc = i915_gem_object_is_lmem(vma->obj) ||
-		       drm_mm_node_allocated(&ggtt->error_capture);
-
 	ret = -EINVAL;
 	if (drm_mm_node_allocated(&ggtt->error_capture)) {
 		void __iomem *s;
@@ -1016,9 +1004,12 @@ i915_error_object_create(struct drm_i915_private *i915,
 		for_each_sgt_daddr(dma, iter, vma->pages) {
 			ggtt->vm.insert_page(&ggtt->vm, dma, slot,
 					     I915_CACHE_NONE, 0);
+			mb();
 
 			s = io_mapping_map_wc(&ggtt->iomap, slot, PAGE_SIZE);
-			ret = compress_page(compress, (void  __force *)s, dst);
+			ret = compress_page(compress,
+					    (void  __force *)s, dst,
+					    true);
 			io_mapping_unmap(s);
 			if (ret)
 				break;
@@ -1031,7 +1022,9 @@ i915_error_object_create(struct drm_i915_private *i915,
 			void __iomem *s;
 
 			s = io_mapping_map_wc(&mem->iomap, dma, PAGE_SIZE);
-			ret = compress_page(compress, (void __force *)s, dst);
+			ret = compress_page(compress,
+					    (void __force *)s, dst,
+					    true);
 			io_mapping_unmap(s);
 			if (ret)
 				break;
@@ -1045,7 +1038,7 @@ i915_error_object_create(struct drm_i915_private *i915,
 			drm_clflush_pages(&page, 1);
 
 			s = kmap(page);
-			ret = compress_page(compress, s, dst);
+			ret = compress_page(compress, s, dst, false);
 			kunmap(page);
 
 			drm_clflush_pages(&page, 1);
@@ -1066,77 +1059,56 @@ i915_error_object_create(struct drm_i915_private *i915,
 	return dst;
 }
 
-/*
- * Generate a semi-unique error code. The code is not meant to have meaning, The
- * code's only purpose is to try to prevent false duplicated bug reports by
- * grossly estimating a GPU error state.
- *
- * TODO Ideally, hashing the batchbuffer would be a very nice way to determine
- * the hang if we could strip the GTT offset information from it.
- *
- * It's only a small step better than a random number in its current form.
- */
-static u32 i915_error_generate_code(struct i915_gpu_state *error)
-{
-	const struct drm_i915_error_engine *ee = error->engine;
-
-	/*
-	 * IPEHR would be an ideal way to detect errors, as it's the gross
-	 * measure of "the command that hung." However, has some very common
-	 * synchronization commands which almost always appear in the case
-	 * strictly a client bug. Use instdone to differentiate those some.
-	 */
-	return ee ? ee->ipehr ^ ee->instdone.instdone : 0;
-}
-
-static void gem_record_fences(struct i915_gpu_state *error)
+static void gt_record_fences(struct intel_gt_coredump *gt)
 {
-	struct drm_i915_private *dev_priv = error->i915;
-	struct intel_uncore *uncore = &dev_priv->uncore;
+	struct i915_ggtt *ggtt = gt->_gt->ggtt;
+	struct intel_uncore *uncore = gt->_gt->uncore;
 	int i;
 
-	if (INTEL_GEN(dev_priv) >= 6) {
-		for (i = 0; i < dev_priv->ggtt.num_fences; i++)
-			error->fence[i] =
+	if (INTEL_GEN(uncore->i915) >= 6) {
+		for (i = 0; i < ggtt->num_fences; i++)
+			gt->fence[i] =
 				intel_uncore_read64(uncore,
 						    FENCE_REG_GEN6_LO(i));
-	} else if (INTEL_GEN(dev_priv) >= 4) {
-		for (i = 0; i < dev_priv->ggtt.num_fences; i++)
-			error->fence[i] =
+	} else if (INTEL_GEN(uncore->i915) >= 4) {
+		for (i = 0; i < ggtt->num_fences; i++)
+			gt->fence[i] =
 				intel_uncore_read64(uncore,
 						    FENCE_REG_965_LO(i));
 	} else {
-		for (i = 0; i < dev_priv->ggtt.num_fences; i++)
-			error->fence[i] =
+		for (i = 0; i < ggtt->num_fences; i++)
+			gt->fence[i] =
 				intel_uncore_read(uncore, FENCE_REG(i));
 	}
-	error->nfence = i;
+	gt->nfence = i;
 }
 
-static void error_record_engine_registers(struct i915_gpu_state *error,
-					  struct intel_engine_cs *engine,
-					  struct drm_i915_error_engine *ee)
+static void engine_record_registers(struct intel_engine_coredump *ee)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
+	const struct intel_engine_cs *engine = ee->engine;
+	struct drm_i915_private *i915 = engine->i915;
 
-	if (INTEL_GEN(dev_priv) >= 6) {
+	if (INTEL_GEN(i915) >= 6) {
 		ee->rc_psmi = ENGINE_READ(engine, RING_PSMI_CTL);
 
-		if (INTEL_GEN(dev_priv) >= 12)
-			ee->fault_reg = I915_READ(GEN12_RING_FAULT_REG);
-		else if (INTEL_GEN(dev_priv) >= 8)
-			ee->fault_reg = I915_READ(GEN8_RING_FAULT_REG);
+		if (INTEL_GEN(i915) >= 12)
+			ee->fault_reg = intel_uncore_read(engine->uncore,
+							  GEN12_RING_FAULT_REG);
+		else if (INTEL_GEN(i915) >= 8)
+			ee->fault_reg = intel_uncore_read(engine->uncore,
+							  GEN8_RING_FAULT_REG);
 		else
 			ee->fault_reg = GEN6_RING_FAULT_REG_READ(engine);
 	}
 
-	if (INTEL_GEN(dev_priv) >= 4) {
+	if (INTEL_GEN(i915) >= 4) {
 		ee->faddr = ENGINE_READ(engine, RING_DMA_FADD);
 		ee->ipeir = ENGINE_READ(engine, RING_IPEIR);
 		ee->ipehr = ENGINE_READ(engine, RING_IPEHR);
 		ee->instps = ENGINE_READ(engine, RING_INSTPS);
 		ee->bbaddr = ENGINE_READ(engine, RING_BBADDR);
-		if (INTEL_GEN(dev_priv) >= 8) {
+		ee->ccid = ENGINE_READ(engine, CCID);
+		if (INTEL_GEN(i915) >= 8) {
 			ee->faddr |= (u64)ENGINE_READ(engine, RING_DMA_FADD_UDW) << 32;
 			ee->bbaddr |= (u64)ENGINE_READ(engine, RING_BBADDR_UDW) << 32;
 		}
@@ -1155,13 +1127,13 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
 	ee->head = ENGINE_READ(engine, RING_HEAD);
 	ee->tail = ENGINE_READ(engine, RING_TAIL);
 	ee->ctl = ENGINE_READ(engine, RING_CTL);
-	if (INTEL_GEN(dev_priv) > 2)
+	if (INTEL_GEN(i915) > 2)
 		ee->mode = ENGINE_READ(engine, RING_MI_MODE);
 
-	if (!HWS_NEEDS_PHYSICAL(dev_priv)) {
+	if (!HWS_NEEDS_PHYSICAL(i915)) {
 		i915_reg_t mmio;
 
-		if (IS_GEN(dev_priv, 7)) {
+		if (IS_GEN(i915, 7)) {
 			switch (engine->id) {
 			default:
 				MISSING_CASE(engine->id);
@@ -1186,40 +1158,40 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
 			mmio = RING_HWS_PGA(engine->mmio_base);
 		}
 
-		ee->hws = I915_READ(mmio);
+		ee->hws = intel_uncore_read(engine->uncore, mmio);
 	}
 
-	ee->idle = intel_engine_is_idle(engine);
-	ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error,
-						  engine);
+	ee->reset_count = i915_reset_engine_count(&i915->gpu_error, engine);
 
-	if (HAS_PPGTT(dev_priv)) {
+	if (HAS_PPGTT(i915)) {
 		int i;
 
 		ee->vm_info.gfx_mode = ENGINE_READ(engine, RING_MODE_GEN7);
 
-		if (IS_GEN(dev_priv, 6)) {
+		if (IS_GEN(i915, 6)) {
 			ee->vm_info.pp_dir_base =
 				ENGINE_READ(engine, RING_PP_DIR_BASE_READ);
-		} else if (IS_GEN(dev_priv, 7)) {
+		} else if (IS_GEN(i915, 7)) {
 			ee->vm_info.pp_dir_base =
 				ENGINE_READ(engine, RING_PP_DIR_BASE);
-		} else if (INTEL_GEN(dev_priv) >= 8) {
+		} else if (INTEL_GEN(i915) >= 8) {
 			u32 base = engine->mmio_base;
 
 			for (i = 0; i < 4; i++) {
 				ee->vm_info.pdp[i] =
-					I915_READ(GEN8_RING_PDP_UDW(base, i));
+					intel_uncore_read(engine->uncore,
+							  GEN8_RING_PDP_UDW(base, i));
 				ee->vm_info.pdp[i] <<= 32;
 				ee->vm_info.pdp[i] |=
-					I915_READ(GEN8_RING_PDP_LDW(base, i));
+					intel_uncore_read(engine->uncore,
+							  GEN8_RING_PDP_LDW(base, i));
 			}
 		}
 	}
 }
 
 static void record_request(const struct i915_request *request,
-			   struct drm_i915_error_request *erq)
+			   struct i915_request_coredump *erq)
 {
 	const struct i915_gem_context *ctx;
 
@@ -1227,7 +1199,6 @@ static void record_request(const struct i915_request *request,
 	erq->context = request->fence.context;
 	erq->seqno = request->fence.seqno;
 	erq->sched_attr = request->sched.attr;
-	erq->jiffies = request->emitted_jiffies;
 	erq->start = i915_ggtt_offset(request->ring->vma);
 	erq->head = request->head;
 	erq->tail = request->tail;
@@ -1240,59 +1211,10 @@ static void record_request(const struct i915_request *request,
 	rcu_read_unlock();
 }
 
-static void engine_record_requests(struct intel_engine_cs *engine,
-				   struct i915_request *first,
-				   struct drm_i915_error_engine *ee)
+static void engine_record_execlists(struct intel_engine_coredump *ee)
 {
-	struct i915_request *request;
-	int count;
-
-	count = 0;
-	request = first;
-	list_for_each_entry_from(request, &engine->active.requests, sched.link)
-		count++;
-	if (!count)
-		return;
-
-	ee->requests = kcalloc(count, sizeof(*ee->requests), ATOMIC_MAYFAIL);
-	if (!ee->requests)
-		return;
-
-	ee->num_requests = count;
-
-	count = 0;
-	request = first;
-	list_for_each_entry_from(request,
-				 &engine->active.requests, sched.link) {
-		if (count >= ee->num_requests) {
-			/*
-			 * If the ring request list was changed in
-			 * between the point where the error request
-			 * list was created and dimensioned and this
-			 * point then just exit early to avoid crashes.
-			 *
-			 * We don't need to communicate that the
-			 * request list changed state during error
-			 * state capture and that the error state is
-			 * slightly incorrect as a consequence since we
-			 * are typically only interested in the request
-			 * list state at the point of error state
-			 * capture, not in any changes happening during
-			 * the capture.
-			 */
-			break;
-		}
-
-		record_request(request, &ee->requests[count++]);
-	}
-	ee->num_requests = count;
-}
-
-static void error_record_engine_execlists(const struct intel_engine_cs *engine,
-					  struct drm_i915_error_engine *ee)
-{
-	const struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct i915_request * const *port = execlists->active;
+	const struct intel_engine_execlists * const el = &ee->engine->execlists;
+	struct i915_request * const *port = el->active;
 	unsigned int n = 0;
 
 	while (*port)
@@ -1301,7 +1223,7 @@ static void error_record_engine_execlists(const struct intel_engine_cs *engine,
 	ee->num_ports = n;
 }
 
-static bool record_context(struct drm_i915_error_context *e,
+static bool record_context(struct i915_gem_context_coredump *e,
 			   const struct i915_request *rq)
 {
 	struct i915_gem_context *ctx;
@@ -1334,23 +1256,24 @@ static bool record_context(struct drm_i915_error_context *e,
 	return capture;
 }
 
-struct capture_vma {
-	struct capture_vma *next;
-	void **slot;
+struct intel_engine_capture_vma {
+	struct intel_engine_capture_vma *next;
+	struct i915_vma *vma;
+	char name[16];
 };
 
-static struct capture_vma *
-capture_vma(struct capture_vma *next,
+static struct intel_engine_capture_vma *
+capture_vma(struct intel_engine_capture_vma *next,
 	    struct i915_vma *vma,
-	    struct drm_i915_error_object **out)
+	    const char *name,
+	    gfp_t gfp)
 {
-	struct capture_vma *c;
+	struct intel_engine_capture_vma *c;
 
-	*out = NULL;
 	if (!vma)
 		return next;
 
-	c = kmalloc(sizeof(*c), ATOMIC_MAYFAIL);
+	c = kmalloc(sizeof(*c), gfp);
 	if (!c)
 		return next;
 
@@ -1359,54 +1282,31 @@ capture_vma(struct capture_vma *next,
 		return next;
 	}
 
-	c->slot = (void **)out;
-	*c->slot = i915_vma_get(vma);
+	strcpy(c->name, name);
+	c->vma = i915_vma_get(vma);
 
 	c->next = next;
 	return c;
 }
 
-static struct capture_vma *
-request_record_user_bo(struct i915_request *request,
-		       struct drm_i915_error_engine *ee,
-		       struct capture_vma *capture)
+static struct intel_engine_capture_vma *
+capture_user(struct intel_engine_capture_vma *capture,
+	     const struct i915_request *rq,
+	     gfp_t gfp)
 {
 	struct i915_capture_list *c;
-	struct drm_i915_error_object **bo;
-	long count, max;
-
-	max = 0;
-	for (c = request->capture_list; c; c = c->next)
-		max++;
-	if (!max)
-		return capture;
-
-	bo = kmalloc_array(max, sizeof(*bo), ATOMIC_MAYFAIL);
-	if (!bo) {
-		/* If we can't capture everything, try to capture something. */
-		max = min_t(long, max, PAGE_SIZE / sizeof(*bo));
-		bo = kmalloc_array(max, sizeof(*bo), ATOMIC_MAYFAIL);
-	}
-	if (!bo)
-		return capture;
 
-	count = 0;
-	for (c = request->capture_list; c; c = c->next) {
-		capture = capture_vma(capture, c->vma, &bo[count]);
-		if (++count == max)
-			break;
-	}
-
-	ee->user_bo = bo;
-	ee->user_bo_count = count;
+	for (c = rq->capture_list; c; c = c->next)
+		capture = capture_vma(capture, c->vma, "user", gfp);
 
 	return capture;
 }
 
-static struct drm_i915_error_object *
-capture_object(struct drm_i915_private *dev_priv,
+static struct i915_vma_coredump *
+capture_object(const struct intel_gt *gt,
 	       struct drm_i915_gem_object *obj,
-	       struct compress *compress)
+	       const char *name,
+	       struct i915_vma_compress *compress)
 {
 	if (obj && i915_gem_object_has_pages(obj)) {
 		struct i915_vma fake = {
@@ -1416,127 +1316,175 @@ capture_object(struct drm_i915_private *dev_priv,
 			.obj = obj,
 		};
 
-		return i915_error_object_create(dev_priv, &fake, compress);
+		return i915_vma_coredump_create(gt, &fake, name, compress);
 	} else {
 		return NULL;
 	}
 }
 
-static void
-gem_record_rings(struct i915_gpu_state *error, struct compress *compress)
+static void add_vma(struct intel_engine_coredump *ee,
+		    struct i915_vma_coredump *vma)
 {
-	struct drm_i915_private *i915 = error->i915;
-	struct intel_engine_cs *engine;
-	struct drm_i915_error_engine *ee;
+	if (vma) {
+		vma->next = ee->vma;
+		ee->vma = vma;
+	}
+}
+
+struct intel_engine_coredump *
+intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
+{
+	struct intel_engine_coredump *ee;
 
-	ee = kzalloc(sizeof(*ee), GFP_KERNEL);
+	ee = kzalloc(sizeof(*ee), gfp);
 	if (!ee)
-		return;
+		return NULL;
 
-	for_each_uabi_engine(engine, i915) {
-		struct capture_vma *capture = NULL;
-		struct i915_request *request;
-		unsigned long flags;
+	ee->engine = engine;
 
-		/* Refill our page pool before entering atomic section */
-		pool_refill(&compress->pool, ALLOW_FAIL);
+	engine_record_registers(ee);
+	engine_record_execlists(ee);
 
-		spin_lock_irqsave(&engine->active.lock, flags);
-		request = intel_engine_find_active_request(engine);
-		if (!request) {
-			spin_unlock_irqrestore(&engine->active.lock, flags);
-			continue;
-		}
+	return ee;
+}
 
-		error->simulated |= record_context(&ee->context, request);
+struct intel_engine_capture_vma *
+intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
+				  struct i915_request *rq,
+				  gfp_t gfp)
+{
+	struct intel_engine_capture_vma *vma = NULL;
 
-		/*
-		 * We need to copy these to an anonymous buffer
-		 * as the simplest method to avoid being overwritten
-		 * by userspace.
-		 */
-		capture = capture_vma(capture,
-				      request->batch,
-				      &ee->batchbuffer);
+	ee->simulated |= record_context(&ee->context, rq);
+	if (ee->simulated)
+		return NULL;
 
-		if (HAS_BROKEN_CS_TLB(i915))
-			capture = capture_vma(capture,
-					      engine->gt->scratch,
-					      &ee->wa_batchbuffer);
+	/*
+	 * We need to copy these to an anonymous buffer
+	 * as the simplest method to avoid being overwritten
+	 * by userspace.
+	 */
+	vma = capture_vma(vma, rq->batch, "batch", gfp);
+	vma = capture_user(vma, rq, gfp);
+	vma = capture_vma(vma, rq->ring->vma, "ring", gfp);
+	vma = capture_vma(vma, rq->context->state, "HW context", gfp);
 
-		capture = request_record_user_bo(request, ee, capture);
+	ee->rq_head = rq->head;
+	ee->rq_post = rq->postfix;
+	ee->rq_tail = rq->tail;
 
-		capture = capture_vma(capture,
-				      request->context->state,
-				      &ee->ctx);
+	return vma;
+}
 
-		capture = capture_vma(capture,
-				      request->ring->vma,
-				      &ee->ringbuffer);
+void
+intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
+			      struct intel_engine_capture_vma *capture,
+			      struct i915_vma_compress *compress)
+{
+	const struct intel_engine_cs *engine = ee->engine;
 
-		ee->cpu_ring_head = request->ring->head;
-		ee->cpu_ring_tail = request->ring->tail;
+	while (capture) {
+		struct intel_engine_capture_vma *this = capture;
+		struct i915_vma *vma = this->vma;
 
-		ee->rq_head = request->head;
-		ee->rq_post = request->postfix;
-		ee->rq_tail = request->tail;
+		add_vma(ee,
+			i915_vma_coredump_create(engine->gt,
+						 vma, this->name,
+						 compress));
 
-		engine_record_requests(engine, request, ee);
-		spin_unlock_irqrestore(&engine->active.lock, flags);
+		i915_active_release(&vma->active);
+		i915_vma_put(vma);
 
-		error_record_engine_registers(error, engine, ee);
-		error_record_engine_execlists(engine, ee);
+		capture = this->next;
+		kfree(this);
+	}
 
-		while (capture) {
-			struct capture_vma *this = capture;
-			struct i915_vma *vma = *this->slot;
+	add_vma(ee,
+		i915_vma_coredump_create(engine->gt,
+					 engine->status_page.vma,
+					 "HW Status",
+					 compress));
 
-			*this->slot =
-				i915_error_object_create(i915, vma, compress);
+	add_vma(ee,
+		i915_vma_coredump_create(engine->gt,
+					 engine->wa_ctx.vma,
+					 "WA context",
+					 compress));
 
-			i915_active_release(&vma->active);
-			i915_vma_put(vma);
+	add_vma(ee,
+		capture_object(engine->gt,
+			       engine->default_state,
+			       "NULL context",
+			       compress));
+}
 
-			capture = this->next;
-			kfree(this);
-		}
+static struct intel_engine_coredump *
+capture_engine(struct intel_engine_cs *engine,
+	       struct i915_vma_compress *compress)
+{
+	struct intel_engine_capture_vma *capture = NULL;
+	struct intel_engine_coredump *ee;
+	struct i915_request *rq;
+	unsigned long flags;
 
-		ee->hws_page =
-			i915_error_object_create(i915,
-						 engine->status_page.vma,
-						 compress);
+	ee = intel_engine_coredump_alloc(engine, GFP_KERNEL);
+	if (!ee)
+		return NULL;
+
+	spin_lock_irqsave(&engine->active.lock, flags);
+	rq = intel_engine_find_active_request(engine);
+	if (rq)
+		capture = intel_engine_coredump_add_request(ee, rq,
+							    ATOMIC_MAYFAIL);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
+	if (!capture) {
+		kfree(ee);
+		return NULL;
+	}
 
-		ee->wa_ctx =
-			i915_error_object_create(i915,
-						 engine->wa_ctx.vma,
-						 compress);
+	intel_engine_coredump_add_vma(ee, capture, compress);
 
-		ee->default_state =
-			capture_object(i915, engine->default_state, compress);
+	return ee;
+}
 
-		ee->engine = engine;
+static void
+gt_record_engines(struct intel_gt_coredump *gt,
+		  struct i915_vma_compress *compress)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 
-		ee->next = error->engine;
-		error->engine = ee;
+	for_each_engine(engine, gt->_gt, id) {
+		struct intel_engine_coredump *ee;
 
-		ee = kzalloc(sizeof(*ee), GFP_KERNEL);
+		/* Refill our page pool before entering atomic section */
+		pool_refill(&compress->pool, ALLOW_FAIL);
+
+		ee = capture_engine(engine, compress);
 		if (!ee)
-			return;
-	}
+			continue;
 
-	kfree(ee);
+		gt->simulated |= ee->simulated;
+		if (ee->simulated) {
+			kfree(ee);
+			continue;
+		}
+
+		ee->next = gt->engine;
+		gt->engine = ee;
+	}
 }
 
-static void
-capture_uc_state(struct i915_gpu_state *error, struct compress *compress)
+static struct intel_uc_coredump *
+gt_record_uc(struct intel_gt_coredump *gt,
+	     struct i915_vma_compress *compress)
 {
-	struct drm_i915_private *i915 = error->i915;
-	struct i915_error_uc *error_uc = &error->uc;
-	struct intel_uc *uc = &i915->gt.uc;
+	const struct intel_uc *uc = &gt->_gt->uc;
+	struct intel_uc_coredump *error_uc;
 
-	/* Capturing uC state won't be useful if there is no GuC */
-	if (!error->device_info.has_gt_uc)
-		return;
+	error_uc = kzalloc(sizeof(*error_uc), ALLOW_FAIL);
+	if (!error_uc)
+		return NULL;
 
 	memcpy(&error_uc->guc_fw, &uc->guc.fw, sizeof(uc->guc.fw));
 	memcpy(&error_uc->huc_fw, &uc->huc.fw, sizeof(uc->huc.fw));
@@ -1547,19 +1495,42 @@ capture_uc_state(struct i915_gpu_state *error, struct compress *compress)
 	 */
 	error_uc->guc_fw.path = kstrdup(uc->guc.fw.path, ALLOW_FAIL);
 	error_uc->huc_fw.path = kstrdup(uc->huc.fw.path, ALLOW_FAIL);
-	error_uc->guc_log = i915_error_object_create(i915,
-						     uc->guc.log.vma,
-						     compress);
+	error_uc->guc_log =
+		i915_vma_coredump_create(gt->_gt,
+					 uc->guc.log.vma, "GuC log buffer",
+					 compress);
+
+	return error_uc;
+}
+
+static void gt_capture_prepare(struct intel_gt_coredump *gt)
+{
+	struct i915_ggtt *ggtt = gt->_gt->ggtt;
+
+	mutex_lock(&ggtt->error_mutex);
+}
+
+static void gt_capture_finish(struct intel_gt_coredump *gt)
+{
+	struct i915_ggtt *ggtt = gt->_gt->ggtt;
+
+	if (drm_mm_node_allocated(&ggtt->error_capture))
+		ggtt->vm.clear_range(&ggtt->vm,
+				     ggtt->error_capture.start,
+				     PAGE_SIZE);
+
+	mutex_unlock(&ggtt->error_mutex);
 }
 
 /* Capture all registers which don't fit into another category. */
-static void capture_reg_state(struct i915_gpu_state *error)
+static void gt_record_regs(struct intel_gt_coredump *gt)
 {
-	struct drm_i915_private *i915 = error->i915;
-	struct intel_uncore *uncore = &i915->uncore;
+	struct intel_uncore *uncore = gt->_gt->uncore;
+	struct drm_i915_private *i915 = uncore->i915;
 	int i;
 
-	/* General organization
+	/*
+	 * General organization
 	 * 1. Registers specific to a single generation
 	 * 2. Registers which belong to multiple generations
 	 * 3. Feature specific registers.
@@ -1569,138 +1540,162 @@ static void capture_reg_state(struct i915_gpu_state *error)
 
 	/* 1: Registers specific to a single generation */
 	if (IS_VALLEYVIEW(i915)) {
-		error->gtier[0] = intel_uncore_read(uncore, GTIER);
-		error->ier = intel_uncore_read(uncore, VLV_IER);
-		error->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_VLV);
+		gt->gtier[0] = intel_uncore_read(uncore, GTIER);
+		gt->ier = intel_uncore_read(uncore, VLV_IER);
+		gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_VLV);
 	}
 
 	if (IS_GEN(i915, 7))
-		error->err_int = intel_uncore_read(uncore, GEN7_ERR_INT);
+		gt->err_int = intel_uncore_read(uncore, GEN7_ERR_INT);
 
 	if (INTEL_GEN(i915) >= 12) {
-		error->fault_data0 = intel_uncore_read(uncore,
-						       GEN12_FAULT_TLB_DATA0);
-		error->fault_data1 = intel_uncore_read(uncore,
-						       GEN12_FAULT_TLB_DATA1);
+		gt->fault_data0 = intel_uncore_read(uncore,
+						    GEN12_FAULT_TLB_DATA0);
+		gt->fault_data1 = intel_uncore_read(uncore,
+						    GEN12_FAULT_TLB_DATA1);
 	} else if (INTEL_GEN(i915) >= 8) {
-		error->fault_data0 = intel_uncore_read(uncore,
-						       GEN8_FAULT_TLB_DATA0);
-		error->fault_data1 = intel_uncore_read(uncore,
-						       GEN8_FAULT_TLB_DATA1);
+		gt->fault_data0 = intel_uncore_read(uncore,
+						    GEN8_FAULT_TLB_DATA0);
+		gt->fault_data1 = intel_uncore_read(uncore,
+						    GEN8_FAULT_TLB_DATA1);
 	}
 
 	if (IS_GEN(i915, 6)) {
-		error->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE);
-		error->gab_ctl = intel_uncore_read(uncore, GAB_CTL);
-		error->gfx_mode = intel_uncore_read(uncore, GFX_MODE);
+		gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE);
+		gt->gab_ctl = intel_uncore_read(uncore, GAB_CTL);
+		gt->gfx_mode = intel_uncore_read(uncore, GFX_MODE);
 	}
 
 	/* 2: Registers which belong to multiple generations */
 	if (INTEL_GEN(i915) >= 7)
-		error->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_MT);
+		gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_MT);
 
 	if (INTEL_GEN(i915) >= 6) {
-		error->derrmr = intel_uncore_read(uncore, DERRMR);
+		gt->derrmr = intel_uncore_read(uncore, DERRMR);
 		if (INTEL_GEN(i915) < 12) {
-			error->error = intel_uncore_read(uncore, ERROR_GEN6);
-			error->done_reg = intel_uncore_read(uncore, DONE_REG);
+			gt->error = intel_uncore_read(uncore, ERROR_GEN6);
+			gt->done_reg = intel_uncore_read(uncore, DONE_REG);
 		}
 	}
 
-	if (INTEL_GEN(i915) >= 5)
-		error->ccid = intel_uncore_read(uncore, CCID(RENDER_RING_BASE));
-
 	/* 3: Feature specific registers */
 	if (IS_GEN_RANGE(i915, 6, 7)) {
-		error->gam_ecochk = intel_uncore_read(uncore, GAM_ECOCHK);
-		error->gac_eco = intel_uncore_read(uncore, GAC_ECO_BITS);
+		gt->gam_ecochk = intel_uncore_read(uncore, GAM_ECOCHK);
+		gt->gac_eco = intel_uncore_read(uncore, GAC_ECO_BITS);
 	}
 
 	if (IS_GEN_RANGE(i915, 8, 11))
-		error->gtt_cache = intel_uncore_read(uncore, HSW_GTT_CACHE_EN);
+		gt->gtt_cache = intel_uncore_read(uncore, HSW_GTT_CACHE_EN);
 
 	if (IS_GEN(i915, 12))
-		error->aux_err = intel_uncore_read(uncore, GEN12_AUX_ERR_DBG);
+		gt->aux_err = intel_uncore_read(uncore, GEN12_AUX_ERR_DBG);
 
 	if (INTEL_GEN(i915) >= 12) {
 		for (i = 0; i < GEN12_SFC_DONE_MAX; i++) {
-			error->sfc_done[i] =
+			gt->sfc_done[i] =
 				intel_uncore_read(uncore, GEN12_SFC_DONE(i));
 		}
 
-		error->gam_done = intel_uncore_read(uncore, GEN12_GAM_DONE);
+		gt->gam_done = intel_uncore_read(uncore, GEN12_GAM_DONE);
 	}
 
 	/* 4: Everything else */
 	if (INTEL_GEN(i915) >= 11) {
-		error->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER);
-		error->gtier[0] =
+		gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER);
+		gt->gtier[0] =
 			intel_uncore_read(uncore,
 					  GEN11_RENDER_COPY_INTR_ENABLE);
-		error->gtier[1] =
+		gt->gtier[1] =
 			intel_uncore_read(uncore, GEN11_VCS_VECS_INTR_ENABLE);
-		error->gtier[2] =
+		gt->gtier[2] =
 			intel_uncore_read(uncore, GEN11_GUC_SG_INTR_ENABLE);
-		error->gtier[3] =
+		gt->gtier[3] =
 			intel_uncore_read(uncore,
 					  GEN11_GPM_WGBOXPERF_INTR_ENABLE);
-		error->gtier[4] =
+		gt->gtier[4] =
 			intel_uncore_read(uncore,
 					  GEN11_CRYPTO_RSVD_INTR_ENABLE);
-		error->gtier[5] =
+		gt->gtier[5] =
 			intel_uncore_read(uncore,
 					  GEN11_GUNIT_CSME_INTR_ENABLE);
-		error->ngtier = 6;
+		gt->ngtier = 6;
 	} else if (INTEL_GEN(i915) >= 8) {
-		error->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER);
+		gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER);
 		for (i = 0; i < 4; i++)
-			error->gtier[i] = intel_uncore_read(uncore,
-							    GEN8_GT_IER(i));
-		error->ngtier = 4;
+			gt->gtier[i] =
+				intel_uncore_read(uncore, GEN8_GT_IER(i));
+		gt->ngtier = 4;
 	} else if (HAS_PCH_SPLIT(i915)) {
-		error->ier = intel_uncore_read(uncore, DEIER);
-		error->gtier[0] = intel_uncore_read(uncore, GTIER);
-		error->ngtier = 1;
+		gt->ier = intel_uncore_read(uncore, DEIER);
+		gt->gtier[0] = intel_uncore_read(uncore, GTIER);
+		gt->ngtier = 1;
 	} else if (IS_GEN(i915, 2)) {
-		error->ier = intel_uncore_read16(uncore, GEN2_IER);
+		gt->ier = intel_uncore_read16(uncore, GEN2_IER);
 	} else if (!IS_VALLEYVIEW(i915)) {
-		error->ier = intel_uncore_read(uncore, GEN2_IER);
+		gt->ier = intel_uncore_read(uncore, GEN2_IER);
 	}
-	error->eir = intel_uncore_read(uncore, EIR);
-	error->pgtbl_er = intel_uncore_read(uncore, PGTBL_ER);
+	gt->eir = intel_uncore_read(uncore, EIR);
+	gt->pgtbl_er = intel_uncore_read(uncore, PGTBL_ER);
 }
 
-static const char *
-error_msg(struct i915_gpu_state *error,
-	  intel_engine_mask_t engines, const char *msg)
+/*
+ * Generate a semi-unique error code. The code is not meant to have meaning; the
+ * code's only purpose is to try to prevent false duplicated bug reports by
+ * grossly estimating a GPU error state.
+ *
+ * TODO Ideally, hashing the batchbuffer would be a very nice way to determine
+ * the hang if we could strip the GTT offset information from it.
+ *
+ * It's only a small step better than a random number in its current form.
+ */
+static u32 generate_ecode(const struct intel_engine_coredump *ee)
 {
+	/*
+	 * IPEHR would be an ideal way to detect errors, as it's the gross
+	 * measure of "the command that hung." However, it has some very common
+	 * synchronization commands which almost always appear in the case of
+	 * strictly a client bug. Use instdone to differentiate those somewhat.
+	 */
+	return ee ? ee->ipehr ^ ee->instdone.instdone : 0;
+}
+
+static const char *error_msg(struct i915_gpu_coredump *error)
+{
+	struct intel_engine_coredump *first = NULL;
+	struct intel_gt_coredump *gt;
+	intel_engine_mask_t engines;
 	int len;
 
+	engines = 0;
+	for (gt = error->gt; gt; gt = gt->next) {
+		struct intel_engine_coredump *cs;
+
+		if (gt->engine && !first)
+			first = gt->engine;
+
+		for (cs = gt->engine; cs; cs = cs->next)
+			engines |= cs->engine->mask;
+	}
+
 	len = scnprintf(error->error_msg, sizeof(error->error_msg),
-			"GPU HANG: ecode %d:%x:0x%08x",
+			"GPU HANG: ecode %d:%x:%08x",
 			INTEL_GEN(error->i915), engines,
-			i915_error_generate_code(error));
-	if (error->engine) {
+			generate_ecode(first));
+	if (first) {
 		/* Just show the first executing process, more is confusing */
 		len += scnprintf(error->error_msg + len,
 				 sizeof(error->error_msg) - len,
 				 ", in %s [%d]",
-				 error->engine->context.comm,
-				 error->engine->context.pid);
+				 first->context.comm, first->context.pid);
 	}
-	if (msg)
-		len += scnprintf(error->error_msg + len,
-				 sizeof(error->error_msg) - len,
-				 ", %s", msg);
 
 	return error->error_msg;
 }
 
-static void capture_gen_state(struct i915_gpu_state *error)
+static void capture_gen(struct i915_gpu_coredump *error)
 {
 	struct drm_i915_private *i915 = error->i915;
 
-	error->awake = i915->gt.awake;
 	error->wakelock = atomic_read(&i915->runtime_pm.wakeref_count);
 	error->suspended = i915->runtime_pm.suspended;
 
@@ -1711,6 +1706,7 @@ static void capture_gen_state(struct i915_gpu_state *error)
 	error->reset_count = i915_reset_count(&i915->gpu_error);
 	error->suspend_count = i915->suspend_count;
 
+	i915_params_copy(&error->params, &i915_modparams);
 	memcpy(&error->device_info,
 	       INTEL_INFO(i915),
 	       sizeof(error->device_info));
@@ -1720,115 +1716,138 @@ static void capture_gen_state(struct i915_gpu_state *error)
 	error->driver_caps = i915->caps;
 }
 
-static void capture_params(struct i915_gpu_state *error)
+struct i915_gpu_coredump *
+i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
 {
-	i915_params_copy(&error->params, &i915_modparams);
+	struct i915_gpu_coredump *error;
+
+	if (!i915_modparams.error_capture)
+		return NULL;
+
+	error = kzalloc(sizeof(*error), gfp);
+	if (!error)
+		return NULL;
+
+	kref_init(&error->ref);
+	error->i915 = i915;
+
+	error->time = ktime_get_real();
+	error->boottime = ktime_get_boottime();
+	error->uptime = ktime_sub(ktime_get(), i915->gt.last_init_time);
+	error->capture = jiffies;
+
+	capture_gen(error);
+
+	return error;
 }
 
-static void capture_finish(struct i915_gpu_state *error)
+#define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x))
+
+struct intel_gt_coredump *
+intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
 {
-	struct i915_ggtt *ggtt = &error->i915->ggtt;
+	struct intel_gt_coredump *gc;
 
-	if (drm_mm_node_allocated(&ggtt->error_capture)) {
-		const u64 slot = ggtt->error_capture.start;
+	gc = kzalloc(sizeof(*gc), gfp);
+	if (!gc)
+		return NULL;
+
+	gc->_gt = gt;
+	gc->awake = intel_gt_pm_is_awake(gt);
 
-		ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE);
+	gt_record_regs(gc);
+	gt_record_fences(gc);
+
+	return gc;
+}
+
+struct i915_vma_compress *
+i915_vma_capture_prepare(struct intel_gt_coredump *gt)
+{
+	struct i915_vma_compress *compress;
+
+	compress = kmalloc(sizeof(*compress), ALLOW_FAIL);
+	if (!compress)
+		return NULL;
+
+	if (!compress_init(compress)) {
+		kfree(compress);
+		return NULL;
 	}
+
+	gt_capture_prepare(gt);
+
+	return compress;
 }
 
-#define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x))
+void i915_vma_capture_finish(struct intel_gt_coredump *gt,
+			     struct i915_vma_compress *compress)
+{
+	if (!compress)
+		return;
+
+	gt_capture_finish(gt);
 
-struct i915_gpu_state *
-i915_capture_gpu_state(struct drm_i915_private *i915)
+	compress_fini(compress);
+	kfree(compress);
+}
+
+struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915)
 {
-	struct i915_gpu_state *error;
-	struct compress compress;
+	struct i915_gpu_coredump *error;
 
 	/* Check if GPU capture has been disabled */
 	error = READ_ONCE(i915->gpu_error.first_error);
 	if (IS_ERR(error))
 		return error;
 
-	error = kzalloc(sizeof(*error), ALLOW_FAIL);
-	if (!error) {
-		i915_disable_error_state(i915, -ENOMEM);
+	error = i915_gpu_coredump_alloc(i915, ALLOW_FAIL);
+	if (!error)
 		return ERR_PTR(-ENOMEM);
-	}
 
-	if (!compress_init(&compress)) {
-		kfree(error);
-		i915_disable_error_state(i915, -ENOMEM);
-		return ERR_PTR(-ENOMEM);
-	}
+	error->gt = intel_gt_coredump_alloc(&i915->gt, ALLOW_FAIL);
+	if (error->gt) {
+		struct i915_vma_compress *compress;
 
-	kref_init(&error->ref);
-	error->i915 = i915;
+		compress = i915_vma_capture_prepare(error->gt);
+		if (!compress) {
+			kfree(error->gt);
+			kfree(error);
+			return ERR_PTR(-ENOMEM);
+		}
 
-	error->time = ktime_get_real();
-	error->boottime = ktime_get_boottime();
-	error->uptime = ktime_sub(ktime_get(), i915->gt.last_init_time);
-	error->capture = jiffies;
+		gt_record_engines(error->gt, compress);
 
-	capture_params(error);
-	capture_gen_state(error);
-	capture_uc_state(error, &compress);
-	capture_reg_state(error);
-	gem_record_fences(error);
-	gem_record_rings(error, &compress);
+		if (INTEL_INFO(i915)->has_gt_uc)
+			error->gt->uc = gt_record_uc(error->gt, compress);
+
+		i915_vma_capture_finish(error->gt, compress);
+
+		error->simulated |= error->gt->simulated;
+	}
 
 	error->overlay = intel_overlay_capture_error_state(i915);
 	error->display = intel_display_capture_error_state(i915);
 
-	capture_finish(error);
-	compress_fini(&compress);
-
 	return error;
 }
 
-/**
- * i915_capture_error_state - capture an error record for later analysis
- * @i915: i915 device
- * @engine_mask: the mask of engines triggering the hang
- * @msg: a message to insert into the error capture header
- *
- * Should be called when an error is detected (either a hang or an error
- * interrupt) to capture error state from the time of the error.  Fills
- * out a structure which becomes available in debugfs for user level tools
- * to pick up.
- */
-void i915_capture_error_state(struct drm_i915_private *i915,
-			      intel_engine_mask_t engine_mask,
-			      const char *msg)
+void i915_error_state_store(struct i915_gpu_coredump *error)
 {
+	struct drm_i915_private *i915;
 	static bool warned;
-	struct i915_gpu_state *error;
-	unsigned long flags;
 
-	if (!i915_modparams.error_capture)
+	if (IS_ERR_OR_NULL(error))
 		return;
 
-	if (READ_ONCE(i915->gpu_error.first_error))
-		return;
+	i915 = error->i915;
+	dev_info(i915->drm.dev, "%s\n", error_msg(error));
 
-	error = i915_capture_gpu_state(i915);
-	if (IS_ERR(error))
+	if (error->simulated ||
+	    cmpxchg(&i915->gpu_error.first_error, NULL, error))
 		return;
 
-	dev_info(i915->drm.dev, "%s\n", error_msg(error, engine_mask, msg));
-
-	if (!error->simulated) {
-		spin_lock_irqsave(&i915->gpu_error.lock, flags);
-		if (!i915->gpu_error.first_error) {
-			i915->gpu_error.first_error = error;
-			error = NULL;
-		}
-		spin_unlock_irqrestore(&i915->gpu_error.lock, flags);
-	}
-
-	if (error) {
-		__i915_gpu_state_free(&error->ref);
-		return;
-	}
+	i915_gpu_coredump_get(error);
 
 	if (!xchg(&warned, true) &&
 	    ktime_get_real_seconds() - DRIVER_TIMESTAMP < DAY_AS_SECONDS(180)) {
@@ -1841,15 +1860,38 @@ void i915_capture_error_state(struct drm_i915_private *i915,
 	}
 }
 
-struct i915_gpu_state *
+/**
+ * i915_capture_error_state - capture an error record for later analysis
+ * @i915: i915 device
+ *
+ * Should be called when an error is detected (either a hang or an error
+ * interrupt) to capture error state from the time of the error.  Fills
+ * out a structure which becomes available in debugfs for user level tools
+ * to pick up.
+ */
+void i915_capture_error_state(struct drm_i915_private *i915)
+{
+	struct i915_gpu_coredump *error;
+
+	error = i915_gpu_coredump(i915);
+	if (IS_ERR(error)) {
+		cmpxchg(&i915->gpu_error.first_error, NULL, error);
+		return;
+	}
+
+	i915_error_state_store(error);
+	i915_gpu_coredump_put(error);
+}
+
+struct i915_gpu_coredump *
 i915_first_error_state(struct drm_i915_private *i915)
 {
-	struct i915_gpu_state *error;
+	struct i915_gpu_coredump *error;
 
 	spin_lock_irq(&i915->gpu_error.lock);
 	error = i915->gpu_error.first_error;
 	if (!IS_ERR_OR_NULL(error))
-		i915_gpu_state_get(error);
+		i915_gpu_coredump_get(error);
 	spin_unlock_irq(&i915->gpu_error.lock);
 
 	return error;
@@ -1857,7 +1899,7 @@ i915_first_error_state(struct drm_i915_private *i915)
 
 void i915_reset_error_state(struct drm_i915_private *i915)
 {
-	struct i915_gpu_state *error;
+	struct i915_gpu_coredump *error;
 
 	spin_lock_irq(&i915->gpu_error.lock);
 	error = i915->gpu_error.first_error;
@@ -1866,7 +1908,7 @@ void i915_reset_error_state(struct drm_i915_private *i915)
 	spin_unlock_irq(&i915->gpu_error.lock);
 
 	if (!IS_ERR_OR_NULL(error))
-		i915_gpu_state_put(error);
+		i915_gpu_coredump_put(error);
 }
 
 void i915_disable_error_state(struct drm_i915_private *i915, int err)
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 5d2c3372ff99..9109004956bd 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -25,43 +25,100 @@
 #include "i915_scheduler.h"
 
 struct drm_i915_private;
+struct i915_vma_compress;
+struct intel_engine_capture_vma;
 struct intel_overlay_error_state;
 struct intel_display_error_state;
 
-struct i915_gpu_state {
-	struct kref ref;
-	ktime_t time;
-	ktime_t boottime;
-	ktime_t uptime;
-	unsigned long capture;
+struct i915_vma_coredump {
+	struct i915_vma_coredump *next;
 
-	struct drm_i915_private *i915;
+	char name[20];
+
+	u64 gtt_offset;
+	u64 gtt_size;
+	u32 gtt_page_sizes;
+
+	int num_pages;
+	int page_count;
+	int unused;
+	u32 *pages[0];
+};
+
+struct i915_request_coredump {
+	unsigned long flags;
+	pid_t pid;
+	u32 context;
+	u32 seqno;
+	u32 start;
+	u32 head;
+	u32 tail;
+	struct i915_sched_attr sched_attr;
+};
+
+struct intel_engine_coredump {
+	const struct intel_engine_cs *engine;
 
-	char error_msg[128];
 	bool simulated;
-	bool awake;
-	bool wakelock;
-	bool suspended;
-	int iommu;
 	u32 reset_count;
-	u32 suspend_count;
-	struct intel_device_info device_info;
-	struct intel_runtime_info runtime_info;
-	struct intel_driver_caps driver_caps;
-	struct i915_params params;
 
-	struct i915_error_uc {
-		struct intel_uc_fw guc_fw;
-		struct intel_uc_fw huc_fw;
-		struct drm_i915_error_object *guc_log;
-	} uc;
+	/* position of active request inside the ring */
+	u32 rq_head, rq_post, rq_tail;
+
+	/* Register state */
+	u32 ccid;
+	u32 start;
+	u32 tail;
+	u32 head;
+	u32 ctl;
+	u32 mode;
+	u32 hws;
+	u32 ipeir;
+	u32 ipehr;
+	u32 bbstate;
+	u32 instpm;
+	u32 instps;
+	u64 bbaddr;
+	u64 acthd;
+	u32 fault_reg;
+	u64 faddr;
+	u32 rc_psmi; /* sleep state */
+	struct intel_instdone instdone;
+
+	struct i915_gem_context_coredump {
+		char comm[TASK_COMM_LEN];
+		pid_t pid;
+		int active;
+		int guilty;
+		struct i915_sched_attr sched_attr;
+	} context;
+
+	struct i915_vma_coredump *vma;
+
+	struct i915_request_coredump execlist[EXECLIST_MAX_PORTS];
+	unsigned int num_ports;
+
+	struct {
+		u32 gfx_mode;
+		union {
+			u64 pdp[4];
+			u32 pp_dir_base;
+		};
+	} vm_info;
+
+	struct intel_engine_coredump *next;
+};
+
+struct intel_gt_coredump {
+	const struct intel_gt *_gt;
+	bool awake;
+	bool simulated;
 
 	/* Generic register state */
 	u32 eir;
 	u32 pgtbl_er;
 	u32 ier;
 	u32 gtier[6], ngtier;
-	u32 ccid;
 	u32 derrmr;
 	u32 forcewake;
 	u32 error; /* gen6+ */
@@ -80,91 +137,45 @@ struct i915_gpu_state {
 
 	u32 nfence;
 	u64 fence[I915_MAX_NUM_FENCES];
+
+	struct intel_engine_coredump *engine;
+
+	struct intel_uc_coredump {
+		struct intel_uc_fw guc_fw;
+		struct intel_uc_fw huc_fw;
+		struct i915_vma_coredump *guc_log;
+	} *uc;
+
+	struct intel_gt_coredump *next;
+};
+
+struct i915_gpu_coredump {
+	struct kref ref;
+	ktime_t time;
+	ktime_t boottime;
+	ktime_t uptime;
+	unsigned long capture;
+
+	struct drm_i915_private *i915;
+
+	struct intel_gt_coredump *gt;
+
+	char error_msg[128];
+	bool simulated;
+	bool wakelock;
+	bool suspended;
+	int iommu;
+	u32 reset_count;
+	u32 suspend_count;
+
+	struct intel_device_info device_info;
+	struct intel_runtime_info runtime_info;
+	struct intel_driver_caps driver_caps;
+	struct i915_params params;
+
 	struct intel_overlay_error_state *overlay;
 	struct intel_display_error_state *display;
 
-	struct drm_i915_error_engine {
-		const struct intel_engine_cs *engine;
-
-		/* Software tracked state */
-		bool idle;
-		int num_requests;
-		u32 reset_count;
-
-		/* position of active request inside the ring */
-		u32 rq_head, rq_post, rq_tail;
-
-		/* our own tracking of ring head and tail */
-		u32 cpu_ring_head;
-		u32 cpu_ring_tail;
-
-		/* Register state */
-		u32 start;
-		u32 tail;
-		u32 head;
-		u32 ctl;
-		u32 mode;
-		u32 hws;
-		u32 ipeir;
-		u32 ipehr;
-		u32 bbstate;
-		u32 instpm;
-		u32 instps;
-		u64 bbaddr;
-		u64 acthd;
-		u32 fault_reg;
-		u64 faddr;
-		u32 rc_psmi; /* sleep state */
-		struct intel_instdone instdone;
-
-		struct drm_i915_error_context {
-			char comm[TASK_COMM_LEN];
-			pid_t pid;
-			int active;
-			int guilty;
-			struct i915_sched_attr sched_attr;
-		} context;
-
-		struct drm_i915_error_object {
-			u64 gtt_offset;
-			u64 gtt_size;
-			u32 gtt_page_sizes;
-			int num_pages;
-			int page_count;
-			int unused;
-			u32 *pages[0];
-		} *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
-
-		struct drm_i915_error_object **user_bo;
-		long user_bo_count;
-
-		struct drm_i915_error_object *wa_ctx;
-		struct drm_i915_error_object *default_state;
-
-		struct drm_i915_error_request {
-			unsigned long flags;
-			long jiffies;
-			pid_t pid;
-			u32 context;
-			u32 seqno;
-			u32 start;
-			u32 head;
-			u32 tail;
-			struct i915_sched_attr sched_attr;
-		} *requests, execlist[EXECLIST_MAX_PORTS];
-		unsigned int num_ports;
-
-		struct {
-			u32 gfx_mode;
-			union {
-				u64 pdp[4];
-				u32 pp_dir_base;
-			};
-		} vm_info;
-
-		struct drm_i915_error_engine *next;
-	} *engine;
-
 	struct scatterlist *sgl, *fit;
 };
 
@@ -172,7 +183,7 @@ struct i915_gpu_error {
 	/* For reset and error_state handling. */
 	spinlock_t lock;
 	/* Protected by the above dev->gpu_error.lock. */
-	struct i915_gpu_state *first_error;
+	struct i915_gpu_coredump *first_error;
 
 	atomic_t pending_fb_pin;
 
@@ -200,41 +211,115 @@ struct drm_i915_error_state_buf {
 __printf(2, 3)
 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
 
-struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915);
-void i915_capture_error_state(struct drm_i915_private *dev_priv,
-			      intel_engine_mask_t engine_mask,
-			      const char *error_msg);
+struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915);
+void i915_capture_error_state(struct drm_i915_private *i915);
+
+struct i915_gpu_coredump *
+i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp);
+
+struct intel_gt_coredump *
+intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp);
+
+struct intel_engine_coredump *
+intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp);
+
+struct intel_engine_capture_vma *
+intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
+				  struct i915_request *rq,
+				  gfp_t gfp);
 
-static inline struct i915_gpu_state *
-i915_gpu_state_get(struct i915_gpu_state *gpu)
+void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
+				   struct intel_engine_capture_vma *capture,
+				   struct i915_vma_compress *compress);
+
+struct i915_vma_compress *
+i915_vma_capture_prepare(struct intel_gt_coredump *gt);
+
+void i915_vma_capture_finish(struct intel_gt_coredump *gt,
+			     struct i915_vma_compress *compress);
+
+void i915_error_state_store(struct i915_gpu_coredump *error);
+
+static inline struct i915_gpu_coredump *
+i915_gpu_coredump_get(struct i915_gpu_coredump *gpu)
 {
 	kref_get(&gpu->ref);
 	return gpu;
 }
 
-ssize_t i915_gpu_state_copy_to_buffer(struct i915_gpu_state *error,
-				      char *buf, loff_t offset, size_t count);
+ssize_t
+i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error,
+				 char *buf, loff_t offset, size_t count);
 
-void __i915_gpu_state_free(struct kref *kref);
-static inline void i915_gpu_state_put(struct i915_gpu_state *gpu)
+void __i915_gpu_coredump_free(struct kref *kref);
+static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
 {
 	if (gpu)
-		kref_put(&gpu->ref, __i915_gpu_state_free);
+		kref_put(&gpu->ref, __i915_gpu_coredump_free);
 }
 
-struct i915_gpu_state *i915_first_error_state(struct drm_i915_private *i915);
+struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private *i915);
 void i915_reset_error_state(struct drm_i915_private *i915);
 void i915_disable_error_state(struct drm_i915_private *i915, int err);
 
 #else
 
-static inline void i915_capture_error_state(struct drm_i915_private *dev_priv,
-					    u32 engine_mask,
-					    const char *error_msg)
+static inline void i915_capture_error_state(struct drm_i915_private *i915)
+{
+}
+
+static inline struct i915_gpu_coredump *
+i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
+{
+	return NULL;
+}
+
+static inline struct intel_gt_coredump *
+intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
+{
+	return NULL;
+}
+
+static inline struct intel_engine_coredump *
+intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
+{
+	return NULL;
+}
+
+static inline struct intel_engine_capture_vma *
+intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
+				  struct i915_request *rq,
+				  gfp_t gfp)
+{
+	return NULL;
+}
+
+static inline void
+intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
+			      struct intel_engine_capture_vma *capture,
+			      struct i915_vma_compress *compress)
+{
+}
+
+static inline struct i915_vma_compress *
+i915_vma_capture_prepare(struct intel_gt_coredump *gt)
+{
+	return NULL;
+}
+
+static inline void
+i915_vma_capture_finish(struct intel_gt_coredump *gt,
+			struct i915_vma_compress *compress)
+{
+}
+
+/* No-op stub for the #else build; same signature as the real prototype. */
+static inline void
+i915_error_state_store(struct i915_gpu_coredump *error)
 {
 }
 
-static inline struct i915_gpu_state *
+static inline struct i915_gpu_coredump *
 i915_first_error_state(struct drm_i915_private *i915)
 {
 	return ERR_PTR(-ENODEV);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 42b79f577500..afc6aad9bf8c 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -893,7 +893,7 @@ int intel_get_crtc_scanline(struct intel_crtc *crtc)
 }
 
 /**
- * ivybridge_parity_work - Workqueue called when a parity error interrupt
+ * ivb_parity_work - Workqueue called when a parity error interrupt
  * occurred.
  * @work: workqueue struct
  *
@@ -901,7 +901,7 @@ int intel_get_crtc_scanline(struct intel_crtc *crtc)
  * this event, userspace should try to remap the bad rows since statistically
  * it is likely the same row is more likely to go bad again.
  */
-static void ivybridge_parity_work(struct work_struct *work)
+static void ivb_parity_work(struct work_struct *work)
 {
 	struct drm_i915_private *dev_priv =
 		container_of(work, typeof(*dev_priv), l3_parity.error_work);
@@ -2031,7 +2031,7 @@ static void ivb_display_irq_handler(struct drm_i915_private *dev_priv,
  * 4 - Process the interrupt(s) that had bits set in the IIRs.
  * 5 - Re-enable Master Interrupt Control.
  */
-static irqreturn_t ironlake_irq_handler(int irq, void *arg)
+static irqreturn_t ilk_irq_handler(int irq, void *arg)
 {
 	struct drm_i915_private *dev_priv = arg;
 	u32 de_iir, gt_iir, de_ier, sde_ier = 0;
@@ -2742,7 +2742,7 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv)
 
 /* drm_dma.h hooks
 */
-static void ironlake_irq_reset(struct drm_i915_private *dev_priv)
+static void ilk_irq_reset(struct drm_i915_private *dev_priv)
 {
 	struct intel_uncore *uncore = &dev_priv->uncore;
 
@@ -3225,7 +3225,7 @@ static void ibx_irq_postinstall(struct drm_i915_private *dev_priv)
 		spt_hpd_detection_setup(dev_priv);
 }
 
-static void ironlake_irq_postinstall(struct drm_i915_private *dev_priv)
+static void ilk_irq_postinstall(struct drm_i915_private *dev_priv)
 {
 	struct intel_uncore *uncore = &dev_priv->uncore;
 	u32 display_mask, extra_mask;
@@ -3899,7 +3899,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
 
 	intel_hpd_init_work(dev_priv);
 
-	INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
+	INIT_WORK(&dev_priv->l3_parity.error_work, ivb_parity_work);
 	for (i = 0; i < MAX_L3_SLICES; ++i)
 		dev_priv->l3_parity.remap_info[i] = NULL;
 
@@ -3980,7 +3980,7 @@ static irq_handler_t intel_irq_handler(struct drm_i915_private *dev_priv)
 		else if (INTEL_GEN(dev_priv) >= 8)
 			return gen8_irq_handler;
 		else
-			return ironlake_irq_handler;
+			return ilk_irq_handler;
 	}
 }
 
@@ -4003,7 +4003,7 @@ static void intel_irq_reset(struct drm_i915_private *dev_priv)
 		else if (INTEL_GEN(dev_priv) >= 8)
 			gen8_irq_reset(dev_priv);
 		else
-			ironlake_irq_reset(dev_priv);
+			ilk_irq_reset(dev_priv);
 	}
 }
 
@@ -4026,7 +4026,7 @@ static void intel_irq_postinstall(struct drm_i915_private *dev_priv)
 		else if (INTEL_GEN(dev_priv) >= 8)
 			gen8_irq_postinstall(dev_priv);
 		else
-			ironlake_irq_postinstall(dev_priv);
+			ilk_irq_postinstall(dev_priv);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c
index 318562ce64c0..b6376b25ef63 100644
--- a/drivers/gpu/drm/i915/i915_mm.c
+++ b/drivers/gpu/drm/i915/i915_mm.c
@@ -33,6 +33,9 @@ struct remap_pfn {
 	struct mm_struct *mm;
 	unsigned long pfn;
 	pgprot_t prot;
+
+	struct sgt_iter sgt;
+	resource_size_t iobase;
 };
 
 static int remap_pfn(pte_t *pte, unsigned long addr, void *data)
@@ -46,6 +49,35 @@ static int remap_pfn(pte_t *pte, unsigned long addr, void *data)
 	return 0;
 }
 
+#define use_dma(io) ((io) != -1)
+
+static inline unsigned long sgt_pfn(const struct remap_pfn *r)
+{
+	if (!use_dma(r->iobase)) /* iterator holds pfns, not dma addresses */
+		return r->sgt.pfn + (r->sgt.curr >> PAGE_SHIFT);
+
+	return (r->sgt.dma + r->sgt.curr + r->iobase) >> PAGE_SHIFT;
+}
+
+static int remap_sg(pte_t *pte, unsigned long addr, void *data)
+{
+	struct remap_pfn *map = data;
+	pte_t entry;
+
+	if (GEM_WARN_ON(!map->sgt.pfn))
+		return -EINVAL;
+
+	/* A special PTE has no struct page behind it. */
+	entry = pte_mkspecial(pfn_pte(sgt_pfn(map), map->prot));
+	set_pte_at(map->mm, addr, pte, entry);
+	map->pfn++; /* count insertions so a failure can be unwound */
+
+	map->sgt.curr += PAGE_SIZE;
+	if (map->sgt.curr >= map->sgt.max) /* sg entry used up: advance */
+		map->sgt = __sgt_iter(__sg_next(map->sgt.sgp), use_dma(map->iobase));
+	return 0;
+}
+
 /**
  * remap_io_mapping - remap an IO mapping to userspace
  * @vma: user vma to map to
@@ -80,3 +112,40 @@ int remap_io_mapping(struct vm_area_struct *vma,
 
 	return 0;
 }
+
+/**
+ * remap_io_sg - remap a scatterlist to userspace
+ * @vma: user vma to map to
+ * @addr: target user address to start at
+ * @size: size of map area
+ * @sgl: Start sg entry
+ * @iobase: Use stored dma address offset by this address or pfn if -1
+ *
+ * Note: only safe if the mm semaphore is held; PTEs are zapped on failure.
+ */
+int remap_io_sg(struct vm_area_struct *vma,
+		unsigned long addr, unsigned long size,
+		struct scatterlist *sgl, resource_size_t iobase)
+{
+	struct remap_pfn r = {
+		.mm = vma->vm_mm,
+		.prot = vma->vm_page_prot,
+		.sgt = __sgt_iter(sgl, use_dma(iobase)),
+		.iobase = iobase,
+	};
+	int err;
+
+	/* We rely on prevalidation of the io-mapping to skip track_pfn(). */
+	GEM_BUG_ON((vma->vm_flags & EXPECTED_FLAGS) != EXPECTED_FLAGS);
+
+	/* flush_cache_range() takes an end address, not a length. */
+	if (!use_dma(iobase))
+		flush_cache_range(vma, addr, addr + size);
+
+	/* remap_sg() counts every PTE it installs in r.pfn. */
+	err = apply_to_page_range(r.mm, addr, size, remap_sg, &r);
+	if (unlikely(err)) /* unwind only what was actually inserted */
+		zap_vma_ptes(vma, addr, r.pfn << PAGE_SHIFT);
+
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 9571611b4b16..83f01401b8b5 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -193,23 +193,23 @@
 	GEN_DEFAULT_PAGE_SIZES, \
 	GEN_DEFAULT_REGIONS
 
-static const struct intel_device_info intel_i830_info = {
+static const struct intel_device_info i830_info = {
 	I830_FEATURES,
 	PLATFORM(INTEL_I830),
 };
 
-static const struct intel_device_info intel_i845g_info = {
+static const struct intel_device_info i845g_info = {
 	I845_FEATURES,
 	PLATFORM(INTEL_I845G),
 };
 
-static const struct intel_device_info intel_i85x_info = {
+static const struct intel_device_info i85x_info = {
 	I830_FEATURES,
 	PLATFORM(INTEL_I85X),
 	.display.has_fbc = 1,
 };
 
-static const struct intel_device_info intel_i865g_info = {
+static const struct intel_device_info i865g_info = {
 	I845_FEATURES,
 	PLATFORM(INTEL_I865G),
 };
@@ -228,7 +228,7 @@ static const struct intel_device_info intel_i865g_info = {
 	GEN_DEFAULT_PAGE_SIZES, \
 	GEN_DEFAULT_REGIONS
 
-static const struct intel_device_info intel_i915g_info = {
+static const struct intel_device_info i915g_info = {
 	GEN3_FEATURES,
 	PLATFORM(INTEL_I915G),
 	.has_coherent_ggtt = false,
@@ -239,7 +239,7 @@ static const struct intel_device_info intel_i915g_info = {
 	.unfenced_needs_alignment = 1,
 };
 
-static const struct intel_device_info intel_i915gm_info = {
+static const struct intel_device_info i915gm_info = {
 	GEN3_FEATURES,
 	PLATFORM(INTEL_I915GM),
 	.is_mobile = 1,
@@ -252,7 +252,7 @@ static const struct intel_device_info intel_i915gm_info = {
 	.unfenced_needs_alignment = 1,
 };
 
-static const struct intel_device_info intel_i945g_info = {
+static const struct intel_device_info i945g_info = {
 	GEN3_FEATURES,
 	PLATFORM(INTEL_I945G),
 	.display.has_hotplug = 1,
@@ -263,7 +263,7 @@ static const struct intel_device_info intel_i945g_info = {
 	.unfenced_needs_alignment = 1,
 };
 
-static const struct intel_device_info intel_i945gm_info = {
+static const struct intel_device_info i945gm_info = {
 	GEN3_FEATURES,
 	PLATFORM(INTEL_I945GM),
 	.is_mobile = 1,
@@ -277,21 +277,21 @@ static const struct intel_device_info intel_i945gm_info = {
 	.unfenced_needs_alignment = 1,
 };
 
-static const struct intel_device_info intel_g33_info = {
+static const struct intel_device_info g33_info = {
 	GEN3_FEATURES,
 	PLATFORM(INTEL_G33),
 	.display.has_hotplug = 1,
 	.display.has_overlay = 1,
 };
 
-static const struct intel_device_info intel_pineview_g_info = {
+static const struct intel_device_info pnv_g_info = {
 	GEN3_FEATURES,
 	PLATFORM(INTEL_PINEVIEW),
 	.display.has_hotplug = 1,
 	.display.has_overlay = 1,
 };
 
-static const struct intel_device_info intel_pineview_m_info = {
+static const struct intel_device_info pnv_m_info = {
 	GEN3_FEATURES,
 	PLATFORM(INTEL_PINEVIEW),
 	.is_mobile = 1,
@@ -314,7 +314,7 @@ static const struct intel_device_info intel_pineview_m_info = {
 	GEN_DEFAULT_PAGE_SIZES, \
 	GEN_DEFAULT_REGIONS
 
-static const struct intel_device_info intel_i965g_info = {
+static const struct intel_device_info i965g_info = {
 	GEN4_FEATURES,
 	PLATFORM(INTEL_I965G),
 	.display.has_overlay = 1,
@@ -322,7 +322,7 @@ static const struct intel_device_info intel_i965g_info = {
 	.has_snoop = false,
 };
 
-static const struct intel_device_info intel_i965gm_info = {
+static const struct intel_device_info i965gm_info = {
 	GEN4_FEATURES,
 	PLATFORM(INTEL_I965GM),
 	.is_mobile = 1,
@@ -333,14 +333,14 @@ static const struct intel_device_info intel_i965gm_info = {
 	.has_snoop = false,
 };
 
-static const struct intel_device_info intel_g45_info = {
+static const struct intel_device_info g45_info = {
 	GEN4_FEATURES,
 	PLATFORM(INTEL_G45),
 	.engine_mask = BIT(RCS0) | BIT(VCS0),
 	.gpu_reset_clobbers_display = false,
 };
 
-static const struct intel_device_info intel_gm45_info = {
+static const struct intel_device_info gm45_info = {
 	GEN4_FEATURES,
 	PLATFORM(INTEL_GM45),
 	.is_mobile = 1,
@@ -365,12 +365,12 @@ static const struct intel_device_info intel_gm45_info = {
 	GEN_DEFAULT_PAGE_SIZES, \
 	GEN_DEFAULT_REGIONS
 
-static const struct intel_device_info intel_ironlake_d_info = {
+static const struct intel_device_info ilk_d_info = {
 	GEN5_FEATURES,
 	PLATFORM(INTEL_IRONLAKE),
 };
 
-static const struct intel_device_info intel_ironlake_m_info = {
+static const struct intel_device_info ilk_m_info = {
 	GEN5_FEATURES,
 	PLATFORM(INTEL_IRONLAKE),
 	.is_mobile = 1,
@@ -400,12 +400,12 @@ static const struct intel_device_info intel_ironlake_m_info = {
 	GEN6_FEATURES, \
 	PLATFORM(INTEL_SANDYBRIDGE)
 
-static const struct intel_device_info intel_sandybridge_d_gt1_info = {
+static const struct intel_device_info snb_d_gt1_info = {
 	SNB_D_PLATFORM,
 	.gt = 1,
 };
 
-static const struct intel_device_info intel_sandybridge_d_gt2_info = {
+static const struct intel_device_info snb_d_gt2_info = {
 	SNB_D_PLATFORM,
 	.gt = 2,
 };
@@ -416,12 +416,12 @@ static const struct intel_device_info intel_sandybridge_d_gt2_info = {
 	.is_mobile = 1
 
 
-static const struct intel_device_info intel_sandybridge_m_gt1_info = {
+static const struct intel_device_info snb_m_gt1_info = {
 	SNB_M_PLATFORM,
 	.gt = 1,
 };
 
-static const struct intel_device_info intel_sandybridge_m_gt2_info = {
+static const struct intel_device_info snb_m_gt2_info = {
 	SNB_M_PLATFORM,
 	.gt = 2,
 };
@@ -450,12 +450,12 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = {
 	PLATFORM(INTEL_IVYBRIDGE), \
 	.has_l3_dpf = 1
 
-static const struct intel_device_info intel_ivybridge_d_gt1_info = {
+static const struct intel_device_info ivb_d_gt1_info = {
 	IVB_D_PLATFORM,
 	.gt = 1,
 };
 
-static const struct intel_device_info intel_ivybridge_d_gt2_info = {
+static const struct intel_device_info ivb_d_gt2_info = {
 	IVB_D_PLATFORM,
 	.gt = 2,
 };
@@ -466,17 +466,17 @@ static const struct intel_device_info intel_ivybridge_d_gt2_info = {
 	.is_mobile = 1, \
 	.has_l3_dpf = 1
 
-static const struct intel_device_info intel_ivybridge_m_gt1_info = {
+static const struct intel_device_info ivb_m_gt1_info = {
 	IVB_M_PLATFORM,
 	.gt = 1,
 };
 
-static const struct intel_device_info intel_ivybridge_m_gt2_info = {
+static const struct intel_device_info ivb_m_gt2_info = {
 	IVB_M_PLATFORM,
 	.gt = 2,
 };
 
-static const struct intel_device_info intel_ivybridge_q_info = {
+static const struct intel_device_info ivb_q_info = {
 	GEN7_FEATURES,
 	PLATFORM(INTEL_IVYBRIDGE),
 	.gt = 2,
@@ -484,7 +484,7 @@ static const struct intel_device_info intel_ivybridge_q_info = {
 	.has_l3_dpf = 1,
 };
 
-static const struct intel_device_info intel_valleyview_info = {
+static const struct intel_device_info vlv_info = {
 	PLATFORM(INTEL_VALLEYVIEW),
 	GEN(7),
 	.is_lp = 1,
@@ -523,17 +523,17 @@ static const struct intel_device_info intel_valleyview_info = {
 	PLATFORM(INTEL_HASWELL), \
 	.has_l3_dpf = 1
 
-static const struct intel_device_info intel_haswell_gt1_info = {
+static const struct intel_device_info hsw_gt1_info = {
 	HSW_PLATFORM,
 	.gt = 1,
 };
 
-static const struct intel_device_info intel_haswell_gt2_info = {
+static const struct intel_device_info hsw_gt2_info = {
 	HSW_PLATFORM,
 	.gt = 2,
 };
 
-static const struct intel_device_info intel_haswell_gt3_info = {
+static const struct intel_device_info hsw_gt3_info = {
 	HSW_PLATFORM,
 	.gt = 3,
 };
@@ -551,17 +551,17 @@ static const struct intel_device_info intel_haswell_gt3_info = {
 	GEN8_FEATURES, \
 	PLATFORM(INTEL_BROADWELL)
 
-static const struct intel_device_info intel_broadwell_gt1_info = {
+static const struct intel_device_info bdw_gt1_info = {
 	BDW_PLATFORM,
 	.gt = 1,
 };
 
-static const struct intel_device_info intel_broadwell_gt2_info = {
+static const struct intel_device_info bdw_gt2_info = {
 	BDW_PLATFORM,
 	.gt = 2,
 };
 
-static const struct intel_device_info intel_broadwell_rsvd_info = {
+static const struct intel_device_info bdw_rsvd_info = {
 	BDW_PLATFORM,
 	.gt = 3,
 	/* According to the device ID those devices are GT3, they were
@@ -569,14 +569,14 @@ static const struct intel_device_info intel_broadwell_rsvd_info = {
 	 */
 };
 
-static const struct intel_device_info intel_broadwell_gt3_info = {
+static const struct intel_device_info bdw_gt3_info = {
 	BDW_PLATFORM,
 	.gt = 3,
 	.engine_mask =
 		BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1),
 };
 
-static const struct intel_device_info intel_cherryview_info = {
+static const struct intel_device_info chv_info = {
 	PLATFORM(INTEL_CHERRYVIEW),
 	GEN(8),
 	.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C),
@@ -621,12 +621,12 @@ static const struct intel_device_info intel_cherryview_info = {
 	GEN9_FEATURES, \
 	PLATFORM(INTEL_SKYLAKE)
 
-static const struct intel_device_info intel_skylake_gt1_info = {
+static const struct intel_device_info skl_gt1_info = {
 	SKL_PLATFORM,
 	.gt = 1,
 };
 
-static const struct intel_device_info intel_skylake_gt2_info = {
+static const struct intel_device_info skl_gt2_info = {
 	SKL_PLATFORM,
 	.gt = 2,
 };
@@ -637,12 +637,12 @@ static const struct intel_device_info intel_skylake_gt2_info = {
 		BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1)
 
 
-static const struct intel_device_info intel_skylake_gt3_info = {
+static const struct intel_device_info skl_gt3_info = {
 	SKL_GT3_PLUS_PLATFORM,
 	.gt = 3,
 };
 
-static const struct intel_device_info intel_skylake_gt4_info = {
+static const struct intel_device_info skl_gt4_info = {
 	SKL_GT3_PLUS_PLATFORM,
 	.gt = 4,
 };
@@ -679,13 +679,13 @@ static const struct intel_device_info intel_skylake_gt4_info = {
 	GEN9_DEFAULT_PAGE_SIZES, \
 	GEN_DEFAULT_REGIONS
 
-static const struct intel_device_info intel_broxton_info = {
+static const struct intel_device_info bxt_info = {
 	GEN9_LP_FEATURES,
 	PLATFORM(INTEL_BROXTON),
 	.ddb_size = 512,
 };
 
-static const struct intel_device_info intel_geminilake_info = {
+static const struct intel_device_info glk_info = {
 	GEN9_LP_FEATURES,
 	PLATFORM(INTEL_GEMINILAKE),
 	.ddb_size = 1024,
@@ -696,17 +696,17 @@ static const struct intel_device_info intel_geminilake_info = {
 	GEN9_FEATURES, \
 	PLATFORM(INTEL_KABYLAKE)
 
-static const struct intel_device_info intel_kabylake_gt1_info = {
+static const struct intel_device_info kbl_gt1_info = {
 	KBL_PLATFORM,
 	.gt = 1,
 };
 
-static const struct intel_device_info intel_kabylake_gt2_info = {
+static const struct intel_device_info kbl_gt2_info = {
 	KBL_PLATFORM,
 	.gt = 2,
 };
 
-static const struct intel_device_info intel_kabylake_gt3_info = {
+static const struct intel_device_info kbl_gt3_info = {
 	KBL_PLATFORM,
 	.gt = 3,
 	.engine_mask =
@@ -717,17 +717,17 @@ static const struct intel_device_info intel_kabylake_gt3_info = {
 	GEN9_FEATURES, \
 	PLATFORM(INTEL_COFFEELAKE)
 
-static const struct intel_device_info intel_coffeelake_gt1_info = {
+static const struct intel_device_info cfl_gt1_info = {
 	CFL_PLATFORM,
 	.gt = 1,
 };
 
-static const struct intel_device_info intel_coffeelake_gt2_info = {
+static const struct intel_device_info cfl_gt2_info = {
 	CFL_PLATFORM,
 	.gt = 2,
 };
 
-static const struct intel_device_info intel_coffeelake_gt3_info = {
+static const struct intel_device_info cfl_gt3_info = {
 	CFL_PLATFORM,
 	.gt = 3,
 	.engine_mask =
@@ -742,7 +742,7 @@ static const struct intel_device_info intel_coffeelake_gt3_info = {
 	.has_coherent_ggtt = false, \
 	GLK_COLORS
 
-static const struct intel_device_info intel_cannonlake_info = {
+static const struct intel_device_info cnl_info = {
 	GEN10_FEATURES,
 	PLATFORM(INTEL_CANNONLAKE),
 	.gt = 2,
@@ -777,14 +777,14 @@ static const struct intel_device_info intel_cannonlake_info = {
 	.has_logical_ring_elsq = 1, \
 	.color = { .degamma_lut_size = 33, .gamma_lut_size = 262145 }
 
-static const struct intel_device_info intel_icelake_11_info = {
+static const struct intel_device_info icl_info = {
 	GEN11_FEATURES,
 	PLATFORM(INTEL_ICELAKE),
 	.engine_mask =
 		BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2),
 };
 
-static const struct intel_device_info intel_elkhartlake_info = {
+static const struct intel_device_info ehl_info = {
 	GEN11_FEATURES,
 	PLATFORM(INTEL_ELKHARTLAKE),
 	.require_force_probe = 1,
@@ -815,7 +815,7 @@ static const struct intel_device_info intel_elkhartlake_info = {
 	.has_global_mocs = 1, \
 	.display.has_dsb = 1
 
-static const struct intel_device_info intel_tigerlake_12_info = {
+static const struct intel_device_info tgl_info = {
 	GEN12_FEATURES,
 	PLATFORM(INTEL_TIGERLAKE),
 	.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D),
@@ -840,70 +840,70 @@ static const struct intel_device_info intel_tigerlake_12_info = {
  * PCI ID matches, otherwise we'll use the wrong info struct above.
  */
 static const struct pci_device_id pciidlist[] = {
-	INTEL_I830_IDS(&intel_i830_info),
-	INTEL_I845G_IDS(&intel_i845g_info),
-	INTEL_I85X_IDS(&intel_i85x_info),
-	INTEL_I865G_IDS(&intel_i865g_info),
-	INTEL_I915G_IDS(&intel_i915g_info),
-	INTEL_I915GM_IDS(&intel_i915gm_info),
-	INTEL_I945G_IDS(&intel_i945g_info),
-	INTEL_I945GM_IDS(&intel_i945gm_info),
-	INTEL_I965G_IDS(&intel_i965g_info),
-	INTEL_G33_IDS(&intel_g33_info),
-	INTEL_I965GM_IDS(&intel_i965gm_info),
-	INTEL_GM45_IDS(&intel_gm45_info),
-	INTEL_G45_IDS(&intel_g45_info),
-	INTEL_PINEVIEW_G_IDS(&intel_pineview_g_info),
-	INTEL_PINEVIEW_M_IDS(&intel_pineview_m_info),
-	INTEL_IRONLAKE_D_IDS(&intel_ironlake_d_info),
-	INTEL_IRONLAKE_M_IDS(&intel_ironlake_m_info),
-	INTEL_SNB_D_GT1_IDS(&intel_sandybridge_d_gt1_info),
-	INTEL_SNB_D_GT2_IDS(&intel_sandybridge_d_gt2_info),
-	INTEL_SNB_M_GT1_IDS(&intel_sandybridge_m_gt1_info),
-	INTEL_SNB_M_GT2_IDS(&intel_sandybridge_m_gt2_info),
-	INTEL_IVB_Q_IDS(&intel_ivybridge_q_info), /* must be first IVB */
-	INTEL_IVB_M_GT1_IDS(&intel_ivybridge_m_gt1_info),
-	INTEL_IVB_M_GT2_IDS(&intel_ivybridge_m_gt2_info),
-	INTEL_IVB_D_GT1_IDS(&intel_ivybridge_d_gt1_info),
-	INTEL_IVB_D_GT2_IDS(&intel_ivybridge_d_gt2_info),
-	INTEL_HSW_GT1_IDS(&intel_haswell_gt1_info),
-	INTEL_HSW_GT2_IDS(&intel_haswell_gt2_info),
-	INTEL_HSW_GT3_IDS(&intel_haswell_gt3_info),
-	INTEL_VLV_IDS(&intel_valleyview_info),
-	INTEL_BDW_GT1_IDS(&intel_broadwell_gt1_info),
-	INTEL_BDW_GT2_IDS(&intel_broadwell_gt2_info),
-	INTEL_BDW_GT3_IDS(&intel_broadwell_gt3_info),
-	INTEL_BDW_RSVD_IDS(&intel_broadwell_rsvd_info),
-	INTEL_CHV_IDS(&intel_cherryview_info),
-	INTEL_SKL_GT1_IDS(&intel_skylake_gt1_info),
-	INTEL_SKL_GT2_IDS(&intel_skylake_gt2_info),
-	INTEL_SKL_GT3_IDS(&intel_skylake_gt3_info),
-	INTEL_SKL_GT4_IDS(&intel_skylake_gt4_info),
-	INTEL_BXT_IDS(&intel_broxton_info),
-	INTEL_GLK_IDS(&intel_geminilake_info),
-	INTEL_KBL_GT1_IDS(&intel_kabylake_gt1_info),
-	INTEL_KBL_GT2_IDS(&intel_kabylake_gt2_info),
-	INTEL_KBL_GT3_IDS(&intel_kabylake_gt3_info),
-	INTEL_KBL_GT4_IDS(&intel_kabylake_gt3_info),
-	INTEL_AML_KBL_GT2_IDS(&intel_kabylake_gt2_info),
-	INTEL_CFL_S_GT1_IDS(&intel_coffeelake_gt1_info),
-	INTEL_CFL_S_GT2_IDS(&intel_coffeelake_gt2_info),
-	INTEL_CFL_H_GT1_IDS(&intel_coffeelake_gt1_info),
-	INTEL_CFL_H_GT2_IDS(&intel_coffeelake_gt2_info),
-	INTEL_CFL_U_GT2_IDS(&intel_coffeelake_gt2_info),
-	INTEL_CFL_U_GT3_IDS(&intel_coffeelake_gt3_info),
-	INTEL_WHL_U_GT1_IDS(&intel_coffeelake_gt1_info),
-	INTEL_WHL_U_GT2_IDS(&intel_coffeelake_gt2_info),
-	INTEL_AML_CFL_GT2_IDS(&intel_coffeelake_gt2_info),
-	INTEL_WHL_U_GT3_IDS(&intel_coffeelake_gt3_info),
-	INTEL_CML_GT1_IDS(&intel_coffeelake_gt1_info),
-	INTEL_CML_GT2_IDS(&intel_coffeelake_gt2_info),
-	INTEL_CML_U_GT1_IDS(&intel_coffeelake_gt1_info),
-	INTEL_CML_U_GT2_IDS(&intel_coffeelake_gt2_info),
-	INTEL_CNL_IDS(&intel_cannonlake_info),
-	INTEL_ICL_11_IDS(&intel_icelake_11_info),
-	INTEL_EHL_IDS(&intel_elkhartlake_info),
-	INTEL_TGL_12_IDS(&intel_tigerlake_12_info),
+	INTEL_I830_IDS(&i830_info),
+	INTEL_I845G_IDS(&i845g_info),
+	INTEL_I85X_IDS(&i85x_info),
+	INTEL_I865G_IDS(&i865g_info),
+	INTEL_I915G_IDS(&i915g_info),
+	INTEL_I915GM_IDS(&i915gm_info),
+	INTEL_I945G_IDS(&i945g_info),
+	INTEL_I945GM_IDS(&i945gm_info),
+	INTEL_I965G_IDS(&i965g_info),
+	INTEL_G33_IDS(&g33_info),
+	INTEL_I965GM_IDS(&i965gm_info),
+	INTEL_GM45_IDS(&gm45_info),
+	INTEL_G45_IDS(&g45_info),
+	INTEL_PINEVIEW_G_IDS(&pnv_g_info),
+	INTEL_PINEVIEW_M_IDS(&pnv_m_info),
+	INTEL_IRONLAKE_D_IDS(&ilk_d_info),
+	INTEL_IRONLAKE_M_IDS(&ilk_m_info),
+	INTEL_SNB_D_GT1_IDS(&snb_d_gt1_info),
+	INTEL_SNB_D_GT2_IDS(&snb_d_gt2_info),
+	INTEL_SNB_M_GT1_IDS(&snb_m_gt1_info),
+	INTEL_SNB_M_GT2_IDS(&snb_m_gt2_info),
+	INTEL_IVB_Q_IDS(&ivb_q_info), /* must be first IVB */
+	INTEL_IVB_M_GT1_IDS(&ivb_m_gt1_info),
+	INTEL_IVB_M_GT2_IDS(&ivb_m_gt2_info),
+	INTEL_IVB_D_GT1_IDS(&ivb_d_gt1_info),
+	INTEL_IVB_D_GT2_IDS(&ivb_d_gt2_info),
+	INTEL_HSW_GT1_IDS(&hsw_gt1_info),
+	INTEL_HSW_GT2_IDS(&hsw_gt2_info),
+	INTEL_HSW_GT3_IDS(&hsw_gt3_info),
+	INTEL_VLV_IDS(&vlv_info),
+	INTEL_BDW_GT1_IDS(&bdw_gt1_info),
+	INTEL_BDW_GT2_IDS(&bdw_gt2_info),
+	INTEL_BDW_GT3_IDS(&bdw_gt3_info),
+	INTEL_BDW_RSVD_IDS(&bdw_rsvd_info),
+	INTEL_CHV_IDS(&chv_info),
+	INTEL_SKL_GT1_IDS(&skl_gt1_info),
+	INTEL_SKL_GT2_IDS(&skl_gt2_info),
+	INTEL_SKL_GT3_IDS(&skl_gt3_info),
+	INTEL_SKL_GT4_IDS(&skl_gt4_info),
+	INTEL_BXT_IDS(&bxt_info),
+	INTEL_GLK_IDS(&glk_info),
+	INTEL_KBL_GT1_IDS(&kbl_gt1_info),
+	INTEL_KBL_GT2_IDS(&kbl_gt2_info),
+	INTEL_KBL_GT3_IDS(&kbl_gt3_info),
+	INTEL_KBL_GT4_IDS(&kbl_gt3_info),
+	INTEL_AML_KBL_GT2_IDS(&kbl_gt2_info),
+	INTEL_CFL_S_GT1_IDS(&cfl_gt1_info),
+	INTEL_CFL_S_GT2_IDS(&cfl_gt2_info),
+	INTEL_CFL_H_GT1_IDS(&cfl_gt1_info),
+	INTEL_CFL_H_GT2_IDS(&cfl_gt2_info),
+	INTEL_CFL_U_GT2_IDS(&cfl_gt2_info),
+	INTEL_CFL_U_GT3_IDS(&cfl_gt3_info),
+	INTEL_WHL_U_GT1_IDS(&cfl_gt1_info),
+	INTEL_WHL_U_GT2_IDS(&cfl_gt2_info),
+	INTEL_AML_CFL_GT2_IDS(&cfl_gt2_info),
+	INTEL_WHL_U_GT3_IDS(&cfl_gt3_info),
+	INTEL_CML_GT1_IDS(&cfl_gt1_info),
+	INTEL_CML_GT2_IDS(&cfl_gt2_info),
+	INTEL_CML_U_GT1_IDS(&cfl_gt1_info),
+	INTEL_CML_U_GT2_IDS(&cfl_gt2_info),
+	INTEL_CNL_IDS(&cnl_info),
+	INTEL_ICL_11_IDS(&icl_info),
+	INTEL_EHL_IDS(&ehl_info),
+	INTEL_TGL_12_IDS(&tgl_info),
 	{0, 0, 0}
 };
 MODULE_DEVICE_TABLE(pci, pciidlist);
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 84350c7bc711..0f556d80ba36 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2159,8 +2159,6 @@ static int gen8_modify_context(struct intel_context *ce,
 	struct i915_request *rq;
 	int err;
 
-	lockdep_assert_held(&ce->pin_mutex);
-
 	rq = intel_engine_create_kernel_request(ce->engine);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
@@ -2203,17 +2201,14 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
 		if (ce->engine->class != RENDER_CLASS)
 			continue;
 
-		err = intel_context_lock_pinned(ce);
-		if (err)
-			break;
+		/* Otherwise OA settings will be set upon first use */
+		if (!intel_context_pin_if_active(ce))
+			continue;
 
 		flex->value = intel_sseu_make_rpcs(ctx->i915, &ce->sseu);
+		err = gen8_modify_context(ce, flex, count);
 
-		/* Otherwise OA settings will be set upon first use */
-		if (intel_context_is_pinned(ce))
-			err = gen8_modify_context(ce, flex, count);
-
-		intel_context_unlock_pinned(ce);
+		intel_context_unpin(ce);
 		if (err)
 			break;
 	}
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index f3ef6700a5f2..28a82c849bac 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -1117,12 +1117,17 @@ void i915_pmu_register(struct drm_i915_private *i915)
 	hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	pmu->timer.function = i915_sample;
 
-	if (!is_igp(i915))
+	if (!is_igp(i915)) {
 		pmu->name = kasprintf(GFP_KERNEL,
-				      "i915-%s",
+				      "i915_%s",
 				      dev_name(i915->drm.dev));
-	else
+		if (pmu->name) {
+			/* tools/perf reserves colons as special. */
+			strreplace((char *)pmu->name, ':', '_');
+		}
+	} else {
 		pmu->name = "i915";
+	}
 	if (!pmu->name)
 		goto err;
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index bbfedeb00b7f..6cc55c103f67 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2244,26 +2244,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 				    MG_DP_MODE_LN1_ACU_PORT1)
 #define   MG_DP_MODE_CFG_DP_X2_MODE			(1 << 7)
 #define   MG_DP_MODE_CFG_DP_X1_MODE			(1 << 6)
-#define   MG_DP_MODE_CFG_TR2PWR_GATING			(1 << 5)
-#define   MG_DP_MODE_CFG_TRPWR_GATING			(1 << 4)
-#define   MG_DP_MODE_CFG_CLNPWR_GATING			(1 << 3)
-#define   MG_DP_MODE_CFG_DIGPWR_GATING			(1 << 2)
-#define   MG_DP_MODE_CFG_GAONPWR_GATING			(1 << 1)
-
-#define MG_MISC_SUS0_PORT1				0x168814
-#define MG_MISC_SUS0_PORT2				0x169814
-#define MG_MISC_SUS0_PORT3				0x16A814
-#define MG_MISC_SUS0_PORT4				0x16B814
-#define MG_MISC_SUS0(tc_port) \
-	_MMIO(_PORT(tc_port, MG_MISC_SUS0_PORT1, MG_MISC_SUS0_PORT2))
-#define   MG_MISC_SUS0_SUSCLK_DYNCLKGATE_MODE_MASK	(3 << 14)
-#define   MG_MISC_SUS0_SUSCLK_DYNCLKGATE_MODE(x)	((x) << 14)
-#define   MG_MISC_SUS0_CFG_TR2PWR_GATING		(1 << 12)
-#define   MG_MISC_SUS0_CFG_CL2PWR_GATING		(1 << 11)
-#define   MG_MISC_SUS0_CFG_GAONPWR_GATING		(1 << 10)
-#define   MG_MISC_SUS0_CFG_TRPWR_GATING			(1 << 7)
-#define   MG_MISC_SUS0_CFG_CL1PWR_GATING		(1 << 6)
-#define   MG_MISC_SUS0_CFG_DGPWR_GATING			(1 << 5)
 
 /* The spec defines this only for BXT PHY0, but lets assume that this
  * would exist for PHY1 too if it had a second channel.
@@ -4177,7 +4157,13 @@ enum {
 #define  CPSSUNIT_CLKGATE_DIS		REG_BIT(9)
 
 #define UNSLICE_UNIT_LEVEL_CLKGATE	_MMIO(0x9434)
-#define  VFUNIT_CLKGATE_DIS		(1 << 20)
+#define   VFUNIT_CLKGATE_DIS		REG_BIT(20)
+#define   HSUNIT_CLKGATE_DIS		REG_BIT(8)
+#define   VSUNIT_CLKGATE_DIS		REG_BIT(3)
+
+#define UNSLICE_UNIT_LEVEL_CLKGATE2	_MMIO(0x94e4)
+#define   VSUNIT_CLKGATE_DIS_TGL	REG_BIT(19)
+#define   PSDUNIT_CLKGATE_DIS		REG_BIT(5)
 
 #define INF_UNIT_LEVEL_CLKGATE		_MMIO(0x9560)
 #define   CGPSF_CLKGATE_DIS		(1 << 3)
@@ -6808,6 +6794,7 @@ enum {
 #define   PLANE_CTL_TILED_Y			(4 << 10)
 #define   PLANE_CTL_TILED_YF			(5 << 10)
 #define   PLANE_CTL_FLIP_HORIZONTAL		(1 << 8)
+#define   PLANE_CTL_MEDIA_DECOMPRESSION_ENABLE	(1 << 4) /* TGL+ */
 #define   PLANE_CTL_ALPHA_MASK			(0x3 << 4) /* Pre-GLK */
 #define   PLANE_CTL_ALPHA_DISABLE		(0 << 4)
 #define   PLANE_CTL_ALPHA_SW_PREMULTIPLY	(2 << 4)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 44a0d1a950c5..be185886e4fc 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -658,7 +658,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	rq->engine = ce->engine;
 	rq->ring = ce->ring;
 	rq->execution_mask = ce->engine->mask;
-	rq->flags = 0;
 
 	RCU_INIT_POINTER(rq->timeline, tl);
 	RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline);
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 565322640378..031433691a06 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -51,7 +51,7 @@ struct i915_capture_list {
 
 #define RQ_TRACE(rq, fmt, ...) do {					\
 	const struct i915_request *rq__ = (rq);				\
-	ENGINE_TRACE(rq__->engine, "fence %llx:%lld, current %d" fmt,	\
+	ENGINE_TRACE(rq__->engine, "fence %llx:%lld, current %d " fmt,	\
 		     rq__->fence.context, rq__->fence.seqno,		\
 		     hwsp_seqno(rq__), ##__VA_ARGS__);			\
 } while (0)
@@ -77,6 +77,38 @@ enum {
 	 * a request is on the various signal_list.
 	 */
 	I915_FENCE_FLAG_SIGNAL,
+
+	/*
+	 * I915_FENCE_FLAG_NOPREEMPT - this request should not be preempted
+	 *
+	 * The execution of some requests should not be interrupted. This is
+	 * a sensitive operation as it makes the request super important,
+	 * blocking other higher priority work. Abuse of this flag will
+	 * lead to quality of service issues.
+	 */
+	I915_FENCE_FLAG_NOPREEMPT,
+
+	/*
+	 * I915_FENCE_FLAG_SENTINEL - this request should be last in the queue
+	 *
+	 * A high priority sentinel request may be submitted to clear the
+	 * submission queue. As it will be the only request in-flight, upon
+	 * execution all other active requests will have been preempted and
+	 * unsubmitted. This preemptive pulse is used to re-evaluate the
+	 * in-flight requests, particularly in cases where an active context
+	 * is banned and those active requests need to be cancelled.
+	 */
+	I915_FENCE_FLAG_SENTINEL,
+
+	/*
+	 * I915_FENCE_FLAG_BOOST - upclock the gpu for this request
+	 *
+	 * Some requests are more important than others! In particular, a
+	 * request that the user is waiting on is typically required for
+	 * interactive latency, which we want to minimise by upclocking
+	 * the GPU. Here we track such boost requests on a per-request basis.
+	 */
+	I915_FENCE_FLAG_BOOST,
 };
 
 /**
@@ -225,11 +257,6 @@ struct i915_request {
 	/** Time at which this request was emitted, in jiffies. */
 	unsigned long emitted_jiffies;
 
-	unsigned long flags;
-#define I915_REQUEST_WAITBOOST	BIT(0)
-#define I915_REQUEST_NOPREEMPT	BIT(1)
-#define I915_REQUEST_SENTINEL	BIT(2)
-
 	/** timeline->request entry for this request */
 	struct list_head link;
 
@@ -442,18 +469,18 @@ static inline void i915_request_mark_complete(struct i915_request *rq)
 
 static inline bool i915_request_has_waitboost(const struct i915_request *rq)
 {
-	return rq->flags & I915_REQUEST_WAITBOOST;
+	return test_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags);
 }
 
 static inline bool i915_request_has_nopreempt(const struct i915_request *rq)
 {
 	/* Preemption should only be disabled very rarely */
-	return unlikely(rq->flags & I915_REQUEST_NOPREEMPT);
+	return unlikely(test_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags));
 }
 
 static inline bool i915_request_has_sentinel(const struct i915_request *rq)
 {
-	return unlikely(rq->flags & I915_REQUEST_SENTINEL);
+	return unlikely(test_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags));
 }
 
 static inline struct intel_timeline *
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index ad2b1b833d7b..0cef3130db05 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -498,15 +498,15 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
 
 	struct device *kdev = kobj_to_dev(kobj);
 	struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
-	struct i915_gpu_state *gpu;
+	struct i915_gpu_coredump *gpu;
 	ssize_t ret;
 
 	gpu = i915_first_error_state(i915);
 	if (IS_ERR(gpu)) {
 		ret = PTR_ERR(gpu);
 	} else if (gpu) {
-		ret = i915_gpu_state_copy_to_buffer(gpu, buf, off, count);
-		i915_gpu_state_put(gpu);
+		ret = i915_gpu_coredump_copy_to_buffer(gpu, buf, off, count);
+		i915_gpu_coredump_put(gpu);
 	} else {
 		const char *str = "No error state collected\n";
 		size_t len = strlen(str);
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index cbd783c31adb..17d7c525ea5c 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -423,8 +423,6 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 	void __iomem *ptr;
 	int err;
 
-	/* Access through the GTT requires the device to be awake. */
-	assert_rpm_wakelock_held(vma->vm->gt->uncore->rpm);
 	if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
 		err = -ENODEV;
 		goto err;
@@ -456,6 +454,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 		goto err_unpin;
 
 	i915_vma_set_ggtt_write(vma);
+
+	/* NB Access through the GTT requires the device to be awake. */
 	return ptr;
 
 err_unpin:
@@ -858,6 +858,7 @@ static void vma_unbind_pages(struct i915_vma *vma)
 int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
 	struct i915_vma_work *work = NULL;
+	intel_wakeref_t wakeref = 0;
 	unsigned int bound;
 	int err;
 
@@ -883,6 +884,9 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 		}
 	}
 
+	if (flags & PIN_GLOBAL)
+		wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
+
 	/* No more allocations allowed once we hold vm->mutex */
 	err = mutex_lock_interruptible(&vma->vm->mutex);
 	if (err)
@@ -946,6 +950,8 @@ err_unlock:
 err_fence:
 	if (work)
 		dma_fence_work_commit(&work->base);
+	if (wakeref)
+		intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
 err_pages:
 	vma_put_pages(vma);
 	return err;
@@ -1246,11 +1252,19 @@ int __i915_vma_unbind(struct i915_vma *vma)
 int i915_vma_unbind(struct i915_vma *vma)
 {
 	struct i915_address_space *vm = vma->vm;
+	intel_wakeref_t wakeref = 0;
 	int err;
 
 	if (!drm_mm_node_allocated(&vma->node))
 		return 0;
 
+	if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
+		/* XXX not always required: nop_clear_range */
+		wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
+
 	err = mutex_lock_interruptible(&vm->mutex);
+	/* Interrupted: release the wakeref taken above before bailing out */
+	if (err && wakeref)
+		intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
 	if (err)
 		return err;
@@ -1258,6 +1272,9 @@ int i915_vma_unbind(struct i915_vma *vma)
 	err = __i915_vma_unbind(vma);
 	mutex_unlock(&vm->mutex);
 
+	if (wakeref)
+		intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
+
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 5fffa3c58908..02b31a62951e 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -30,148 +30,14 @@
 
 #include <drm/drm_mm.h>
 
+#include "gem/i915_gem_object.h"
+
 #include "i915_gem_gtt.h"
 #include "i915_gem_fence_reg.h"
-#include "gem/i915_gem_object.h"
 
 #include "i915_active.h"
 #include "i915_request.h"
-
-enum i915_cache_level;
-
-/**
- * DOC: Virtual Memory Address
- *
- * A VMA represents a GEM BO that is bound into an address space. Therefore, a
- * VMA's presence cannot be guaranteed before binding, or after unbinding the
- * object into/from the address space.
- *
- * To make things as simple as possible (ie. no refcounting), a VMA's lifetime
- * will always be <= an objects lifetime. So object refcounting should cover us.
- */
-struct i915_vma {
-	struct drm_mm_node node;
-
-	struct i915_address_space *vm;
-	const struct i915_vma_ops *ops;
-
-	struct drm_i915_gem_object *obj;
-	struct dma_resv *resv; /** Alias of obj->resv */
-
-	struct sg_table *pages;
-	void __iomem *iomap;
-	void *private; /* owned by creator */
-
-	struct i915_fence_reg *fence;
-
-	u64 size;
-	u64 display_alignment;
-	struct i915_page_sizes page_sizes;
-
-	/* mmap-offset associated with fencing for this vma */
-	struct i915_mmap_offset	*mmo;
-
-	u32 fence_size;
-	u32 fence_alignment;
-
-	/**
-	 * Count of the number of times this vma has been opened by different
-	 * handles (but same file) for execbuf, i.e. the number of aliases
-	 * that exist in the ctx->handle_vmas LUT for this vma.
-	 */
-	struct kref ref;
-	atomic_t open_count;
-	atomic_t flags;
-	/**
-	 * How many users have pinned this object in GTT space.
-	 *
-	 * This is a tightly bound, fairly small number of users, so we
-	 * stuff inside the flags field so that we can both check for overflow
-	 * and detect a no-op i915_vma_pin() in a single check, while also
-	 * pinning the vma.
-	 *
-	 * The worst case display setup would have the same vma pinned for
-	 * use on each plane on each crtc, while also building the next atomic
-	 * state and holding a pin for the length of the cleanup queue. In the
-	 * future, the flip queue may be increased from 1.
-	 * Estimated worst case: 3 [qlen] * 4 [max crtcs] * 7 [max planes] = 84
-	 *
-	 * For GEM, the number of concurrent users for pwrite/pread is
-	 * unbounded. For execbuffer, it is currently one but will in future
-	 * be extended to allow multiple clients to pin vma concurrently.
-	 *
-	 * We also use suballocated pages, with each suballocation claiming
-	 * its own pin on the shared vma. At present, this is limited to
-	 * exclusive cachelines of a single page, so a maximum of 64 possible
-	 * users.
-	 */
-#define I915_VMA_PIN_MASK 0x3ff
-#define I915_VMA_OVERFLOW 0x200
-
-	/** Flags and address space this VMA is bound to */
-#define I915_VMA_GLOBAL_BIND_BIT 10
-#define I915_VMA_LOCAL_BIND_BIT  11
-
-#define I915_VMA_GLOBAL_BIND	((int)BIT(I915_VMA_GLOBAL_BIND_BIT))
-#define I915_VMA_LOCAL_BIND	((int)BIT(I915_VMA_LOCAL_BIND_BIT))
-
-#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)
-
-#define I915_VMA_ALLOC_BIT	12
-#define I915_VMA_ALLOC		((int)BIT(I915_VMA_ALLOC_BIT))
-
-#define I915_VMA_ERROR_BIT	13
-#define I915_VMA_ERROR		((int)BIT(I915_VMA_ERROR_BIT))
-
-#define I915_VMA_GGTT_BIT	14
-#define I915_VMA_CAN_FENCE_BIT	15
-#define I915_VMA_USERFAULT_BIT	16
-#define I915_VMA_GGTT_WRITE_BIT	17
-
-#define I915_VMA_GGTT		((int)BIT(I915_VMA_GGTT_BIT))
-#define I915_VMA_CAN_FENCE	((int)BIT(I915_VMA_CAN_FENCE_BIT))
-#define I915_VMA_USERFAULT	((int)BIT(I915_VMA_USERFAULT_BIT))
-#define I915_VMA_GGTT_WRITE	((int)BIT(I915_VMA_GGTT_WRITE_BIT))
-
-	struct i915_active active;
-
-#define I915_VMA_PAGES_BIAS 24
-#define I915_VMA_PAGES_ACTIVE (BIT(24) | 1)
-	atomic_t pages_count; /* number of active binds to the pages */
-	struct mutex pages_mutex; /* protect acquire/release of backing pages */
-
-	/**
-	 * Support different GGTT views into the same object.
-	 * This means there can be multiple VMA mappings per object and per VM.
-	 * i915_ggtt_view_type is used to distinguish between those entries.
-	 * The default one of zero (I915_GGTT_VIEW_NORMAL) is default and also
-	 * assumed in GEM functions which take no ggtt view parameter.
-	 */
-	struct i915_ggtt_view ggtt_view;
-
-	/** This object's place on the active/inactive lists */
-	struct list_head vm_link;
-
-	struct list_head obj_link; /* Link in the object's VMA list */
-	struct rb_node obj_node;
-	struct hlist_node obj_hash;
-
-	/** This vma's place in the execbuf reservation list */
-	struct list_head exec_link;
-	struct list_head reloc_link;
-
-	/** This vma's place in the eviction list */
-	struct list_head evict_link;
-
-	struct list_head closed_link;
-
-	/**
-	 * Used for performing relocations during execbuffer insertion.
-	 */
-	unsigned int *exec_flags;
-	struct hlist_node exec_node;
-	u32 exec_handle;
-};
+#include "i915_vma_types.h"
 
 struct i915_vma *
 i915_vma_instance(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
new file mode 100644
index 000000000000..e0942efd5236
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_vma_types.h
@@ -0,0 +1,294 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __I915_VMA_TYPES_H__
+#define __I915_VMA_TYPES_H__
+
+#include <linux/rbtree.h>
+
+#include <drm/drm_mm.h>
+
+#include "gem/i915_gem_object_types.h"
+
+enum i915_cache_level;
+
+/**
+ * DOC: Global GTT views
+ *
+ * Background and previous state
+ *
+ * Historically objects could exist (be bound) in global GTT space only as
+ * singular instances with a view representing all of the object's backing pages
+ * in a linear fashion. This view will be called a normal view.
+ *
+ * To support multiple views of the same object, where the number of mapped
+ * pages is not equal to the backing store, or where the layout of the pages
+ * is not linear, the concept of a GGTT view was added.
+ *
+ * One example of an alternative view is a stereo display driven by a single
+ * image. In this case we would have a framebuffer looking like this
+ * (2x2 pages):
+ *
+ *    12
+ *    34
+ *
+ * Above would represent a normal GGTT view as normally mapped for GPU or CPU
+ * rendering. In contrast, fed to the display engine would be an alternative
+ * view which could look something like this:
+ *
+ *   1212
+ *   3434
+ *
+ * In this example both the size and layout of pages in the alternative view is
+ * different from the normal view.
+ *
+ * Implementation and usage
+ *
+ * GGTT views are implemented using VMAs and are distinguished via enum
+ * i915_ggtt_view_type and struct i915_ggtt_view.
+ *
+ * A new flavour of core GEM functions which work with GGTT bound objects was
+ * added with the _ggtt_ infix, and sometimes with a _view postfix, to avoid
+ * renaming in large amounts of code. They take the struct i915_ggtt_view
+ * parameter encapsulating all metadata required to implement a view.
+ *
+ * As a helper for callers which are only interested in the normal view, a
+ * globally const i915_ggtt_view_normal singleton instance exists. All old core
+ * GEM API functions, the ones not taking the view parameter, operate on, or
+ * with, the normal GGTT view.
+ *
+ * Code wanting to add or use a new GGTT view needs to:
+ *
+ * 1. Add a new enum with a suitable name.
+ * 2. Extend the metadata in the i915_ggtt_view structure if required.
+ * 3. Add support to i915_get_vma_pages().
+ *
+ * New views are required to build a scatter-gather table from within the
+ * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and
+ * exists for the lifetime of a VMA.
+ *
+ * The core API is designed to have copy semantics, which means that a
+ * passed-in struct i915_ggtt_view does not need to be persistent (left around
+ * after calling the core API functions).
+ *
+ */
+
+struct intel_remapped_plane_info {
+	/* in gtt pages */
+	unsigned int width, height, stride, offset;
+} __packed;
+
+struct intel_remapped_info {
+	struct intel_remapped_plane_info plane[2];
+	unsigned int unused_mbz;
+} __packed;
+
+struct intel_rotation_info {
+	struct intel_remapped_plane_info plane[2];
+} __packed;
+
+struct intel_partial_info {
+	u64 offset;
+	unsigned int size;
+} __packed;
+
+enum i915_ggtt_view_type {
+	I915_GGTT_VIEW_NORMAL = 0,
+	I915_GGTT_VIEW_ROTATED = sizeof(struct intel_rotation_info),
+	I915_GGTT_VIEW_PARTIAL = sizeof(struct intel_partial_info),
+	I915_GGTT_VIEW_REMAPPED = sizeof(struct intel_remapped_info),
+};
+
+static inline void assert_i915_gem_gtt_types(void)
+{
+	BUILD_BUG_ON(sizeof(struct intel_rotation_info) != 8*sizeof(unsigned int));
+	BUILD_BUG_ON(sizeof(struct intel_partial_info) != sizeof(u64) + sizeof(unsigned int));
+	BUILD_BUG_ON(sizeof(struct intel_remapped_info) != 9*sizeof(unsigned int));
+
+	/* Check that rotation/remapped shares offsets for simplicity */
+	BUILD_BUG_ON(offsetof(struct intel_remapped_info, plane[0]) !=
+		     offsetof(struct intel_rotation_info, plane[0]));
+	BUILD_BUG_ON(offsetofend(struct intel_remapped_info, plane[1]) !=
+		     offsetofend(struct intel_rotation_info, plane[1]));
+
+	/* As we encode the size of each branch inside the union into its type,
+	 * we have to be careful that each branch has a unique size.
+	 */
+	switch ((enum i915_ggtt_view_type)0) {
+	case I915_GGTT_VIEW_NORMAL:
+	case I915_GGTT_VIEW_PARTIAL:
+	case I915_GGTT_VIEW_ROTATED:
+	case I915_GGTT_VIEW_REMAPPED:
+		/* gcc complains if these are identical cases */
+		break;
+	}
+}
+
+struct i915_ggtt_view {
+	enum i915_ggtt_view_type type;
+	union {
+		/* Members need to contain no holes/padding */
+		struct intel_partial_info partial;
+		struct intel_rotation_info rotated;
+		struct intel_remapped_info remapped;
+	};
+};
+
+/**
+ * DOC: Virtual Memory Address
+ *
+ * A VMA represents a GEM BO that is bound into an address space. Therefore, a
+ * VMA's presence cannot be guaranteed before binding, or after unbinding the
+ * object into/from the address space.
+ *
+ * To make things as simple as possible (ie. no refcounting), a VMA's lifetime
+ * is always <= an object's lifetime. So object refcounting should cover us.
+ */
+struct i915_vma {
+	struct drm_mm_node node;
+
+	struct i915_address_space *vm;
+	const struct i915_vma_ops *ops;
+
+	struct drm_i915_gem_object *obj;
+	struct dma_resv *resv; /** Alias of obj->resv */
+
+	struct sg_table *pages;
+	void __iomem *iomap;
+	void *private; /* owned by creator */
+
+	struct i915_fence_reg *fence;
+
+	u64 size;
+	u64 display_alignment;
+	struct i915_page_sizes page_sizes;
+
+	/* mmap-offset associated with fencing for this vma */
+	struct i915_mmap_offset	*mmo;
+
+	u32 fence_size;
+	u32 fence_alignment;
+
+	/**
+	 * Count of the number of times this vma has been opened by different
+	 * handles (but same file) for execbuf, i.e. the number of aliases
+	 * that exist in the ctx->handle_vmas LUT for this vma.
+	 */
+	struct kref ref;
+	atomic_t open_count;
+	atomic_t flags;
+	/**
+	 * How many users have pinned this object in GTT space.
+	 *
+	 * This is a tightly bound, fairly small number of users, so we
+	 * stuff inside the flags field so that we can both check for overflow
+	 * and detect a no-op i915_vma_pin() in a single check, while also
+	 * pinning the vma.
+	 *
+	 * The worst case display setup would have the same vma pinned for
+	 * use on each plane on each crtc, while also building the next atomic
+	 * state and holding a pin for the length of the cleanup queue. In the
+	 * future, the flip queue may be increased from 1.
+	 * Estimated worst case: 3 [qlen] * 4 [max crtcs] * 7 [max planes] = 84
+	 *
+	 * For GEM, the number of concurrent users for pwrite/pread is
+	 * unbounded. For execbuffer, it is currently one but will in future
+	 * be extended to allow multiple clients to pin vma concurrently.
+	 *
+	 * We also use suballocated pages, with each suballocation claiming
+	 * its own pin on the shared vma. At present, this is limited to
+	 * exclusive cachelines of a single page, so a maximum of 64 possible
+	 * users.
+	 */
+#define I915_VMA_PIN_MASK 0x3ff
+#define I915_VMA_OVERFLOW 0x200
+
+	/** Flags and address space this VMA is bound to */
+#define I915_VMA_GLOBAL_BIND_BIT 10
+#define I915_VMA_LOCAL_BIND_BIT  11
+
+#define I915_VMA_GLOBAL_BIND	((int)BIT(I915_VMA_GLOBAL_BIND_BIT))
+#define I915_VMA_LOCAL_BIND	((int)BIT(I915_VMA_LOCAL_BIND_BIT))
+
+#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)
+
+#define I915_VMA_ALLOC_BIT	12
+#define I915_VMA_ALLOC		((int)BIT(I915_VMA_ALLOC_BIT))
+
+#define I915_VMA_ERROR_BIT	13
+#define I915_VMA_ERROR		((int)BIT(I915_VMA_ERROR_BIT))
+
+#define I915_VMA_GGTT_BIT	14
+#define I915_VMA_CAN_FENCE_BIT	15
+#define I915_VMA_USERFAULT_BIT	16
+#define I915_VMA_GGTT_WRITE_BIT	17
+
+#define I915_VMA_GGTT		((int)BIT(I915_VMA_GGTT_BIT))
+#define I915_VMA_CAN_FENCE	((int)BIT(I915_VMA_CAN_FENCE_BIT))
+#define I915_VMA_USERFAULT	((int)BIT(I915_VMA_USERFAULT_BIT))
+#define I915_VMA_GGTT_WRITE	((int)BIT(I915_VMA_GGTT_WRITE_BIT))
+
+	struct i915_active active;
+
+#define I915_VMA_PAGES_BIAS 24
+#define I915_VMA_PAGES_ACTIVE (BIT(24) | 1)
+	atomic_t pages_count; /* number of active binds to the pages */
+	struct mutex pages_mutex; /* protect acquire/release of backing pages */
+
+	/**
+	 * Support different GGTT views into the same object.
+	 * This means there can be multiple VMA mappings per object and per VM.
+	 * i915_ggtt_view_type is used to distinguish between those entries.
+	 * The view type of zero (I915_GGTT_VIEW_NORMAL) is the default and is
+	 * also assumed in GEM functions which take no ggtt view parameter.
+	 */
+	struct i915_ggtt_view ggtt_view;
+
+	/** This object's place on the active/inactive lists */
+	struct list_head vm_link;
+
+	struct list_head obj_link; /* Link in the object's VMA list */
+	struct rb_node obj_node;
+	struct hlist_node obj_hash;
+
+	/** This vma's place in the execbuf reservation list */
+	struct list_head exec_link;
+	struct list_head reloc_link;
+
+	/** This vma's place in the eviction list */
+	struct list_head evict_link;
+
+	struct list_head closed_link;
+
+	/**
+	 * Used for performing relocations during execbuffer insertion.
+	 */
+	unsigned int *exec_flags;
+	struct hlist_node exec_node;
+	u32 exec_handle;
+};
+
+#endif
+
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index 1acb5db77431..6670a0763be2 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -519,7 +519,7 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
 	}
 }
 
-static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
+static void bdw_sseu_info_init(struct drm_i915_private *dev_priv)
 {
 	struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
 	int s, ss;
@@ -600,7 +600,7 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
 	sseu->has_eu_pg = 0;
 }
 
-static void haswell_sseu_info_init(struct drm_i915_private *dev_priv)
+static void hsw_sseu_info_init(struct drm_i915_private *dev_priv)
 {
 	struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
 	u32 fuse1;
@@ -1021,11 +1021,11 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
 
 	/* Initialize slice/subslice/EU info */
 	if (IS_HASWELL(dev_priv))
-		haswell_sseu_info_init(dev_priv);
+		hsw_sseu_info_init(dev_priv);
 	else if (IS_CHERRYVIEW(dev_priv))
 		cherryview_sseu_info_init(dev_priv);
 	else if (IS_BROADWELL(dev_priv))
-		broadwell_sseu_info_init(dev_priv);
+		bdw_sseu_info_init(dev_priv);
 	else if (IS_GEN(dev_priv, 9))
 		gen9_sseu_info_init(dev_priv);
 	else if (IS_GEN(dev_priv, 10))
@@ -1093,7 +1093,7 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv)
 		 * hooked up to an SFC (Scaler & Format Converter) unit.
 		 * In TGL each VDBOX has access to an SFC.
 		 */
-		if (IS_TIGERLAKE(dev_priv) || logical_vdbox++ % 2 == 0)
+		if (INTEL_GEN(dev_priv) >= 12 || logical_vdbox++ % 2 == 0)
 			RUNTIME_INFO(dev_priv)->vdbox_sfc_access |= BIT(i);
 	}
 	DRM_DEBUG_DRIVER("vdbox enable: %04x, instances: %04lx\n",
diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c
index e24c280e5930..d0d038b3cd79 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/intel_memory_region.c
@@ -16,6 +16,20 @@ const u32 intel_region_map[] = {
 	[INTEL_REGION_STOLEN] = REGION_MAP(INTEL_MEMORY_STOLEN, 0),
 };
 
+struct intel_memory_region *
+intel_memory_region_by_type(struct drm_i915_private *i915,
+			    enum intel_memory_type mem_type)
+{
+	struct intel_memory_region *mr;
+	int id;
+
+	for_each_memory_region(mr, i915, id)
+		if (mr->type == mem_type)
+			return mr;
+
+	return NULL;
+}
+
 static u64
 intel_memory_region_free_pages(struct intel_memory_region *mem,
 			       struct list_head *blocks)
@@ -37,7 +51,7 @@ __intel_memory_region_put_pages_buddy(struct intel_memory_region *mem,
 				      struct list_head *blocks)
 {
 	mutex_lock(&mem->mm_lock);
-	intel_memory_region_free_pages(mem, blocks);
+	mem->avail += intel_memory_region_free_pages(mem, blocks);
 	mutex_unlock(&mem->mm_lock);
 }
 
@@ -106,6 +120,7 @@ __intel_memory_region_get_pages_buddy(struct intel_memory_region *mem,
 			break;
 	} while (1);
 
+	mem->avail -= size;
 	mutex_unlock(&mem->mm_lock);
 	return 0;
 
@@ -164,6 +179,8 @@ intel_memory_region_create(struct drm_i915_private *i915,
 	mem->io_start = io_start;
 	mem->min_page_size = min_page_size;
 	mem->ops = ops;
+	mem->total = size;
+	mem->avail = mem->total;
 
 	mutex_init(&mem->objects.lock);
 	INIT_LIST_HEAD(&mem->objects.list);
@@ -185,6 +202,26 @@ err_free:
 	return ERR_PTR(err);
 }
 
+/**
+ * intel_memory_region_set_name - format and store a region's name
+ * @mem: memory region whose name is set
+ * @fmt: printf-style format string
+ * @...: arguments consumed by @fmt
+ *
+ * The formatted string is written into @mem->name with vsnprintf(), so it
+ * is always NUL-terminated and is silently truncated to fit
+ * sizeof(@mem->name).
+ */
+void intel_memory_region_set_name(struct intel_memory_region *mem,
+				  const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vsnprintf(mem->name, sizeof(mem->name), fmt, ap);
+	va_end(ap);
+}
+
 static void __intel_memory_region_destroy(struct kref *kref)
 {
 	struct intel_memory_region *mem =
diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h
index 238722009677..232490d89a83 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.h
+++ b/drivers/gpu/drm/i915/intel_memory_region.h
@@ -47,6 +47,10 @@ enum intel_region_id {
 #define I915_ALLOC_MIN_PAGE_SIZE  BIT(0)
 #define I915_ALLOC_CONTIGUOUS     BIT(1)
 
+#define for_each_memory_region(mr, i915, id) \
+	for (id = 0; id < ARRAY_SIZE((i915)->mm.regions); id++) \
+		for_each_if((mr) = (i915)->mm.regions[id])
+
 /**
  * Memory regions encoded as type | instance
  */
@@ -82,10 +86,13 @@ struct intel_memory_region {
 
 	resource_size_t io_start;
 	resource_size_t min_page_size;
+	resource_size_t total;
+	resource_size_t avail;
 
 	unsigned int type;
 	unsigned int instance;
 	unsigned int id;
+	char name[8];
 
 	dma_addr_t remap_addr;
 
@@ -125,5 +132,12 @@ void intel_memory_region_put(struct intel_memory_region *mem);
 
 int intel_memory_regions_hw_probe(struct drm_i915_private *i915);
 void intel_memory_regions_driver_release(struct drm_i915_private *i915);
+struct intel_memory_region *
+intel_memory_region_by_type(struct drm_i915_private *i915,
+			    enum intel_memory_type mem_type);
+
+__printf(2, 3) void
+intel_memory_region_set_name(struct intel_memory_region *mem,
+			     const char *fmt, ...);
 
 #endif
diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c
index 43b68b5fc562..4ed60e1f01db 100644
--- a/drivers/gpu/drm/i915/intel_pch.c
+++ b/drivers/gpu/drm/i915/intel_pch.c
@@ -12,90 +12,91 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id)
 {
 	switch (id) {
 	case INTEL_PCH_IBX_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found Ibex Peak PCH\n");
+		drm_dbg_kms(&dev_priv->drm, "Found Ibex Peak PCH\n");
 		WARN_ON(!IS_GEN(dev_priv, 5));
 		return PCH_IBX;
 	case INTEL_PCH_CPT_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found CougarPoint PCH\n");
+		drm_dbg_kms(&dev_priv->drm, "Found CougarPoint PCH\n");
 		WARN_ON(!IS_GEN(dev_priv, 6) && !IS_IVYBRIDGE(dev_priv));
 		return PCH_CPT;
 	case INTEL_PCH_PPT_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found PantherPoint PCH\n");
+		drm_dbg_kms(&dev_priv->drm, "Found PantherPoint PCH\n");
 		WARN_ON(!IS_GEN(dev_priv, 6) && !IS_IVYBRIDGE(dev_priv));
 		/* PantherPoint is CPT compatible */
 		return PCH_CPT;
 	case INTEL_PCH_LPT_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found LynxPoint PCH\n");
+		drm_dbg_kms(&dev_priv->drm, "Found LynxPoint PCH\n");
 		WARN_ON(!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv));
 		WARN_ON(IS_HSW_ULT(dev_priv) || IS_BDW_ULT(dev_priv));
 		return PCH_LPT;
 	case INTEL_PCH_LPT_LP_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found LynxPoint LP PCH\n");
+		drm_dbg_kms(&dev_priv->drm, "Found LynxPoint LP PCH\n");
 		WARN_ON(!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv));
 		WARN_ON(!IS_HSW_ULT(dev_priv) && !IS_BDW_ULT(dev_priv));
 		return PCH_LPT;
 	case INTEL_PCH_WPT_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found WildcatPoint PCH\n");
+		drm_dbg_kms(&dev_priv->drm, "Found WildcatPoint PCH\n");
 		WARN_ON(!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv));
 		WARN_ON(IS_HSW_ULT(dev_priv) || IS_BDW_ULT(dev_priv));
 		/* WildcatPoint is LPT compatible */
 		return PCH_LPT;
 	case INTEL_PCH_WPT_LP_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found WildcatPoint LP PCH\n");
+		drm_dbg_kms(&dev_priv->drm, "Found WildcatPoint LP PCH\n");
 		WARN_ON(!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv));
 		WARN_ON(!IS_HSW_ULT(dev_priv) && !IS_BDW_ULT(dev_priv));
 		/* WildcatPoint is LPT compatible */
 		return PCH_LPT;
 	case INTEL_PCH_SPT_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found SunrisePoint PCH\n");
+		drm_dbg_kms(&dev_priv->drm, "Found SunrisePoint PCH\n");
 		WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv));
 		return PCH_SPT;
 	case INTEL_PCH_SPT_LP_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found SunrisePoint LP PCH\n");
+		drm_dbg_kms(&dev_priv->drm, "Found SunrisePoint LP PCH\n");
 		WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv) &&
 			!IS_COFFEELAKE(dev_priv));
 		return PCH_SPT;
 	case INTEL_PCH_KBP_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found Kaby Lake PCH (KBP)\n");
+		drm_dbg_kms(&dev_priv->drm, "Found Kaby Lake PCH (KBP)\n");
 		WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv) &&
 			!IS_COFFEELAKE(dev_priv));
 		/* KBP is SPT compatible */
 		return PCH_SPT;
 	case INTEL_PCH_CNP_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found Cannon Lake PCH (CNP)\n");
+		drm_dbg_kms(&dev_priv->drm, "Found Cannon Lake PCH (CNP)\n");
 		WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv));
 		return PCH_CNP;
 	case INTEL_PCH_CNP_LP_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found Cannon Lake LP PCH (CNP-LP)\n");
+		drm_dbg_kms(&dev_priv->drm,
+			    "Found Cannon Lake LP PCH (CNP-LP)\n");
 		WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv));
 		return PCH_CNP;
 	case INTEL_PCH_CMP_DEVICE_ID_TYPE:
 	case INTEL_PCH_CMP2_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found Comet Lake PCH (CMP)\n");
+		drm_dbg_kms(&dev_priv->drm, "Found Comet Lake PCH (CMP)\n");
 		WARN_ON(!IS_COFFEELAKE(dev_priv));
 		/* CometPoint is CNP Compatible */
 		return PCH_CNP;
 	case INTEL_PCH_CMP_V_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found Comet Lake V PCH (CMP-V)\n");
+		drm_dbg_kms(&dev_priv->drm, "Found Comet Lake V PCH (CMP-V)\n");
 		WARN_ON(!IS_COFFEELAKE(dev_priv));
 		/* Comet Lake V PCH is based on KBP, which is SPT compatible */
 		return PCH_SPT;
 	case INTEL_PCH_ICP_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found Ice Lake PCH\n");
+		drm_dbg_kms(&dev_priv->drm, "Found Ice Lake PCH\n");
 		WARN_ON(!IS_ICELAKE(dev_priv));
 		return PCH_ICP;
 	case INTEL_PCH_MCC_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found Mule Creek Canyon PCH\n");
+		drm_dbg_kms(&dev_priv->drm, "Found Mule Creek Canyon PCH\n");
 		WARN_ON(!IS_ELKHARTLAKE(dev_priv));
 		return PCH_MCC;
 	case INTEL_PCH_TGP_DEVICE_ID_TYPE:
 	case INTEL_PCH_TGP2_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found Tiger Lake LP PCH\n");
+		drm_dbg_kms(&dev_priv->drm, "Found Tiger Lake LP PCH\n");
 		WARN_ON(!IS_TIGERLAKE(dev_priv));
 		return PCH_TGP;
 	case INTEL_PCH_JSP_DEVICE_ID_TYPE:
 	case INTEL_PCH_JSP2_DEVICE_ID_TYPE:
-		DRM_DEBUG_KMS("Found Jasper Lake PCH\n");
+		drm_dbg_kms(&dev_priv->drm, "Found Jasper Lake PCH\n");
 		WARN_ON(!IS_ELKHARTLAKE(dev_priv));
 		return PCH_JSP;
 	default:
@@ -145,9 +146,9 @@ intel_virt_detect_pch(const struct drm_i915_private *dev_priv)
 		id = INTEL_PCH_IBX_DEVICE_ID_TYPE;
 
 	if (id)
-		DRM_DEBUG_KMS("Assuming PCH ID %04x\n", id);
+		drm_dbg_kms(&dev_priv->drm, "Assuming PCH ID %04x\n", id);
 	else
-		DRM_DEBUG_KMS("Assuming no PCH\n");
+		drm_dbg_kms(&dev_priv->drm, "Assuming no PCH\n");
 
 	return id;
 }
@@ -201,13 +202,14 @@ void intel_detect_pch(struct drm_i915_private *dev_priv)
 	 * display.
 	 */
 	if (pch && !HAS_DISPLAY(dev_priv)) {
-		DRM_DEBUG_KMS("Display disabled, reverting to NOP PCH\n");
+		drm_dbg_kms(&dev_priv->drm,
+			    "Display disabled, reverting to NOP PCH\n");
 		dev_priv->pch_type = PCH_NOP;
 		dev_priv->pch_id = 0;
 	}
 
 	if (!pch)
-		DRM_DEBUG_KMS("No PCH found.\n");
+		drm_dbg_kms(&dev_priv->drm, "No PCH found.\n");
 
 	pci_dev_put(pch);
 }
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 31ec82337e4f..bd2d30ecc030 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -140,7 +140,7 @@ static void glk_init_clock_gating(struct drm_i915_private *dev_priv)
 
 }
 
-static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv)
+static void pnv_get_mem_freq(struct drm_i915_private *dev_priv)
 {
 	u32 tmp;
 
@@ -178,7 +178,7 @@ static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv)
 	dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
 }
 
-static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
+static void ilk_get_mem_freq(struct drm_i915_private *dev_priv)
 {
 	u16 ddrpll, csipll;
 
@@ -199,8 +199,8 @@ static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
 		dev_priv->mem_freq = 1600;
 		break;
 	default:
-		DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
-				 ddrpll & 0xff);
+		drm_dbg(&dev_priv->drm, "unknown memory frequency 0x%02x\n",
+			ddrpll & 0xff);
 		dev_priv->mem_freq = 0;
 		break;
 	}
@@ -228,8 +228,8 @@ static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
 		dev_priv->fsb_freq = 6400;
 		break;
 	default:
-		DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
-				 csipll & 0x3ff);
+		drm_dbg(&dev_priv->drm, "unknown fsb frequency 0x%04x\n",
+			csipll & 0x3ff);
 		dev_priv->fsb_freq = 0;
 		break;
 	}
@@ -314,7 +314,8 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
 
 	if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
 		      FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
-		DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");
+		drm_err(&dev_priv->drm,
+			"timed out waiting for Punit DDR DVFS request\n");
 
 	vlv_punit_put(dev_priv);
 }
@@ -383,9 +384,9 @@ static bool _intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enabl
 
 	trace_intel_memory_cxsr(dev_priv, was_enabled, enable);
 
-	DRM_DEBUG_KMS("memory self-refresh is %s (was %s)\n",
-		      enableddisabled(enable),
-		      enableddisabled(was_enabled));
+	drm_dbg_kms(&dev_priv->drm, "memory self-refresh is %s (was %s)\n",
+		    enableddisabled(enable),
+		    enableddisabled(was_enabled));
 
 	return was_enabled;
 }
@@ -510,8 +511,8 @@ static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv,
 	if (i9xx_plane == PLANE_B)
 		size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;
 
-	DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
-		      dsparb, plane_name(i9xx_plane), size);
+	drm_dbg_kms(&dev_priv->drm, "FIFO size - (0x%08x) %c: %d\n",
+		    dsparb, plane_name(i9xx_plane), size);
 
 	return size;
 }
@@ -527,8 +528,8 @@ static int i830_get_fifo_size(struct drm_i915_private *dev_priv,
 		size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
 	size >>= 1; /* Convert to cachelines */
 
-	DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
-		      dsparb, plane_name(i9xx_plane), size);
+	drm_dbg_kms(&dev_priv->drm, "FIFO size - (0x%08x) %c: %d\n",
+		    dsparb, plane_name(i9xx_plane), size);
 
 	return size;
 }
@@ -542,41 +543,45 @@ static int i845_get_fifo_size(struct drm_i915_private *dev_priv,
 	size = dsparb & 0x7f;
 	size >>= 2; /* Convert to cachelines */
 
-	DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
-		      dsparb, plane_name(i9xx_plane), size);
+	drm_dbg_kms(&dev_priv->drm, "FIFO size - (0x%08x) %c: %d\n",
+		    dsparb, plane_name(i9xx_plane), size);
 
 	return size;
 }
 
 /* Pineview has different values for various configs */
-static const struct intel_watermark_params pineview_display_wm = {
+static const struct intel_watermark_params pnv_display_wm = {
 	.fifo_size = PINEVIEW_DISPLAY_FIFO,
 	.max_wm = PINEVIEW_MAX_WM,
 	.default_wm = PINEVIEW_DFT_WM,
 	.guard_size = PINEVIEW_GUARD_WM,
 	.cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
 };
-static const struct intel_watermark_params pineview_display_hplloff_wm = {
+
+static const struct intel_watermark_params pnv_display_hplloff_wm = {
 	.fifo_size = PINEVIEW_DISPLAY_FIFO,
 	.max_wm = PINEVIEW_MAX_WM,
 	.default_wm = PINEVIEW_DFT_HPLLOFF_WM,
 	.guard_size = PINEVIEW_GUARD_WM,
 	.cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
 };
-static const struct intel_watermark_params pineview_cursor_wm = {
+
+static const struct intel_watermark_params pnv_cursor_wm = {
 	.fifo_size = PINEVIEW_CURSOR_FIFO,
 	.max_wm = PINEVIEW_CURSOR_MAX_WM,
 	.default_wm = PINEVIEW_CURSOR_DFT_WM,
 	.guard_size = PINEVIEW_CURSOR_GUARD_WM,
 	.cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
 };
-static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
+
+static const struct intel_watermark_params pnv_cursor_hplloff_wm = {
 	.fifo_size = PINEVIEW_CURSOR_FIFO,
 	.max_wm = PINEVIEW_CURSOR_MAX_WM,
 	.default_wm = PINEVIEW_CURSOR_DFT_WM,
 	.guard_size = PINEVIEW_CURSOR_GUARD_WM,
 	.cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
 };
+
 static const struct intel_watermark_params i965_cursor_wm_info = {
 	.fifo_size = I965_CURSOR_FIFO,
 	.max_wm = I965_CURSOR_MAX_WM,
@@ -584,6 +589,7 @@ static const struct intel_watermark_params i965_cursor_wm_info = {
 	.guard_size = 2,
 	.cacheline_size = I915_FIFO_LINE_SIZE,
 };
+
 static const struct intel_watermark_params i945_wm_info = {
 	.fifo_size = I945_FIFO_SIZE,
 	.max_wm = I915_MAX_WM,
@@ -591,6 +597,7 @@ static const struct intel_watermark_params i945_wm_info = {
 	.guard_size = 2,
 	.cacheline_size = I915_FIFO_LINE_SIZE,
 };
+
 static const struct intel_watermark_params i915_wm_info = {
 	.fifo_size = I915_FIFO_SIZE,
 	.max_wm = I915_MAX_WM,
@@ -598,6 +605,7 @@ static const struct intel_watermark_params i915_wm_info = {
 	.guard_size = 2,
 	.cacheline_size = I915_FIFO_LINE_SIZE,
 };
+
 static const struct intel_watermark_params i830_a_wm_info = {
 	.fifo_size = I855GM_FIFO_SIZE,
 	.max_wm = I915_MAX_WM,
@@ -605,6 +613,7 @@ static const struct intel_watermark_params i830_a_wm_info = {
 	.guard_size = 2,
 	.cacheline_size = I830_FIFO_LINE_SIZE,
 };
+
 static const struct intel_watermark_params i830_bc_wm_info = {
 	.fifo_size = I855GM_FIFO_SIZE,
 	.max_wm = I915_MAX_WM/2,
@@ -612,6 +621,7 @@ static const struct intel_watermark_params i830_bc_wm_info = {
 	.guard_size = 2,
 	.cacheline_size = I830_FIFO_LINE_SIZE,
 };
+
 static const struct intel_watermark_params i845_wm_info = {
 	.fifo_size = I830_FIFO_SIZE,
 	.max_wm = I915_MAX_WM,
@@ -848,7 +858,7 @@ static struct intel_crtc *single_enabled_crtc(struct drm_i915_private *dev_priv)
 	return enabled;
 }
 
-static void pineview_update_wm(struct intel_crtc *unused_crtc)
+static void pnv_update_wm(struct intel_crtc *unused_crtc)
 {
 	struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
 	struct intel_crtc *crtc;
@@ -861,7 +871,8 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc)
 					 dev_priv->fsb_freq,
 					 dev_priv->mem_freq);
 	if (!latency) {
-		DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
+		drm_dbg_kms(&dev_priv->drm,
+			    "Unknown FSB/MEM found, disable CxSR\n");
 		intel_set_memory_cxsr(dev_priv, false);
 		return;
 	}
@@ -876,18 +887,18 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc)
 		int clock = adjusted_mode->crtc_clock;
 
 		/* Display SR */
-		wm = intel_calculate_wm(clock, &pineview_display_wm,
-					pineview_display_wm.fifo_size,
+		wm = intel_calculate_wm(clock, &pnv_display_wm,
+					pnv_display_wm.fifo_size,
 					cpp, latency->display_sr);
 		reg = I915_READ(DSPFW1);
 		reg &= ~DSPFW_SR_MASK;
 		reg |= FW_WM(wm, SR);
 		I915_WRITE(DSPFW1, reg);
-		DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);
+		drm_dbg_kms(&dev_priv->drm, "DSPFW1 register is %x\n", reg);
 
 		/* cursor SR */
-		wm = intel_calculate_wm(clock, &pineview_cursor_wm,
-					pineview_display_wm.fifo_size,
+		wm = intel_calculate_wm(clock, &pnv_cursor_wm,
+					pnv_display_wm.fifo_size,
 					4, latency->cursor_sr);
 		reg = I915_READ(DSPFW3);
 		reg &= ~DSPFW_CURSOR_SR_MASK;
@@ -895,8 +906,8 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc)
 		I915_WRITE(DSPFW3, reg);
 
 		/* Display HPLL off SR */
-		wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
-					pineview_display_hplloff_wm.fifo_size,
+		wm = intel_calculate_wm(clock, &pnv_display_hplloff_wm,
+					pnv_display_hplloff_wm.fifo_size,
 					cpp, latency->display_hpll_disable);
 		reg = I915_READ(DSPFW3);
 		reg &= ~DSPFW_HPLL_SR_MASK;
@@ -904,14 +915,14 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc)
 		I915_WRITE(DSPFW3, reg);
 
 		/* cursor HPLL off SR */
-		wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
-					pineview_display_hplloff_wm.fifo_size,
+		wm = intel_calculate_wm(clock, &pnv_cursor_hplloff_wm,
+					pnv_display_hplloff_wm.fifo_size,
 					4, latency->cursor_hpll_disable);
 		reg = I915_READ(DSPFW3);
 		reg &= ~DSPFW_HPLL_CURSOR_MASK;
 		reg |= FW_WM(wm, HPLL_CURSOR);
 		I915_WRITE(DSPFW3, reg);
-		DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);
+		drm_dbg_kms(&dev_priv->drm, "DSPFW3 register is %x\n", reg);
 
 		intel_set_memory_cxsr(dev_priv, true);
 	} else {
@@ -1202,6 +1213,7 @@ static bool g4x_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
 				     const struct intel_plane_state *plane_state)
 {
 	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 	int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
 	enum plane_id plane_id = plane->id;
 	bool dirty = false;
@@ -1254,16 +1266,18 @@ static bool g4x_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
 
  out:
 	if (dirty) {
-		DRM_DEBUG_KMS("%s watermarks: normal=%d, SR=%d, HPLL=%d\n",
-			      plane->base.name,
-			      crtc_state->wm.g4x.raw[G4X_WM_LEVEL_NORMAL].plane[plane_id],
-			      crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].plane[plane_id],
-			      crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].plane[plane_id]);
+		drm_dbg_kms(&dev_priv->drm,
+			    "%s watermarks: normal=%d, SR=%d, HPLL=%d\n",
+			    plane->base.name,
+			    crtc_state->wm.g4x.raw[G4X_WM_LEVEL_NORMAL].plane[plane_id],
+			    crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].plane[plane_id],
+			    crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].plane[plane_id]);
 
 		if (plane_id == PLANE_PRIMARY)
-			DRM_DEBUG_KMS("FBC watermarks: SR=%d, HPLL=%d\n",
-				      crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].fbc,
-				      crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].fbc);
+			drm_dbg_kms(&dev_priv->drm,
+				    "FBC watermarks: SR=%d, HPLL=%d\n",
+				    crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].fbc,
+				    crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].fbc);
 	}
 
 	return dirty;
@@ -1781,6 +1795,7 @@ static bool vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
 				     const struct intel_plane_state *plane_state)
 {
 	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 	enum plane_id plane_id = plane->id;
 	int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
 	int level;
@@ -1808,11 +1823,12 @@ static bool vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
 
 out:
 	if (dirty)
-		DRM_DEBUG_KMS("%s watermarks: PM2=%d, PM5=%d, DDR DVFS=%d\n",
-			      plane->base.name,
-			      crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id],
-			      crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id],
-			      crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]);
+		drm_dbg_kms(&dev_priv->drm,
+			    "%s watermarks: PM2=%d, PM5=%d, DDR DVFS=%d\n",
+			    plane->base.name,
+			    crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id],
+			    crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id],
+			    crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]);
 
 	return dirty;
 }
@@ -2227,8 +2243,9 @@ static void i965_update_wm(struct intel_crtc *unused_crtc)
 		if (srwm < 0)
 			srwm = 1;
 		srwm &= 0x1ff;
-		DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
-			      entries, srwm);
+		drm_dbg_kms(&dev_priv->drm,
+			    "self-refresh entries: %d, wm: %d\n",
+			    entries, srwm);
 
 		entries = intel_wm_method2(clock, htotal,
 					   crtc->base.cursor->state->crtc_w, 4,
@@ -2241,8 +2258,9 @@ static void i965_update_wm(struct intel_crtc *unused_crtc)
 		if (cursor_sr > i965_cursor_wm_info.max_wm)
 			cursor_sr = i965_cursor_wm_info.max_wm;
 
-		DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
-			      "cursor %d\n", srwm, cursor_sr);
+		drm_dbg_kms(&dev_priv->drm,
+			    "self-refresh watermark: display plane %d cursor %d\n",
+			    srwm, cursor_sr);
 
 		cxsr_enabled = true;
 	} else {
@@ -2251,8 +2269,9 @@ static void i965_update_wm(struct intel_crtc *unused_crtc)
 		intel_set_memory_cxsr(dev_priv, false);
 	}
 
-	DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
-		      srwm);
+	drm_dbg_kms(&dev_priv->drm,
+		    "Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
+		    srwm);
 
 	/* 965 has limitations... */
 	I915_WRITE(DSPFW1, FW_WM(srwm, SR) |
@@ -2342,7 +2361,8 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc)
 			planeb_wm = wm_info->max_wm;
 	}
 
-	DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);
+	drm_dbg_kms(&dev_priv->drm,
+		    "FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);
 
 	if (IS_I915GM(dev_priv) && enabled) {
 		struct drm_i915_gem_object *obj;
@@ -2384,7 +2404,8 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc)
 		entries = intel_wm_method2(clock, htotal, hdisplay, cpp,
 					   sr_latency_ns / 100);
 		entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
-		DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
+		drm_dbg_kms(&dev_priv->drm,
+			    "self-refresh entries: %d\n", entries);
 		srwm = wm_info->fifo_size - entries;
 		if (srwm < 0)
 			srwm = 1;
@@ -2396,8 +2417,9 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc)
 			I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
 	}
 
-	DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
-		      planea_wm, planeb_wm, cwm, srwm);
+	drm_dbg_kms(&dev_priv->drm,
+		    "Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
+		    planea_wm, planeb_wm, cwm, srwm);
 
 	fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
 	fwater_hi = (cwm & 0x1f);
@@ -2433,7 +2455,8 @@ static void i845_update_wm(struct intel_crtc *unused_crtc)
 	fwater_lo = I915_READ(FW_BLC) & ~0xfff;
 	fwater_lo |= (3<<8) | planea_wm;
 
-	DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);
+	drm_dbg_kms(&dev_priv->drm,
+		    "Setting FIFO watermarks - A: %d\n", planea_wm);
 
 	I915_WRITE(FW_BLC, fwater_lo);
 }
@@ -2832,7 +2855,8 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
 					     &val, NULL);
 
 		if (ret) {
-			DRM_ERROR("SKL Mailbox read error = %d\n", ret);
+			drm_err(&dev_priv->drm,
+				"SKL Mailbox read error = %d\n", ret);
 			return;
 		}
 
@@ -2850,7 +2874,8 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
 					     GEN9_PCODE_READ_MEM_LATENCY,
 					     &val, NULL);
 		if (ret) {
-			DRM_ERROR("SKL Mailbox read error = %d\n", ret);
+			drm_err(&dev_priv->drm,
+				"SKL Mailbox read error = %d\n", ret);
 			return;
 		}
 
@@ -2968,8 +2993,9 @@ static void intel_print_wm_latency(struct drm_i915_private *dev_priv,
 		unsigned int latency = wm[level];
 
 		if (latency == 0) {
-			DRM_DEBUG_KMS("%s WM%d latency not provided\n",
-				      name, level);
+			drm_dbg_kms(&dev_priv->drm,
+				    "%s WM%d latency not provided\n",
+				    name, level);
 			continue;
 		}
 
@@ -2982,9 +3008,9 @@ static void intel_print_wm_latency(struct drm_i915_private *dev_priv,
 		else if (level > 0)
 			latency *= 5;
 
-		DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n",
-			      name, level, wm[level],
-			      latency / 10, latency % 10);
+		drm_dbg_kms(&dev_priv->drm,
+			    "%s WM%d latency %u (%u.%u usec)\n", name, level,
+			    wm[level], latency / 10, latency % 10);
 	}
 }
 
@@ -3018,7 +3044,8 @@ static void snb_wm_latency_quirk(struct drm_i915_private *dev_priv)
 	if (!changed)
 		return;
 
-	DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n");
+	drm_dbg_kms(&dev_priv->drm,
+		    "WM latency values increased to avoid potential underruns\n");
 	intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
 	intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
 	intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
@@ -3046,7 +3073,8 @@ static void snb_wm_lp3_irq_quirk(struct drm_i915_private *dev_priv)
 	dev_priv->wm.spr_latency[3] = 0;
 	dev_priv->wm.cur_latency[3] = 0;
 
-	DRM_DEBUG_KMS("LP3 watermarks disabled due to potential for lost interrupts\n");
+	drm_dbg_kms(&dev_priv->drm,
+		    "LP3 watermarks disabled due to potential for lost interrupts\n");
 	intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
 	intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
 	intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
@@ -3096,7 +3124,7 @@ static bool ilk_validate_pipe_wm(const struct drm_i915_private *dev_priv,
 
 	/* At least LP0 must be valid */
 	if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0])) {
-		DRM_DEBUG_KMS("LP0 watermark invalid\n");
+		drm_dbg_kms(&dev_priv->drm, "LP0 watermark invalid\n");
 		return false;
 	}
 
@@ -3673,7 +3701,7 @@ skl_setup_sagv_block_time(struct drm_i915_private *dev_priv)
 			return;
 		}
 
-		DRM_DEBUG_DRIVER("Couldn't read SAGV block time!\n");
+		drm_dbg(&dev_priv->drm, "Couldn't read SAGV block time!\n");
 	} else if (IS_GEN(dev_priv, 11)) {
 		dev_priv->sagv_block_time_us = 10;
 		return;
@@ -3713,7 +3741,7 @@ intel_enable_sagv(struct drm_i915_private *dev_priv)
 	if (dev_priv->sagv_status == I915_SAGV_ENABLED)
 		return 0;
 
-	DRM_DEBUG_KMS("Enabling SAGV\n");
+	drm_dbg_kms(&dev_priv->drm, "Enabling SAGV\n");
 	ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,
 				      GEN9_SAGV_ENABLE);
 
@@ -3724,11 +3752,11 @@ intel_enable_sagv(struct drm_i915_private *dev_priv)
 	 * don't actually have SAGV.
 	 */
 	if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
-		DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
+		drm_dbg(&dev_priv->drm, "No SAGV found on system, ignoring\n");
 		dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
 		return 0;
 	} else if (ret < 0) {
-		DRM_ERROR("Failed to enable SAGV\n");
+		drm_err(&dev_priv->drm, "Failed to enable SAGV\n");
 		return ret;
 	}
 
@@ -3747,7 +3775,7 @@ intel_disable_sagv(struct drm_i915_private *dev_priv)
 	if (dev_priv->sagv_status == I915_SAGV_DISABLED)
 		return 0;
 
-	DRM_DEBUG_KMS("Disabling SAGV\n");
+	drm_dbg_kms(&dev_priv->drm, "Disabling SAGV\n");
 	/* bspec says to keep retrying for at least 1 ms */
 	ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL,
 				GEN9_SAGV_DISABLE,
@@ -3758,11 +3786,11 @@ intel_disable_sagv(struct drm_i915_private *dev_priv)
 	 * don't actually have SAGV.
 	 */
 	if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
-		DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
+		drm_dbg(&dev_priv->drm, "No SAGV found on system, ignoring\n");
 		dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
 		return 0;
 	} else if (ret < 0) {
-		DRM_ERROR("Failed to disable SAGV (%d)\n", ret);
+		drm_err(&dev_priv->drm, "Failed to disable SAGV (%d)\n", ret);
 		return ret;
 	}
 
@@ -4331,9 +4359,10 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *crtc_state,
 	}
 
 	if (level < 0) {
-		DRM_DEBUG_KMS("Requested display configuration exceeds system DDB limitations");
-		DRM_DEBUG_KMS("minimum required %d/%d\n", blocks,
-			      alloc_size);
+		drm_dbg_kms(&dev_priv->drm,
+			    "Requested display configuration exceeds system DDB limitations\n");
+		drm_dbg_kms(&dev_priv->drm, "minimum required %d/%d\n",
+			    blocks, alloc_size);
 		return -EINVAL;
 	}
 
@@ -4561,7 +4590,8 @@ skl_compute_wm_params(const struct intel_crtc_state *crtc_state,
 	/* only planar format has two planes */
 	if (color_plane == 1 &&
 	    !intel_format_info_is_yuv_semiplanar(format, modifier)) {
-		DRM_DEBUG_KMS("Non planar format have single plane\n");
+		drm_dbg_kms(&dev_priv->drm,
+			    "Non planar format have single plane\n");
 		return -EINVAL;
 	}
 
@@ -5260,10 +5290,11 @@ skl_print_wm_changes(struct intel_atomic_state *state)
 			if (skl_ddb_entry_equal(old, new))
 				continue;
 
-			DRM_DEBUG_KMS("[PLANE:%d:%s] ddb (%4d - %4d) -> (%4d - %4d), size %4d -> %4d\n",
-				      plane->base.base.id, plane->base.name,
-				      old->start, old->end, new->start, new->end,
-				      skl_ddb_entry_size(old), skl_ddb_entry_size(new));
+			drm_dbg_kms(&dev_priv->drm,
+				    "[PLANE:%d:%s] ddb (%4d - %4d) -> (%4d - %4d), size %4d -> %4d\n",
+				    plane->base.base.id, plane->base.name,
+				    old->start, old->end, new->start, new->end,
+				    skl_ddb_entry_size(old), skl_ddb_entry_size(new));
 		}
 
 		for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
@@ -5276,70 +5307,74 @@ skl_print_wm_changes(struct intel_atomic_state *state)
 			if (skl_plane_wm_equals(dev_priv, old_wm, new_wm))
 				continue;
 
-			DRM_DEBUG_KMS("[PLANE:%d:%s]   level %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm"
-				      " -> %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm\n",
-				      plane->base.base.id, plane->base.name,
-				      enast(old_wm->wm[0].plane_en), enast(old_wm->wm[1].plane_en),
-				      enast(old_wm->wm[2].plane_en), enast(old_wm->wm[3].plane_en),
-				      enast(old_wm->wm[4].plane_en), enast(old_wm->wm[5].plane_en),
-				      enast(old_wm->wm[6].plane_en), enast(old_wm->wm[7].plane_en),
-				      enast(old_wm->trans_wm.plane_en),
-				      enast(new_wm->wm[0].plane_en), enast(new_wm->wm[1].plane_en),
-				      enast(new_wm->wm[2].plane_en), enast(new_wm->wm[3].plane_en),
-				      enast(new_wm->wm[4].plane_en), enast(new_wm->wm[5].plane_en),
-				      enast(new_wm->wm[6].plane_en), enast(new_wm->wm[7].plane_en),
-				      enast(new_wm->trans_wm.plane_en));
-
-			DRM_DEBUG_KMS("[PLANE:%d:%s]   lines %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d"
+			drm_dbg_kms(&dev_priv->drm,
+				    "[PLANE:%d:%s]   level %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm"
+				    " -> %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm\n",
+				    plane->base.base.id, plane->base.name,
+				    enast(old_wm->wm[0].plane_en), enast(old_wm->wm[1].plane_en),
+				    enast(old_wm->wm[2].plane_en), enast(old_wm->wm[3].plane_en),
+				    enast(old_wm->wm[4].plane_en), enast(old_wm->wm[5].plane_en),
+				    enast(old_wm->wm[6].plane_en), enast(old_wm->wm[7].plane_en),
+				    enast(old_wm->trans_wm.plane_en),
+				    enast(new_wm->wm[0].plane_en), enast(new_wm->wm[1].plane_en),
+				    enast(new_wm->wm[2].plane_en), enast(new_wm->wm[3].plane_en),
+				    enast(new_wm->wm[4].plane_en), enast(new_wm->wm[5].plane_en),
+				    enast(new_wm->wm[6].plane_en), enast(new_wm->wm[7].plane_en),
+				    enast(new_wm->trans_wm.plane_en));
+
+			drm_dbg_kms(&dev_priv->drm,
+				    "[PLANE:%d:%s]   lines %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d"
-				      " -> %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d\n",
+				    " -> %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d\n",
-				      plane->base.base.id, plane->base.name,
-				      enast(old_wm->wm[0].ignore_lines), old_wm->wm[0].plane_res_l,
-				      enast(old_wm->wm[1].ignore_lines), old_wm->wm[1].plane_res_l,
-				      enast(old_wm->wm[2].ignore_lines), old_wm->wm[2].plane_res_l,
-				      enast(old_wm->wm[3].ignore_lines), old_wm->wm[3].plane_res_l,
-				      enast(old_wm->wm[4].ignore_lines), old_wm->wm[4].plane_res_l,
-				      enast(old_wm->wm[5].ignore_lines), old_wm->wm[5].plane_res_l,
-				      enast(old_wm->wm[6].ignore_lines), old_wm->wm[6].plane_res_l,
-				      enast(old_wm->wm[7].ignore_lines), old_wm->wm[7].plane_res_l,
-				      enast(old_wm->trans_wm.ignore_lines), old_wm->trans_wm.plane_res_l,
-
-				      enast(new_wm->wm[0].ignore_lines), new_wm->wm[0].plane_res_l,
-				      enast(new_wm->wm[1].ignore_lines), new_wm->wm[1].plane_res_l,
-				      enast(new_wm->wm[2].ignore_lines), new_wm->wm[2].plane_res_l,
-				      enast(new_wm->wm[3].ignore_lines), new_wm->wm[3].plane_res_l,
-				      enast(new_wm->wm[4].ignore_lines), new_wm->wm[4].plane_res_l,
-				      enast(new_wm->wm[5].ignore_lines), new_wm->wm[5].plane_res_l,
-				      enast(new_wm->wm[6].ignore_lines), new_wm->wm[6].plane_res_l,
-				      enast(new_wm->wm[7].ignore_lines), new_wm->wm[7].plane_res_l,
-				      enast(new_wm->trans_wm.ignore_lines), new_wm->trans_wm.plane_res_l);
-
-			DRM_DEBUG_KMS("[PLANE:%d:%s]  blocks %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d"
-				      " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n",
-				      plane->base.base.id, plane->base.name,
-				      old_wm->wm[0].plane_res_b, old_wm->wm[1].plane_res_b,
-				      old_wm->wm[2].plane_res_b, old_wm->wm[3].plane_res_b,
-				      old_wm->wm[4].plane_res_b, old_wm->wm[5].plane_res_b,
-				      old_wm->wm[6].plane_res_b, old_wm->wm[7].plane_res_b,
-				      old_wm->trans_wm.plane_res_b,
-				      new_wm->wm[0].plane_res_b, new_wm->wm[1].plane_res_b,
-				      new_wm->wm[2].plane_res_b, new_wm->wm[3].plane_res_b,
-				      new_wm->wm[4].plane_res_b, new_wm->wm[5].plane_res_b,
-				      new_wm->wm[6].plane_res_b, new_wm->wm[7].plane_res_b,
-				      new_wm->trans_wm.plane_res_b);
-
-			DRM_DEBUG_KMS("[PLANE:%d:%s] min_ddb %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d"
-				      " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n",
-				      plane->base.base.id, plane->base.name,
-				      old_wm->wm[0].min_ddb_alloc, old_wm->wm[1].min_ddb_alloc,
-				      old_wm->wm[2].min_ddb_alloc, old_wm->wm[3].min_ddb_alloc,
-				      old_wm->wm[4].min_ddb_alloc, old_wm->wm[5].min_ddb_alloc,
-				      old_wm->wm[6].min_ddb_alloc, old_wm->wm[7].min_ddb_alloc,
-				      old_wm->trans_wm.min_ddb_alloc,
-				      new_wm->wm[0].min_ddb_alloc, new_wm->wm[1].min_ddb_alloc,
-				      new_wm->wm[2].min_ddb_alloc, new_wm->wm[3].min_ddb_alloc,
-				      new_wm->wm[4].min_ddb_alloc, new_wm->wm[5].min_ddb_alloc,
-				      new_wm->wm[6].min_ddb_alloc, new_wm->wm[7].min_ddb_alloc,
-				      new_wm->trans_wm.min_ddb_alloc);
+				    plane->base.base.id, plane->base.name,
+				    enast(old_wm->wm[0].ignore_lines), old_wm->wm[0].plane_res_l,
+				    enast(old_wm->wm[1].ignore_lines), old_wm->wm[1].plane_res_l,
+				    enast(old_wm->wm[2].ignore_lines), old_wm->wm[2].plane_res_l,
+				    enast(old_wm->wm[3].ignore_lines), old_wm->wm[3].plane_res_l,
+				    enast(old_wm->wm[4].ignore_lines), old_wm->wm[4].plane_res_l,
+				    enast(old_wm->wm[5].ignore_lines), old_wm->wm[5].plane_res_l,
+				    enast(old_wm->wm[6].ignore_lines), old_wm->wm[6].plane_res_l,
+				    enast(old_wm->wm[7].ignore_lines), old_wm->wm[7].plane_res_l,
+				    enast(old_wm->trans_wm.ignore_lines), old_wm->trans_wm.plane_res_l,
+
+				    enast(new_wm->wm[0].ignore_lines), new_wm->wm[0].plane_res_l,
+				    enast(new_wm->wm[1].ignore_lines), new_wm->wm[1].plane_res_l,
+				    enast(new_wm->wm[2].ignore_lines), new_wm->wm[2].plane_res_l,
+				    enast(new_wm->wm[3].ignore_lines), new_wm->wm[3].plane_res_l,
+				    enast(new_wm->wm[4].ignore_lines), new_wm->wm[4].plane_res_l,
+				    enast(new_wm->wm[5].ignore_lines), new_wm->wm[5].plane_res_l,
+				    enast(new_wm->wm[6].ignore_lines), new_wm->wm[6].plane_res_l,
+				    enast(new_wm->wm[7].ignore_lines), new_wm->wm[7].plane_res_l,
+				    enast(new_wm->trans_wm.ignore_lines), new_wm->trans_wm.plane_res_l);
+
+			drm_dbg_kms(&dev_priv->drm,
+				    "[PLANE:%d:%s]  blocks %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d"
+				    " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n",
+				    plane->base.base.id, plane->base.name,
+				    old_wm->wm[0].plane_res_b, old_wm->wm[1].plane_res_b,
+				    old_wm->wm[2].plane_res_b, old_wm->wm[3].plane_res_b,
+				    old_wm->wm[4].plane_res_b, old_wm->wm[5].plane_res_b,
+				    old_wm->wm[6].plane_res_b, old_wm->wm[7].plane_res_b,
+				    old_wm->trans_wm.plane_res_b,
+				    new_wm->wm[0].plane_res_b, new_wm->wm[1].plane_res_b,
+				    new_wm->wm[2].plane_res_b, new_wm->wm[3].plane_res_b,
+				    new_wm->wm[4].plane_res_b, new_wm->wm[5].plane_res_b,
+				    new_wm->wm[6].plane_res_b, new_wm->wm[7].plane_res_b,
+				    new_wm->trans_wm.plane_res_b);
+
+			drm_dbg_kms(&dev_priv->drm,
+				    "[PLANE:%d:%s] min_ddb %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d"
+				    " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n",
+				    plane->base.base.id, plane->base.name,
+				    old_wm->wm[0].min_ddb_alloc, old_wm->wm[1].min_ddb_alloc,
+				    old_wm->wm[2].min_ddb_alloc, old_wm->wm[3].min_ddb_alloc,
+				    old_wm->wm[4].min_ddb_alloc, old_wm->wm[5].min_ddb_alloc,
+				    old_wm->wm[6].min_ddb_alloc, old_wm->wm[7].min_ddb_alloc,
+				    old_wm->trans_wm.min_ddb_alloc,
+				    new_wm->wm[0].min_ddb_alloc, new_wm->wm[1].min_ddb_alloc,
+				    new_wm->wm[2].min_ddb_alloc, new_wm->wm[3].min_ddb_alloc,
+				    new_wm->wm[4].min_ddb_alloc, new_wm->wm[5].min_ddb_alloc,
+				    new_wm->wm[6].min_ddb_alloc, new_wm->wm[7].min_ddb_alloc,
+				    new_wm->trans_wm.min_ddb_alloc);
 		}
 	}
 }
@@ -5931,19 +5966,22 @@ void g4x_wm_get_hw_state(struct drm_i915_private *dev_priv)
 		crtc_state->wm.g4x.optimal = *active;
 		crtc_state->wm.g4x.intermediate = *active;
 
-		DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite=%d\n",
-			      pipe_name(pipe),
-			      wm->pipe[pipe].plane[PLANE_PRIMARY],
-			      wm->pipe[pipe].plane[PLANE_CURSOR],
-			      wm->pipe[pipe].plane[PLANE_SPRITE0]);
+		drm_dbg_kms(&dev_priv->drm,
+			    "Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite=%d\n",
+			    pipe_name(pipe),
+			    wm->pipe[pipe].plane[PLANE_PRIMARY],
+			    wm->pipe[pipe].plane[PLANE_CURSOR],
+			    wm->pipe[pipe].plane[PLANE_SPRITE0]);
 	}
 
-	DRM_DEBUG_KMS("Initial SR watermarks: plane=%d, cursor=%d fbc=%d\n",
-		      wm->sr.plane, wm->sr.cursor, wm->sr.fbc);
-	DRM_DEBUG_KMS("Initial HPLL watermarks: plane=%d, SR cursor=%d fbc=%d\n",
-		      wm->hpll.plane, wm->hpll.cursor, wm->hpll.fbc);
-	DRM_DEBUG_KMS("Initial SR=%s HPLL=%s FBC=%s\n",
-		      yesno(wm->cxsr), yesno(wm->hpll_en), yesno(wm->fbc_en));
+	drm_dbg_kms(&dev_priv->drm,
+		    "Initial SR watermarks: plane=%d, cursor=%d fbc=%d\n",
+		    wm->sr.plane, wm->sr.cursor, wm->sr.fbc);
+	drm_dbg_kms(&dev_priv->drm,
+		    "Initial HPLL watermarks: plane=%d, SR cursor=%d fbc=%d\n",
+		    wm->hpll.plane, wm->hpll.cursor, wm->hpll.fbc);
+	drm_dbg_kms(&dev_priv->drm, "Initial SR=%s HPLL=%s FBC=%s\n",
+		    yesno(wm->cxsr), yesno(wm->hpll_en), yesno(wm->fbc_en));
 }
 
 void g4x_wm_sanitize(struct drm_i915_private *dev_priv)
@@ -6035,8 +6073,9 @@ void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv)
 
 		if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
 			      FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) {
-			DRM_DEBUG_KMS("Punit not acking DDR DVFS request, "
-				      "assuming DDR DVFS is disabled\n");
+			drm_dbg_kms(&dev_priv->drm,
+				    "Punit not acking DDR DVFS request, "
+				    "assuming DDR DVFS is disabled\n");
 			dev_priv->wm.max_level = VLV_WM_LEVEL_PM5;
 		} else {
 			val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
@@ -6087,16 +6126,18 @@ void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv)
 		crtc_state->wm.vlv.optimal = *active;
 		crtc_state->wm.vlv.intermediate = *active;
 
-		DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n",
-			      pipe_name(pipe),
-			      wm->pipe[pipe].plane[PLANE_PRIMARY],
-			      wm->pipe[pipe].plane[PLANE_CURSOR],
-			      wm->pipe[pipe].plane[PLANE_SPRITE0],
-			      wm->pipe[pipe].plane[PLANE_SPRITE1]);
+		drm_dbg_kms(&dev_priv->drm,
+			    "Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n",
+			    pipe_name(pipe),
+			    wm->pipe[pipe].plane[PLANE_PRIMARY],
+			    wm->pipe[pipe].plane[PLANE_CURSOR],
+			    wm->pipe[pipe].plane[PLANE_SPRITE0],
+			    wm->pipe[pipe].plane[PLANE_SPRITE1]);
 	}
 
-	DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n",
-		      wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr);
+	drm_dbg_kms(&dev_priv->drm,
+		    "Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n",
+		    wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr);
 }
 
 void vlv_wm_sanitize(struct drm_i915_private *dev_priv)
@@ -6412,8 +6453,9 @@ static void gen6_check_mch_setup(struct drm_i915_private *dev_priv)
 
 	tmp = I915_READ(MCH_SSKPD);
 	if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
-		DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n",
-			      tmp);
+		drm_dbg_kms(&dev_priv->drm,
+			    "Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n",
+			    tmp);
 }
 
 static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -6590,6 +6632,17 @@ static void icl_init_clock_gating(struct drm_i915_private *dev_priv)
 	/* WaEnable32PlaneMode:icl */
 	I915_WRITE(GEN9_CSFE_CHICKEN1_RCS,
 		   _MASKED_BIT_ENABLE(GEN11_ENABLE_32_PLANE_MODE));
+
+	/*
+	 * Wa_1408615072:icl,ehl  (vsunit)
+	 * Wa_1407596294:icl,ehl  (hsunit)
+	 */
+	intel_uncore_rmw(&dev_priv->uncore, UNSLICE_UNIT_LEVEL_CLKGATE,
+			 0, VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);
+
+	/* Wa_1407352427:icl,ehl */
+	intel_uncore_rmw(&dev_priv->uncore, UNSLICE_UNIT_LEVEL_CLKGATE2,
+			 0, PSDUNIT_CLKGATE_DIS);
 }
 
 static void tgl_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -6597,6 +6650,10 @@ static void tgl_init_clock_gating(struct drm_i915_private *dev_priv)
 	u32 vd_pg_enable = 0;
 	unsigned int i;
 
+	/* Wa_1408615072:tgl */
+	intel_uncore_rmw(&dev_priv->uncore, UNSLICE_UNIT_LEVEL_CLKGATE2,
+			 0, VSUNIT_CLKGATE_DIS_TGL);
+
 	/* This is not a WA. Enable VD HCP & MFX_ENC powergate */
 	for (i = 0; i < I915_MAX_VCS; i++) {
 		if (HAS_ENGINE(dev_priv, _VCS(i)))
@@ -7113,7 +7170,8 @@ void intel_suspend_hw(struct drm_i915_private *dev_priv)
 
 static void nop_init_clock_gating(struct drm_i915_private *dev_priv)
 {
-	DRM_DEBUG_KMS("No clock gating settings or workarounds applied.\n");
+	drm_dbg_kms(&dev_priv->drm,
+		    "No clock gating settings or workarounds applied.\n");
 }
 
 /**
@@ -7180,9 +7238,9 @@ void intel_init_pm(struct drm_i915_private *dev_priv)
 {
 	/* For cxsr */
 	if (IS_PINEVIEW(dev_priv))
-		i915_pineview_get_mem_freq(dev_priv);
+		pnv_get_mem_freq(dev_priv);
 	else if (IS_GEN(dev_priv, 5))
-		i915_ironlake_get_mem_freq(dev_priv);
+		ilk_get_mem_freq(dev_priv);
 
 	if (intel_has_sagv(dev_priv))
 		skl_setup_sagv_block_time(dev_priv);
@@ -7208,8 +7266,9 @@ void intel_init_pm(struct drm_i915_private *dev_priv)
 			dev_priv->display.optimize_watermarks =
 				ilk_optimize_watermarks;
 		} else {
-			DRM_DEBUG_KMS("Failed to read display plane latency. "
-				      "Disable CxSR\n");
+			drm_dbg_kms(&dev_priv->drm,
+				    "Failed to read display plane latency. "
+				    "Disable CxSR\n");
 		}
 	} else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
 		vlv_setup_wm_latency(dev_priv);
@@ -7229,7 +7288,8 @@ void intel_init_pm(struct drm_i915_private *dev_priv)
 					    dev_priv->is_ddr3,
 					    dev_priv->fsb_freq,
 					    dev_priv->mem_freq)) {
-			DRM_INFO("failed to find known CxSR latency "
+			drm_info(&dev_priv->drm,
+				 "failed to find known CxSR latency "
 				 "(found ddr%s fsb freq %d, mem freq %d), "
 				 "disabling CxSR\n",
 				 (dev_priv->is_ddr3 == 1) ? "3" : "2",
@@ -7238,7 +7298,7 @@ void intel_init_pm(struct drm_i915_private *dev_priv)
 			intel_set_memory_cxsr(dev_priv, false);
 			dev_priv->display.update_wm = NULL;
 		} else
-			dev_priv->display.update_wm = pineview_update_wm;
+			dev_priv->display.update_wm = pnv_update_wm;
 	} else if (IS_GEN(dev_priv, 4)) {
 		dev_priv->display.update_wm = i965_update_wm;
 	} else if (IS_GEN(dev_priv, 3)) {
@@ -7253,7 +7313,8 @@ void intel_init_pm(struct drm_i915_private *dev_priv)
 			dev_priv->display.get_fifo_size = i830_get_fifo_size;
 		}
 	} else {
-		DRM_ERROR("unexpected fall-through in intel_init_pm\n");
+		drm_err(&dev_priv->drm,
+			"unexpected fall-through in %s\n", __func__);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/intel_region_lmem.c b/drivers/gpu/drm/i915/intel_region_lmem.c
index eddb392917aa..14b59b899c9b 100644
--- a/drivers/gpu/drm/i915/intel_region_lmem.c
+++ b/drivers/gpu/drm/i915/intel_region_lmem.c
@@ -90,6 +90,8 @@ region_lmem_init(struct intel_memory_region *mem)
 	if (ret)
 		io_mapping_fini(&mem->iomap);
 
+	intel_memory_region_set_name(mem, "local");
+
 	return ret;
 }
 
@@ -123,10 +125,12 @@ intel_setup_fake_lmem(struct drm_i915_private *i915)
 					 io_start,
 					 &intel_region_lmem_ops);
 	if (!IS_ERR(mem)) {
-		DRM_INFO("Intel graphics fake LMEM: %pR\n", &mem->region);
-		DRM_INFO("Intel graphics fake LMEM IO start: %llx\n",
-			 (u64)mem->io_start);
-		DRM_INFO("Intel graphics fake LMEM size: %llx\n",
+		drm_info(&i915->drm, "Intel graphics fake LMEM: %pR\n",
+			 &mem->region);
+		drm_info(&i915->drm,
+			 "Intel graphics fake LMEM IO start: %llx\n",
+			 (u64)mem->io_start);
+		drm_info(&i915->drm, "Intel graphics fake LMEM size: %llx\n",
 			 (u64)resource_size(&mem->region));
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
index e06b35b844a0..cbfb7171d62d 100644
--- a/drivers/gpu/drm/i915/intel_sideband.c
+++ b/drivers/gpu/drm/i915/intel_sideband.c
@@ -105,8 +105,8 @@ static int vlv_sideband_rw(struct drm_i915_private *i915,
 	if (intel_wait_for_register(uncore,
 				    VLV_IOSF_DOORBELL_REQ, IOSF_SB_BUSY, 0,
 				    5)) {
-		DRM_DEBUG_DRIVER("IOSF sideband idle wait (%s) timed out\n",
-				 is_read ? "read" : "write");
+		drm_dbg(&i915->drm, "IOSF sideband idle wait (%s) timed out\n",
+			is_read ? "read" : "write");
 		return -EAGAIN;
 	}
 
@@ -129,8 +129,8 @@ static int vlv_sideband_rw(struct drm_i915_private *i915,
 			*val = intel_uncore_read_fw(uncore, VLV_IOSF_DATA);
 		err = 0;
 	} else {
-		DRM_DEBUG_DRIVER("IOSF sideband finish wait (%s) timed out\n",
-				 is_read ? "read" : "write");
+		drm_dbg(&i915->drm, "IOSF sideband finish wait (%s) timed out\n",
+			is_read ? "read" : "write");
 		err = -ETIMEDOUT;
 	}
 
@@ -283,7 +283,8 @@ static int intel_sbi_rw(struct drm_i915_private *i915, u16 reg,
 	if (intel_wait_for_register_fw(uncore,
 				       SBI_CTL_STAT, SBI_BUSY, 0,
 				       100)) {
-		DRM_ERROR("timeout waiting for SBI to become ready\n");
+		drm_err(&i915->drm,
+			"timeout waiting for SBI to become ready\n");
 		return -EBUSY;
 	}
 
@@ -301,12 +302,13 @@ static int intel_sbi_rw(struct drm_i915_private *i915, u16 reg,
 	if (__intel_wait_for_register_fw(uncore,
 					 SBI_CTL_STAT, SBI_BUSY, 0,
 					 100, 100, &cmd)) {
-		DRM_ERROR("timeout waiting for SBI to complete read\n");
+		drm_err(&i915->drm,
+			"timeout waiting for SBI to complete read\n");
 		return -ETIMEDOUT;
 	}
 
 	if (cmd & SBI_RESPONSE_FAIL) {
-		DRM_ERROR("error during SBI read of reg %x\n", reg);
+		drm_err(&i915->drm, "error during SBI read of reg %x\n", reg);
 		return -ENXIO;
 	}
 
@@ -426,8 +428,9 @@ int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox,
 	mutex_unlock(&i915->sb_lock);
 
 	if (err) {
-		DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n",
-				 mbox, __builtin_return_address(0), err);
+		drm_dbg(&i915->drm,
+			"warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n",
+			mbox, __builtin_return_address(0), err);
 	}
 
 	return err;
@@ -447,8 +450,9 @@ int sandybridge_pcode_write_timeout(struct drm_i915_private *i915,
 	mutex_unlock(&i915->sb_lock);
 
 	if (err) {
-		DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n",
-				 val, mbox, __builtin_return_address(0), err);
+		drm_dbg(&i915->drm,
+			"warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n",
+			val, mbox, __builtin_return_address(0), err);
 	}
 
 	return err;
@@ -519,7 +523,8 @@ int skl_pcode_request(struct drm_i915_private *i915, u32 mbox, u32 request,
 	 * requests, and for any quirks of the PCODE firmware that delays
 	 * the request completion.
 	 */
-	DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n");
+	drm_dbg_kms(&i915->drm,
+		    "PCODE timeout, retrying with preemption disabled\n");
 	WARN_ON_ONCE(timeout_base_ms > 3);
 	preempt_disable();
 	ret = wait_for_atomic(COND, 50);
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 94a97bf8c021..5f2cf6f43b8b 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -359,7 +359,8 @@ static void __gen6_gt_wait_for_fifo(struct intel_uncore *uncore)
 		if (wait_for_atomic((n = fifo_free_entries(uncore)) >
 				    GT_FIFO_NUM_RESERVED_ENTRIES,
 				    GT_FIFO_TIMEOUT_MS)) {
-			DRM_DEBUG("GT_FIFO timeout, entries: %u\n", n);
+			drm_dbg(&uncore->i915->drm,
+				"GT_FIFO timeout, entries: %u\n", n);
 			return;
 		}
 	}
@@ -432,7 +433,7 @@ intel_uncore_forcewake_reset(struct intel_uncore *uncore)
 			break;
 
 		if (--retry_count == 0) {
-			DRM_ERROR("Timed out waiting for forcewake timers to finish\n");
+			drm_err(&uncore->i915->drm, "Timed out waiting for forcewake timers to finish\n");
 			break;
 		}
 
@@ -490,7 +491,7 @@ gen6_check_for_fifo_debug(struct intel_uncore *uncore)
 	fifodbg = __raw_uncore_read32(uncore, GTFIFODBG);
 
 	if (unlikely(fifodbg)) {
-		DRM_DEBUG_DRIVER("GTFIFODBG = 0x08%x\n", fifodbg);
+		drm_dbg(&uncore->i915->drm, "GTFIFODBG = 0x%08x\n", fifodbg);
 		__raw_uncore_write32(uncore, GTFIFODBG, fifodbg);
 	}
 
@@ -562,7 +563,7 @@ void intel_uncore_resume_early(struct intel_uncore *uncore)
 	unsigned int restore_forcewake;
 
 	if (intel_uncore_unclaimed_mmio(uncore))
-		DRM_DEBUG("unclaimed mmio detected on resume, clearing\n");
+		drm_dbg(&uncore->i915->drm, "unclaimed mmio detected on resume, clearing\n");
 
 	if (!intel_uncore_has_forcewake(uncore))
 		return;
@@ -1595,8 +1596,8 @@ static int intel_uncore_fw_domains_init(struct intel_uncore *uncore)
 		spin_unlock_irq(&uncore->lock);
 
 		if (!(ecobus & FORCEWAKE_MT_ENABLE)) {
-			DRM_INFO("No MT forcewake available on Ivybridge, this can result in issues\n");
-			DRM_INFO("when using vblank-synced partial screen updates.\n");
+			drm_info(&i915->drm, "No MT forcewake available on Ivybridge, this can result in issues\n");
+			drm_info(&i915->drm, "when using vblank-synced partial screen updates.\n");
 			fw_domain_fini(uncore, FW_DOMAIN_ID_RENDER);
 			fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
 				       FORCEWAKE, FORCEWAKE_ACK);
@@ -1683,8 +1684,7 @@ static int uncore_mmio_setup(struct intel_uncore *uncore)
 		mmio_size = 2 * 1024 * 1024;
 	uncore->regs = pci_iomap(pdev, mmio_bar, mmio_size);
 	if (uncore->regs == NULL) {
-		DRM_ERROR("failed to map registers\n");
-
+		drm_err(&i915->drm, "failed to map registers\n");
 		return -EIO;
 	}
 
@@ -1807,7 +1807,7 @@ int intel_uncore_init_mmio(struct intel_uncore *uncore)
 
 	/* clear out unclaimed reg detection bit */
 	if (intel_uncore_unclaimed_mmio(uncore))
-		DRM_DEBUG("unclaimed mmio detected on uncore init, clearing\n");
+		drm_dbg(&i915->drm, "unclaimed mmio detected on uncore init, clearing\n");
 
 	return 0;
 
@@ -2072,9 +2072,10 @@ intel_uncore_arm_unclaimed_mmio_detection(struct intel_uncore *uncore)
 
 	if (unlikely(check_for_unclaimed_mmio(uncore))) {
 		if (!i915_modparams.mmio_debug) {
-			DRM_DEBUG("Unclaimed register detected, "
-				  "enabling oneshot unclaimed register reporting. "
-				  "Please use i915.mmio_debug=N for more information.\n");
+			drm_dbg(&uncore->i915->drm,
+				"Unclaimed register detected, "
+				"enabling oneshot unclaimed register reporting. "
+				"Please use i915.mmio_debug=N for more information.\n");
 			i915_modparams.mmio_debug++;
 		}
 		uncore->debug->unclaimed_mmio_check--;
diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c
index 59aa1b6f1827..8fbf6f4d3f26 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.c
+++ b/drivers/gpu/drm/i915/intel_wakeref.c
@@ -95,16 +95,17 @@ static void __intel_wakeref_put_work(struct work_struct *wrk)
 void __intel_wakeref_init(struct intel_wakeref *wf,
 			  struct intel_runtime_pm *rpm,
 			  const struct intel_wakeref_ops *ops,
-			  struct lock_class_key *key)
+			  struct intel_wakeref_lockclass *key)
 {
 	wf->rpm = rpm;
 	wf->ops = ops;
 
-	__mutex_init(&wf->mutex, "wakeref", key);
+	__mutex_init(&wf->mutex, "wakeref.mutex", &key->mutex);
 	atomic_set(&wf->count, 0);
 	wf->wakeref = 0;
 
 	INIT_WORK(&wf->work, __intel_wakeref_put_work);
+	lockdep_init_map(&wf->work.lockdep_map, "wakeref.work", &key->work, 0);
 }
 
 int intel_wakeref_wait_for_idle(struct intel_wakeref *wf)
diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h
index 8d945db94b7a..7d1e676b71ef 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.h
+++ b/drivers/gpu/drm/i915/intel_wakeref.h
@@ -44,12 +44,17 @@ struct intel_wakeref {
 	struct work_struct work;
 };
 
+struct intel_wakeref_lockclass {
+	struct lock_class_key mutex;
+	struct lock_class_key work;
+};
+
 void __intel_wakeref_init(struct intel_wakeref *wf,
 			  struct intel_runtime_pm *rpm,
 			  const struct intel_wakeref_ops *ops,
-			  struct lock_class_key *key);
+			  struct intel_wakeref_lockclass *key);
 #define intel_wakeref_init(wf, rpm, ops) do {				\
-	static struct lock_class_key __key;				\
+	static struct intel_wakeref_lockclass __key;			\
 									\
 	__intel_wakeref_init((wf), (rpm), (ops), &__key);		\
 } while (0)
diff --git a/drivers/gpu/drm/i915/oa/Makefile b/drivers/gpu/drm/i915/oa/Makefile
deleted file mode 100644
index df028e2b0d64..000000000000
--- a/drivers/gpu/drm/i915/oa/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-# SPDX-License-Identifier: MIT
-
-# For building individual subdir files on the command line
-subdir-ccflags-y += -I$(srctree)/$(src)/..
-
-# Extra header tests
-header-test-pattern-$(CONFIG_DRM_I915_WERROR) := *.h
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index b37fc53973cc..78f36faf2bbe 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -124,8 +124,6 @@ static void pm_resume(struct drm_i915_private *i915)
 	 * that runtime-pm just works.
 	 */
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
-		intel_gt_sanitize(&i915->gt, false);
-
 		i915_gem_restore_gtt_mappings(i915);
 		i915_gem_restore_fences(&i915->ggtt);
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 80cde5bda922..b342bef5e7c9 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -34,6 +34,7 @@
 
 #include "mock_drm.h"
 #include "mock_gem_device.h"
+#include "mock_gtt.h"
 #include "igt_flush_test.h"
 
 static void cleanup_freed_objects(struct drm_i915_private *i915)
@@ -151,7 +152,7 @@ static int igt_ppgtt_alloc(void *arg)
 	if (!HAS_PPGTT(dev_priv))
 		return 0;
 
-	ppgtt = __ppgtt_create(dev_priv);
+	ppgtt = i915_ppgtt_create(&dev_priv->gt);
 	if (IS_ERR(ppgtt))
 		return PTR_ERR(ppgtt);
 
@@ -206,8 +207,7 @@ err_ppgtt_cleanup:
 	return err;
 }
 
-static int lowlevel_hole(struct drm_i915_private *i915,
-			 struct i915_address_space *vm,
+static int lowlevel_hole(struct i915_address_space *vm,
 			 u64 hole_start, u64 hole_end,
 			 unsigned long end_time)
 {
@@ -256,7 +256,7 @@ static int lowlevel_hole(struct drm_i915_private *i915,
 		 * memory. We expect to hit -ENOMEM.
 		 */
 
-		obj = fake_dma_object(i915, BIT_ULL(size));
+		obj = fake_dma_object(vm->i915, BIT_ULL(size));
 		if (IS_ERR(obj)) {
 			kfree(order);
 			break;
@@ -291,7 +291,7 @@ static int lowlevel_hole(struct drm_i915_private *i915,
 			mock_vma->node.size = BIT_ULL(size);
 			mock_vma->node.start = addr;
 
-			with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+			with_intel_runtime_pm(vm->gt->uncore->rpm, wakeref)
 				vm->insert_entries(vm, mock_vma,
 						   I915_CACHE_NONE, 0);
 		}
@@ -303,7 +303,7 @@ static int lowlevel_hole(struct drm_i915_private *i915,
 			intel_wakeref_t wakeref;
 
 			GEM_BUG_ON(addr + BIT_ULL(size) > vm->total);
-			with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+			with_intel_runtime_pm(vm->gt->uncore->rpm, wakeref)
 				vm->clear_range(vm, addr, BIT_ULL(size));
 		}
 
@@ -312,7 +312,7 @@ static int lowlevel_hole(struct drm_i915_private *i915,
 
 		kfree(order);
 
-		cleanup_freed_objects(i915);
+		cleanup_freed_objects(vm->i915);
 	}
 
 	kfree(mock_vma);
@@ -340,8 +340,7 @@ static void close_object_list(struct list_head *objects,
 	}
 }
 
-static int fill_hole(struct drm_i915_private *i915,
-		     struct i915_address_space *vm,
+static int fill_hole(struct i915_address_space *vm,
 		     u64 hole_start, u64 hole_end,
 		     unsigned long end_time)
 {
@@ -374,7 +373,7 @@ static int fill_hole(struct drm_i915_private *i915,
 				{ }
 			}, *p;
 
-			obj = fake_dma_object(i915, full_size);
+			obj = fake_dma_object(vm->i915, full_size);
 			if (IS_ERR(obj))
 				break;
 
@@ -542,7 +541,7 @@ static int fill_hole(struct drm_i915_private *i915,
 		}
 
 		close_object_list(&objects, vm);
-		cleanup_freed_objects(i915);
+		cleanup_freed_objects(vm->i915);
 	}
 
 	return 0;
@@ -552,8 +551,7 @@ err:
 	return err;
 }
 
-static int walk_hole(struct drm_i915_private *i915,
-		     struct i915_address_space *vm,
+static int walk_hole(struct i915_address_space *vm,
 		     u64 hole_start, u64 hole_end,
 		     unsigned long end_time)
 {
@@ -575,7 +573,7 @@ static int walk_hole(struct drm_i915_private *i915,
 		u64 addr;
 		int err = 0;
 
-		obj = fake_dma_object(i915, size << PAGE_SHIFT);
+		obj = fake_dma_object(vm->i915, size << PAGE_SHIFT);
 		if (IS_ERR(obj))
 			break;
 
@@ -630,14 +628,13 @@ err_put:
 		if (err)
 			return err;
 
-		cleanup_freed_objects(i915);
+		cleanup_freed_objects(vm->i915);
 	}
 
 	return 0;
 }
 
-static int pot_hole(struct drm_i915_private *i915,
-		    struct i915_address_space *vm,
+static int pot_hole(struct i915_address_space *vm,
 		    u64 hole_start, u64 hole_end,
 		    unsigned long end_time)
 {
@@ -651,7 +648,7 @@ static int pot_hole(struct drm_i915_private *i915,
 	if (i915_is_ggtt(vm))
 		flags |= PIN_GLOBAL;
 
-	obj = i915_gem_object_create_internal(i915, 2 * I915_GTT_PAGE_SIZE);
+	obj = i915_gem_object_create_internal(vm->i915, 2 * I915_GTT_PAGE_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -712,8 +709,7 @@ err_obj:
 	return err;
 }
 
-static int drunk_hole(struct drm_i915_private *i915,
-		      struct i915_address_space *vm,
+static int drunk_hole(struct i915_address_space *vm,
 		      u64 hole_start, u64 hole_end,
 		      unsigned long end_time)
 {
@@ -758,7 +754,7 @@ static int drunk_hole(struct drm_i915_private *i915,
 		 * memory. We expect to hit -ENOMEM.
 		 */
 
-		obj = fake_dma_object(i915, BIT_ULL(size));
+		obj = fake_dma_object(vm->i915, BIT_ULL(size));
 		if (IS_ERR(obj)) {
 			kfree(order);
 			break;
@@ -816,14 +812,13 @@ err_obj:
 		if (err)
 			return err;
 
-		cleanup_freed_objects(i915);
+		cleanup_freed_objects(vm->i915);
 	}
 
 	return 0;
 }
 
-static int __shrink_hole(struct drm_i915_private *i915,
-			 struct i915_address_space *vm,
+static int __shrink_hole(struct i915_address_space *vm,
 			 u64 hole_start, u64 hole_end,
 			 unsigned long end_time)
 {
@@ -840,7 +835,7 @@ static int __shrink_hole(struct drm_i915_private *i915,
 		u64 size = BIT_ULL(order++);
 
 		size = min(size, hole_end - addr);
-		obj = fake_dma_object(i915, size);
+		obj = fake_dma_object(vm->i915, size);
 		if (IS_ERR(obj)) {
 			err = PTR_ERR(obj);
 			break;
@@ -894,12 +889,11 @@ static int __shrink_hole(struct drm_i915_private *i915,
 	}
 
 	close_object_list(&objects, vm);
-	cleanup_freed_objects(i915);
+	cleanup_freed_objects(vm->i915);
 	return err;
 }
 
-static int shrink_hole(struct drm_i915_private *i915,
-		       struct i915_address_space *vm,
+static int shrink_hole(struct i915_address_space *vm,
 		       u64 hole_start, u64 hole_end,
 		       unsigned long end_time)
 {
@@ -911,7 +905,7 @@ static int shrink_hole(struct drm_i915_private *i915,
 
 	for_each_prime_number_from(prime, 0, ULONG_MAX - 1) {
 		vm->fault_attr.interval = prime;
-		err = __shrink_hole(i915, vm, hole_start, hole_end, end_time);
+		err = __shrink_hole(vm, hole_start, hole_end, end_time);
 		if (err)
 			break;
 	}
@@ -921,8 +915,7 @@ static int shrink_hole(struct drm_i915_private *i915,
 	return err;
 }
 
-static int shrink_boom(struct drm_i915_private *i915,
-		       struct i915_address_space *vm,
+static int shrink_boom(struct i915_address_space *vm,
 		       u64 hole_start, u64 hole_end,
 		       unsigned long end_time)
 {
@@ -944,7 +937,7 @@ static int shrink_boom(struct drm_i915_private *i915,
 		unsigned int size = sizes[i];
 		struct i915_vma *vma;
 
-		purge = fake_dma_object(i915, size);
+		purge = fake_dma_object(vm->i915, size);
 		if (IS_ERR(purge))
 			return PTR_ERR(purge);
 
@@ -961,7 +954,7 @@ static int shrink_boom(struct drm_i915_private *i915,
 		/* Should now be ripe for purging */
 		i915_vma_unpin(vma);
 
-		explode = fake_dma_object(i915, size);
+		explode = fake_dma_object(vm->i915, size);
 		if (IS_ERR(explode)) {
 			err = PTR_ERR(explode);
 			goto err_purge;
@@ -987,7 +980,7 @@ static int shrink_boom(struct drm_i915_private *i915,
 		i915_gem_object_put(explode);
 
 		memset(&vm->fault_attr, 0, sizeof(vm->fault_attr));
-		cleanup_freed_objects(i915);
+		cleanup_freed_objects(vm->i915);
 	}
 
 	return 0;
@@ -1001,8 +994,7 @@ err_purge:
 }
 
 static int exercise_ppgtt(struct drm_i915_private *dev_priv,
-			  int (*func)(struct drm_i915_private *i915,
-				      struct i915_address_space *vm,
+			  int (*func)(struct i915_address_space *vm,
 				      u64 hole_start, u64 hole_end,
 				      unsigned long end_time))
 {
@@ -1018,7 +1010,7 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv,
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	ppgtt = i915_ppgtt_create(dev_priv);
+	ppgtt = i915_ppgtt_create(&dev_priv->gt);
 	if (IS_ERR(ppgtt)) {
 		err = PTR_ERR(ppgtt);
 		goto out_free;
@@ -1026,7 +1018,7 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv,
 	GEM_BUG_ON(offset_in_page(ppgtt->vm.total));
 	GEM_BUG_ON(!atomic_read(&ppgtt->vm.open));
 
-	err = func(dev_priv, &ppgtt->vm, 0, ppgtt->vm.total, end_time);
+	err = func(&ppgtt->vm, 0, ppgtt->vm.total, end_time);
 
 	i915_vm_put(&ppgtt->vm);
 
@@ -1082,8 +1074,7 @@ static int sort_holes(void *priv, struct list_head *A, struct list_head *B)
 }
 
 static int exercise_ggtt(struct drm_i915_private *i915,
-			 int (*func)(struct drm_i915_private *i915,
-				     struct i915_address_space *vm,
+			 int (*func)(struct i915_address_space *vm,
 				     u64 hole_start, u64 hole_end,
 				     unsigned long end_time))
 {
@@ -1105,7 +1096,7 @@ restart:
 		if (hole_start >= hole_end)
 			continue;
 
-		err = func(i915, &ggtt->vm, hole_start, hole_end, end_time);
+		err = func(&ggtt->vm, hole_start, hole_end, end_time);
 		if (err)
 			break;
 
@@ -1252,8 +1243,7 @@ static void track_vma_bind(struct i915_vma *vma)
 }
 
 static int exercise_mock(struct drm_i915_private *i915,
-			 int (*func)(struct drm_i915_private *i915,
-				     struct i915_address_space *vm,
+			 int (*func)(struct i915_address_space *vm,
 				     u64 hole_start, u64 hole_end,
 				     unsigned long end_time))
 {
@@ -1268,7 +1258,7 @@ static int exercise_mock(struct drm_i915_private *i915,
 		return -ENOMEM;
 
 	vm = i915_gem_context_get_vm_rcu(ctx);
-	err = func(i915, vm, 0, min(vm->total, limit), end_time);
+	err = func(vm, 0, min(vm->total, limit), end_time);
 	i915_vm_put(vm);
 
 	mock_context_close(ctx);
diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
index 476fba2ed8bb..34138c7bdd15 100644
--- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
@@ -1,5 +1,11 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* List each unit test as selftest(name, function)
+
+#ifndef selftest
+#define selftest(x, y)
+#endif
+
+/*
+ * List each unit test as selftest(name, function)
  *
  * The name is used as both an enum and expanded as subtest__name to create
  * a module parameter. It must be unique and legal for a C identifier.
diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
index aa5a0e7f5d9e..5b39bab4da1d 100644
--- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
@@ -1,5 +1,11 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* List each unit test as selftest(name, function)
+
+#ifndef selftest
+#define selftest(x, y)
+#endif
+
+/*
+ * List each unit test as selftest(name, function)
  *
  * The name is used as both an enum and expanded as subtest__name to create
  * a module parameter. It must be unique and legal for a C identifier.
diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
index f7129a243daa..5a577a1332f5 100644
--- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
@@ -1,5 +1,11 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* List each unit test as selftest(name, function)
+
+#ifndef selftest
+#define selftest(x, y)
+#endif
+
+/*
+ * List each unit test as selftest(name, function)
  *
  * The name is used as both an enum and expanded as subtest__name to create
  * a module parameter. It must be unique and legal for a C identifier.
diff --git a/drivers/gpu/drm/i915/selftests/i915_random.h b/drivers/gpu/drm/i915/selftests/i915_random.h
index 35cc69a3a1b9..05364eca20f7 100644
--- a/drivers/gpu/drm/i915/selftests/i915_random.h
+++ b/drivers/gpu/drm/i915/selftests/i915_random.h
@@ -25,6 +25,7 @@
 #ifndef __I915_SELFTESTS_RANDOM_H__
 #define __I915_SELFTESTS_RANDOM_H__
 
+#include <linux/math64.h>
 #include <linux/random.h>
 
 #include "../i915_selftest.h"
diff --git a/drivers/gpu/drm/i915/selftests/igt_atomic.c b/drivers/gpu/drm/i915/selftests/igt_atomic.c
new file mode 100644
index 000000000000..fb506b699095
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/igt_atomic.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include <linux/preempt.h>
+#include <linux/bottom_half.h>
+#include <linux/irqflags.h>
+
+#include "igt_atomic.h"
+
+static void __preempt_begin(void)
+{
+	preempt_disable();
+}
+
+static void __preempt_end(void)
+{
+	preempt_enable();
+}
+
+static void __softirq_begin(void)
+{
+	local_bh_disable();
+}
+
+static void __softirq_end(void)
+{
+	local_bh_enable();
+}
+
+static void __hardirq_begin(void)
+{
+	local_irq_disable();
+}
+
+static void __hardirq_end(void)
+{
+	local_irq_enable();
+}
+
+const struct igt_atomic_section igt_atomic_phases[] = {
+	{ "preempt", __preempt_begin, __preempt_end },
+	{ "softirq", __softirq_begin, __softirq_end },
+	{ "hardirq", __hardirq_begin, __hardirq_end },
+	{ }
+};
diff --git a/drivers/gpu/drm/i915/selftests/igt_atomic.h b/drivers/gpu/drm/i915/selftests/igt_atomic.h
index 93ec89f487ec..1991798abf4b 100644
--- a/drivers/gpu/drm/i915/selftests/igt_atomic.h
+++ b/drivers/gpu/drm/i915/selftests/igt_atomic.h
@@ -6,51 +6,12 @@
 #ifndef IGT_ATOMIC_H
 #define IGT_ATOMIC_H
 
-#include <linux/preempt.h>
-#include <linux/bottom_half.h>
-#include <linux/irqflags.h>
-
-static void __preempt_begin(void)
-{
-	preempt_disable();
-}
-
-static void __preempt_end(void)
-{
-	preempt_enable();
-}
-
-static void __softirq_begin(void)
-{
-	local_bh_disable();
-}
-
-static void __softirq_end(void)
-{
-	local_bh_enable();
-}
-
-static void __hardirq_begin(void)
-{
-	local_irq_disable();
-}
-
-static void __hardirq_end(void)
-{
-	local_irq_enable();
-}
-
 struct igt_atomic_section {
 	const char *name;
 	void (*critical_section_begin)(void);
 	void (*critical_section_end)(void);
 };
 
-static const struct igt_atomic_section igt_atomic_phases[] = {
-	{ "preempt", __preempt_begin, __preempt_end },
-	{ "softirq", __softirq_begin, __softirq_end },
-	{ "hardirq", __hardirq_begin, __hardirq_end },
-	{ }
-};
+extern const struct igt_atomic_section igt_atomic_phases[];
 
 #endif /* IGT_ATOMIC_H */
diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.h b/drivers/gpu/drm/i915/selftests/igt_live_test.h
index c0e9f99d50de..36ed42736c52 100644
--- a/drivers/gpu/drm/i915/selftests/igt_live_test.h
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.h
@@ -7,7 +7,7 @@
 #ifndef IGT_LIVE_TEST_H
 #define IGT_LIVE_TEST_H
 
-#include "../i915_gem.h"
+#include "gt/intel_engine.h" /* for I915_NUM_ENGINES */
 
 struct drm_i915_private;
 
diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
index 04d0aa7b349e..3ef3620e0da5 100644
--- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
@@ -270,36 +270,31 @@ static int igt_gpu_write_dw(struct intel_context *ce,
 
 static int igt_cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
 {
-	unsigned long n;
+	unsigned long n = obj->base.size >> PAGE_SHIFT;
+	u32 *ptr;
 	int err;
 
-	i915_gem_object_lock(obj);
-	err = i915_gem_object_set_to_wc_domain(obj, false);
-	i915_gem_object_unlock(obj);
-	if (err)
-		return err;
-
-	err = i915_gem_object_pin_pages(obj);
+	err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
 	if (err)
 		return err;
 
-	for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) {
-		u32 __iomem *base;
-		u32 read_val;
-
-		base = i915_gem_object_lmem_io_map_page_atomic(obj, n);
+	ptr = i915_gem_object_pin_map(obj, I915_MAP_WC);
+	if (IS_ERR(ptr))
+		return PTR_ERR(ptr);
 
-		read_val = ioread32(base + dword);
-		io_mapping_unmap_atomic(base);
-		if (read_val != val) {
-			pr_err("n=%lu base[%u]=%u, val=%u\n",
-			       n, dword, read_val, val);
+	ptr += dword;
+	while (n--) {
+		if (*ptr != val) {
+			pr_err("base[%u]=%08x, val=%08x\n",
+			       dword, *ptr, val);
 			err = -EINVAL;
 			break;
 		}
+
+		ptr += PAGE_SIZE / sizeof(*ptr);
 	}
 
-	i915_gem_object_unpin_pages(obj);
+	i915_gem_object_unpin_map(obj);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index ac641f5360e1..3b8986983afc 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -58,6 +58,8 @@ static void mock_device_release(struct drm_device *dev)
 	mock_device_flush(i915);
 	intel_gt_driver_remove(&i915->gt);
 
+	i915_gem_driver_release__contexts(i915);
+
 	i915_gem_drain_workqueue(i915);
 	i915_gem_drain_freed_objects(i915);
 
@@ -184,6 +186,7 @@ struct drm_i915_private *mock_gem_device(void)
 	if (mock_engine_init(i915->engine[RCS0]))
 		goto err_context;
 
+	__clear_bit(I915_WEDGED, &i915->gt.reset.flags);
 	intel_engines_driver_register(i915);
 
 	return i915;
diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c
index 20ac3844edec..edc5e3dda8ca 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c
@@ -55,6 +55,11 @@ static void mock_cleanup(struct i915_address_space *vm)
 {
 }
 
+static void mock_clear_range(struct i915_address_space *vm,
+			     u64 start, u64 length)
+{
+}
+
 struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name)
 {
 	struct i915_ppgtt *ppgtt;
@@ -70,7 +75,7 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name)
 
 	i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
 
-	ppgtt->vm.clear_range = nop_clear_range;
+	ppgtt->vm.clear_range = mock_clear_range;
 	ppgtt->vm.insert_page = mock_insert_page;
 	ppgtt->vm.insert_entries = mock_insert_entries;
 	ppgtt->vm.cleanup = mock_cleanup;
@@ -107,7 +112,7 @@ void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt)
 	ggtt->mappable_end = resource_size(&ggtt->gmadr);
 	ggtt->vm.total = 4096 * PAGE_SIZE;
 
-	ggtt->vm.clear_range = nop_clear_range;
+	ggtt->vm.clear_range = mock_clear_range;
 	ggtt->vm.insert_page = mock_insert_page;
 	ggtt->vm.insert_entries = mock_insert_entries;
 	ggtt->vm.cleanup = mock_cleanup;
diff --git a/drivers/gpu/drm/mediatek/Makefile b/drivers/gpu/drm/mediatek/Makefile
index 5044dfb8e3d6..b7a82ed5788f 100644
--- a/drivers/gpu/drm/mediatek/Makefile
+++ b/drivers/gpu/drm/mediatek/Makefile
@@ -20,7 +20,7 @@ obj-$(CONFIG_DRM_MEDIATEK) += mediatek-drm.o
 mediatek-drm-hdmi-objs := mtk_cec.o \
 			  mtk_hdmi.o \
 			  mtk_hdmi_ddc.o \
-                          mtk_mt2701_hdmi_phy.o \
+			  mtk_mt2701_hdmi_phy.o \
 			  mtk_mt8173_hdmi_phy.o \
 			  mtk_hdmi_phy.o
 
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_color.c b/drivers/gpu/drm/mediatek/mtk_disp_color.c
index 59de2a46aa49..6fb0d6983a4a 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_color.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_color.c
@@ -9,6 +9,7 @@
 #include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/platform_device.h>
+#include <linux/soc/mediatek/mtk-cmdq.h>
 
 #include "mtk_drm_crtc.h"
 #include "mtk_drm_ddp_comp.h"
@@ -45,12 +46,12 @@ static inline struct mtk_disp_color *comp_to_color(struct mtk_ddp_comp *comp)
 
 static void mtk_color_config(struct mtk_ddp_comp *comp, unsigned int w,
 			     unsigned int h, unsigned int vrefresh,
-			     unsigned int bpc)
+			     unsigned int bpc, struct cmdq_pkt *cmdq_pkt)
 {
 	struct mtk_disp_color *color = comp_to_color(comp);
 
-	writel(w, comp->regs + DISP_COLOR_WIDTH(color));
-	writel(h, comp->regs + DISP_COLOR_HEIGHT(color));
+	mtk_ddp_write(cmdq_pkt, w, comp, DISP_COLOR_WIDTH(color));
+	mtk_ddp_write(cmdq_pkt, h, comp, DISP_COLOR_HEIGHT(color));
 }
 
 static void mtk_color_start(struct mtk_ddp_comp *comp)
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
index 4a55bb6e2213..891d80c73e04 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
@@ -11,6 +11,7 @@
 #include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/platform_device.h>
+#include <linux/soc/mediatek/mtk-cmdq.h>
 
 #include "mtk_drm_crtc.h"
 #include "mtk_drm_ddp_comp.h"
@@ -124,14 +125,15 @@ static void mtk_ovl_stop(struct mtk_ddp_comp *comp)
 
 static void mtk_ovl_config(struct mtk_ddp_comp *comp, unsigned int w,
 			   unsigned int h, unsigned int vrefresh,
-			   unsigned int bpc)
+			   unsigned int bpc, struct cmdq_pkt *cmdq_pkt)
 {
 	if (w != 0 && h != 0)
-		writel_relaxed(h << 16 | w, comp->regs + DISP_REG_OVL_ROI_SIZE);
-	writel_relaxed(0x0, comp->regs + DISP_REG_OVL_ROI_BGCLR);
+		mtk_ddp_write_relaxed(cmdq_pkt, h << 16 | w, comp,
+				      DISP_REG_OVL_ROI_SIZE);
+	mtk_ddp_write_relaxed(cmdq_pkt, 0x0, comp, DISP_REG_OVL_ROI_BGCLR);
 
-	writel(0x1, comp->regs + DISP_REG_OVL_RST);
-	writel(0x0, comp->regs + DISP_REG_OVL_RST);
+	mtk_ddp_write(cmdq_pkt, 0x1, comp, DISP_REG_OVL_RST);
+	mtk_ddp_write(cmdq_pkt, 0x0, comp, DISP_REG_OVL_RST);
 }
 
 static unsigned int mtk_ovl_layer_nr(struct mtk_ddp_comp *comp)
@@ -175,16 +177,16 @@ static int mtk_ovl_layer_check(struct mtk_ddp_comp *comp, unsigned int idx,
 	return 0;
 }
 
-static void mtk_ovl_layer_on(struct mtk_ddp_comp *comp, unsigned int idx)
+static void mtk_ovl_layer_on(struct mtk_ddp_comp *comp, unsigned int idx,
+			     struct cmdq_pkt *cmdq_pkt)
 {
-	unsigned int reg;
 	unsigned int gmc_thrshd_l;
 	unsigned int gmc_thrshd_h;
 	unsigned int gmc_value;
 	struct mtk_disp_ovl *ovl = comp_to_ovl(comp);
 
-	writel(0x1, comp->regs + DISP_REG_OVL_RDMA_CTRL(idx));
-
+	mtk_ddp_write(cmdq_pkt, 0x1, comp,
+		      DISP_REG_OVL_RDMA_CTRL(idx));
 	gmc_thrshd_l = GMC_THRESHOLD_LOW >>
 		      (GMC_THRESHOLD_BITS - ovl->data->gmc_bits);
 	gmc_thrshd_h = GMC_THRESHOLD_HIGH >>
@@ -194,22 +196,19 @@ static void mtk_ovl_layer_on(struct mtk_ddp_comp *comp, unsigned int idx)
 	else
 		gmc_value = gmc_thrshd_l | gmc_thrshd_l << 8 |
 			    gmc_thrshd_h << 16 | gmc_thrshd_h << 24;
-	writel(gmc_value, comp->regs + DISP_REG_OVL_RDMA_GMC(idx));
-
-	reg = readl(comp->regs + DISP_REG_OVL_SRC_CON);
-	reg = reg | BIT(idx);
-	writel(reg, comp->regs + DISP_REG_OVL_SRC_CON);
+	mtk_ddp_write(cmdq_pkt, gmc_value,
+		      comp, DISP_REG_OVL_RDMA_GMC(idx));
+	mtk_ddp_write_mask(cmdq_pkt, BIT(idx), comp,
+			   DISP_REG_OVL_SRC_CON, BIT(idx));
 }
 
-static void mtk_ovl_layer_off(struct mtk_ddp_comp *comp, unsigned int idx)
+static void mtk_ovl_layer_off(struct mtk_ddp_comp *comp, unsigned int idx,
+			      struct cmdq_pkt *cmdq_pkt)
 {
-	unsigned int reg;
-
-	reg = readl(comp->regs + DISP_REG_OVL_SRC_CON);
-	reg = reg & ~BIT(idx);
-	writel(reg, comp->regs + DISP_REG_OVL_SRC_CON);
-
-	writel(0x0, comp->regs + DISP_REG_OVL_RDMA_CTRL(idx));
+	mtk_ddp_write_mask(cmdq_pkt, 0, comp,
+			   DISP_REG_OVL_SRC_CON, BIT(idx));
+	mtk_ddp_write(cmdq_pkt, 0, comp,
+		      DISP_REG_OVL_RDMA_CTRL(idx));
 }
 
 static unsigned int ovl_fmt_convert(struct mtk_disp_ovl *ovl, unsigned int fmt)
@@ -249,7 +248,8 @@ static unsigned int ovl_fmt_convert(struct mtk_disp_ovl *ovl, unsigned int fmt)
 }
 
 static void mtk_ovl_layer_config(struct mtk_ddp_comp *comp, unsigned int idx,
-				 struct mtk_plane_state *state)
+				 struct mtk_plane_state *state,
+				 struct cmdq_pkt *cmdq_pkt)
 {
 	struct mtk_disp_ovl *ovl = comp_to_ovl(comp);
 	struct mtk_plane_pending_state *pending = &state->pending;
@@ -260,11 +260,13 @@ static void mtk_ovl_layer_config(struct mtk_ddp_comp *comp, unsigned int idx,
 	unsigned int src_size = (pending->height << 16) | pending->width;
 	unsigned int con;
 
-	if (!pending->enable)
-		mtk_ovl_layer_off(comp, idx);
+	if (!pending->enable) {
+		mtk_ovl_layer_off(comp, idx, cmdq_pkt);
+		return;
+	}
 
 	con = ovl_fmt_convert(ovl, fmt);
-	if (idx != 0)
+	if (state->base.fb->format->has_alpha)
 		con |= OVL_CON_AEN | OVL_CON_ALPHA;
 
 	if (pending->rotation & DRM_MODE_REFLECT_Y) {
@@ -277,14 +279,18 @@ static void mtk_ovl_layer_config(struct mtk_ddp_comp *comp, unsigned int idx,
 		addr += pending->pitch - 1;
 	}
 
-	writel_relaxed(con, comp->regs + DISP_REG_OVL_CON(idx));
-	writel_relaxed(pitch, comp->regs + DISP_REG_OVL_PITCH(idx));
-	writel_relaxed(src_size, comp->regs + DISP_REG_OVL_SRC_SIZE(idx));
-	writel_relaxed(offset, comp->regs + DISP_REG_OVL_OFFSET(idx));
-	writel_relaxed(addr, comp->regs + DISP_REG_OVL_ADDR(ovl, idx));
-
-	if (pending->enable)
-		mtk_ovl_layer_on(comp, idx);
+	mtk_ddp_write_relaxed(cmdq_pkt, con, comp,
+			      DISP_REG_OVL_CON(idx));
+	mtk_ddp_write_relaxed(cmdq_pkt, pitch, comp,
+			      DISP_REG_OVL_PITCH(idx));
+	mtk_ddp_write_relaxed(cmdq_pkt, src_size, comp,
+			      DISP_REG_OVL_SRC_SIZE(idx));
+	mtk_ddp_write_relaxed(cmdq_pkt, offset, comp,
+			      DISP_REG_OVL_OFFSET(idx));
+	mtk_ddp_write_relaxed(cmdq_pkt, addr, comp,
+			      DISP_REG_OVL_ADDR(ovl, idx));
+
+	mtk_ovl_layer_on(comp, idx, cmdq_pkt);
 }
 
 static void mtk_ovl_bgclr_in_on(struct mtk_ddp_comp *comp)
@@ -313,8 +319,6 @@ static const struct mtk_ddp_comp_funcs mtk_disp_ovl_funcs = {
 	.disable_vblank = mtk_ovl_disable_vblank,
 	.supported_rotations = mtk_ovl_supported_rotations,
 	.layer_nr = mtk_ovl_layer_nr,
-	.layer_on = mtk_ovl_layer_on,
-	.layer_off = mtk_ovl_layer_off,
 	.layer_check = mtk_ovl_layer_check,
 	.layer_config = mtk_ovl_layer_config,
 	.bgclr_in_on = mtk_ovl_bgclr_in_on,
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c
index 405afef31407..0cb848d64206 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c
@@ -9,6 +9,7 @@
 #include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/platform_device.h>
+#include <linux/soc/mediatek/mtk-cmdq.h>
 
 #include "mtk_drm_crtc.h"
 #include "mtk_drm_ddp_comp.h"
@@ -125,14 +126,16 @@ static void mtk_rdma_stop(struct mtk_ddp_comp *comp)
 
 static void mtk_rdma_config(struct mtk_ddp_comp *comp, unsigned int width,
 			    unsigned int height, unsigned int vrefresh,
-			    unsigned int bpc)
+			    unsigned int bpc, struct cmdq_pkt *cmdq_pkt)
 {
 	unsigned int threshold;
 	unsigned int reg;
 	struct mtk_disp_rdma *rdma = comp_to_rdma(comp);
 
-	rdma_update_bits(comp, DISP_REG_RDMA_SIZE_CON_0, 0xfff, width);
-	rdma_update_bits(comp, DISP_REG_RDMA_SIZE_CON_1, 0xfffff, height);
+	mtk_ddp_write_mask(cmdq_pkt, width, comp,
+			   DISP_REG_RDMA_SIZE_CON_0, 0xfff);
+	mtk_ddp_write_mask(cmdq_pkt, height, comp,
+			   DISP_REG_RDMA_SIZE_CON_1, 0xfffff);
 
 	/*
 	 * Enable FIFO underflow since DSI and DPI can't be blocked.
@@ -144,7 +147,7 @@ static void mtk_rdma_config(struct mtk_ddp_comp *comp, unsigned int width,
 	reg = RDMA_FIFO_UNDERFLOW_EN |
 	      RDMA_FIFO_PSEUDO_SIZE(RDMA_FIFO_SIZE(rdma)) |
 	      RDMA_OUTPUT_VALID_FIFO_THRESHOLD(threshold);
-	writel(reg, comp->regs + DISP_REG_RDMA_FIFO_CON);
+	mtk_ddp_write(cmdq_pkt, reg, comp, DISP_REG_RDMA_FIFO_CON);
 }
 
 static unsigned int rdma_fmt_convert(struct mtk_disp_rdma *rdma,
@@ -190,7 +193,8 @@ static unsigned int mtk_rdma_layer_nr(struct mtk_ddp_comp *comp)
 }
 
 static void mtk_rdma_layer_config(struct mtk_ddp_comp *comp, unsigned int idx,
-				  struct mtk_plane_state *state)
+				  struct mtk_plane_state *state,
+				  struct cmdq_pkt *cmdq_pkt)
 {
 	struct mtk_disp_rdma *rdma = comp_to_rdma(comp);
 	struct mtk_plane_pending_state *pending = &state->pending;
@@ -200,24 +204,27 @@ static void mtk_rdma_layer_config(struct mtk_ddp_comp *comp, unsigned int idx,
 	unsigned int con;
 
 	con = rdma_fmt_convert(rdma, fmt);
-	writel_relaxed(con, comp->regs + DISP_RDMA_MEM_CON);
+	mtk_ddp_write_relaxed(cmdq_pkt, con, comp, DISP_RDMA_MEM_CON);
 
 	if (fmt == DRM_FORMAT_UYVY || fmt == DRM_FORMAT_YUYV) {
-		rdma_update_bits(comp, DISP_REG_RDMA_SIZE_CON_0,
-				 RDMA_MATRIX_ENABLE, RDMA_MATRIX_ENABLE);
-		rdma_update_bits(comp, DISP_REG_RDMA_SIZE_CON_0,
-				 RDMA_MATRIX_INT_MTX_SEL,
-				 RDMA_MATRIX_INT_MTX_BT601_to_RGB);
+		mtk_ddp_write_mask(cmdq_pkt, RDMA_MATRIX_ENABLE, comp,
+				   DISP_REG_RDMA_SIZE_CON_0,
+				   RDMA_MATRIX_ENABLE);
+		mtk_ddp_write_mask(cmdq_pkt, RDMA_MATRIX_INT_MTX_BT601_to_RGB,
+				   comp, DISP_REG_RDMA_SIZE_CON_0,
+				   RDMA_MATRIX_INT_MTX_SEL);
 	} else {
-		rdma_update_bits(comp, DISP_REG_RDMA_SIZE_CON_0,
-				 RDMA_MATRIX_ENABLE, 0);
+		mtk_ddp_write_mask(cmdq_pkt, 0, comp,
+				   DISP_REG_RDMA_SIZE_CON_0,
+				   RDMA_MATRIX_ENABLE);
 	}
+	mtk_ddp_write_relaxed(cmdq_pkt, addr, comp, DISP_RDMA_MEM_START_ADDR);
+	mtk_ddp_write_relaxed(cmdq_pkt, pitch, comp, DISP_RDMA_MEM_SRC_PITCH);
+	mtk_ddp_write(cmdq_pkt, RDMA_MEM_GMC, comp,
+		      DISP_RDMA_MEM_GMC_SETTING_0);
+	mtk_ddp_write_mask(cmdq_pkt, RDMA_MODE_MEMORY, comp,
+			   DISP_REG_RDMA_GLOBAL_CON, RDMA_MODE_MEMORY);
 
-	writel_relaxed(addr, comp->regs + DISP_RDMA_MEM_START_ADDR);
-	writel_relaxed(pitch, comp->regs + DISP_RDMA_MEM_SRC_PITCH);
-	writel(RDMA_MEM_GMC, comp->regs + DISP_RDMA_MEM_GMC_SETTING_0);
-	rdma_update_bits(comp, DISP_REG_RDMA_GLOBAL_CON,
-			 RDMA_MODE_MEMORY, RDMA_MODE_MEMORY);
 }
 
 static const struct mtk_ddp_comp_funcs mtk_disp_rdma_funcs = {
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
index f80a8ba75977..0dfcd1787e65 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
@@ -5,6 +5,7 @@
 
 #include <linux/clk.h>
 #include <linux/pm_runtime.h>
+#include <linux/soc/mediatek/mtk-cmdq.h>
 
 #include <asm/barrier.h>
 #include <soc/mediatek/smi.h>
@@ -42,11 +43,20 @@ struct mtk_drm_crtc {
 	struct drm_plane		*planes;
 	unsigned int			layer_nr;
 	bool				pending_planes;
+	bool				pending_async_planes;
+
+#if IS_REACHABLE(CONFIG_MTK_CMDQ)
+	struct cmdq_client		*cmdq_client;
+	u32				cmdq_event;
+#endif
 
 	void __iomem			*config_regs;
 	struct mtk_disp_mutex		*mutex;
 	unsigned int			ddp_comp_nr;
 	struct mtk_ddp_comp		**ddp_comp;
+
+	/* lock for display hardware access */
+	struct mutex			hw_lock;
 };
 
 struct mtk_crtc_state {
@@ -215,11 +225,12 @@ struct mtk_ddp_comp *mtk_drm_ddp_comp_for_plane(struct drm_crtc *crtc,
 	struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
 	struct mtk_ddp_comp *comp;
 	int i, count = 0;
+	unsigned int local_index = plane - mtk_crtc->planes;
 
 	for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) {
 		comp = mtk_crtc->ddp_comp[i];
-		if (plane->index < (count + mtk_ddp_comp_layer_nr(comp))) {
-			*local_layer = plane->index - count;
+		if (local_index < (count + mtk_ddp_comp_layer_nr(comp))) {
+			*local_layer = local_index - count;
 			return comp;
 		}
 		count += mtk_ddp_comp_layer_nr(comp);
@@ -229,6 +240,13 @@ struct mtk_ddp_comp *mtk_drm_ddp_comp_for_plane(struct drm_crtc *crtc,
 	return NULL;
 }
 
+#if IS_REACHABLE(CONFIG_MTK_CMDQ)
+static void ddp_cmdq_cb(struct cmdq_cb_data data)
+{
+	cmdq_pkt_destroy(data.data);
+}
+#endif
+
 static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc)
 {
 	struct drm_crtc *crtc = &mtk_crtc->base;
@@ -297,7 +315,7 @@ static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc)
 		if (i == 1)
 			mtk_ddp_comp_bgclr_in_on(comp);
 
-		mtk_ddp_comp_config(comp, width, height, vrefresh, bpc);
+		mtk_ddp_comp_config(comp, width, height, vrefresh, bpc, NULL);
 		mtk_ddp_comp_start(comp);
 	}
 
@@ -310,7 +328,9 @@ static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc)
 
 		plane_state = to_mtk_plane_state(plane->state);
 		comp = mtk_drm_ddp_comp_for_plane(crtc, plane, &local_layer);
-		mtk_ddp_comp_layer_config(comp, local_layer, plane_state);
+		if (comp)
+			mtk_ddp_comp_layer_config(comp, local_layer,
+						  plane_state, NULL);
 	}
 
 	return 0;
@@ -325,6 +345,7 @@ err_pm_runtime_put:
 static void mtk_crtc_ddp_hw_fini(struct mtk_drm_crtc *mtk_crtc)
 {
 	struct drm_device *drm = mtk_crtc->base.dev;
+	struct drm_crtc *crtc = &mtk_crtc->base;
 	int i;
 
 	DRM_DEBUG_DRIVER("%s\n", __func__);
@@ -350,9 +371,17 @@ static void mtk_crtc_ddp_hw_fini(struct mtk_drm_crtc *mtk_crtc)
 	mtk_disp_mutex_unprepare(mtk_crtc->mutex);
 
 	pm_runtime_put(drm->dev);
+
+	if (crtc->state->event && !crtc->state->active) {
+		spin_lock_irq(&crtc->dev->event_lock);
+		drm_crtc_send_vblank_event(crtc, crtc->state->event);
+		crtc->state->event = NULL;
+		spin_unlock_irq(&crtc->dev->event_lock);
+	}
 }
 
-static void mtk_crtc_ddp_config(struct drm_crtc *crtc)
+static void mtk_crtc_ddp_config(struct drm_crtc *crtc,
+				struct cmdq_pkt *cmdq_handle)
 {
 	struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
 	struct mtk_crtc_state *state = to_mtk_crtc_state(mtk_crtc->base.state);
@@ -368,7 +397,8 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc)
 	if (state->pending_config) {
 		mtk_ddp_comp_config(comp, state->pending_width,
 				    state->pending_height,
-				    state->pending_vrefresh, 0);
+				    state->pending_vrefresh, 0,
+				    cmdq_handle);
 
 		state->pending_config = false;
 	}
@@ -386,12 +416,84 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc)
 			comp = mtk_drm_ddp_comp_for_plane(crtc, plane,
 							  &local_layer);
 
-			mtk_ddp_comp_layer_config(comp, local_layer,
-						  plane_state);
+			if (comp)
+				mtk_ddp_comp_layer_config(comp, local_layer,
+							  plane_state,
+							  cmdq_handle);
 			plane_state->pending.config = false;
 		}
 		mtk_crtc->pending_planes = false;
 	}
+
+	if (mtk_crtc->pending_async_planes) {
+		for (i = 0; i < mtk_crtc->layer_nr; i++) {
+			struct drm_plane *plane = &mtk_crtc->planes[i];
+			struct mtk_plane_state *plane_state;
+
+			plane_state = to_mtk_plane_state(plane->state);
+
+			if (!plane_state->pending.async_config)
+				continue;
+
+			comp = mtk_drm_ddp_comp_for_plane(crtc, plane,
+							  &local_layer);
+
+			if (comp)
+				mtk_ddp_comp_layer_config(comp, local_layer,
+							  plane_state,
+							  cmdq_handle);
+			plane_state->pending.async_config = false;
+		}
+		mtk_crtc->pending_async_planes = false;
+	}
+}
+
+static void mtk_drm_crtc_hw_config(struct mtk_drm_crtc *mtk_crtc)
+{
+#if IS_REACHABLE(CONFIG_MTK_CMDQ)
+	struct cmdq_pkt *cmdq_handle;
+#endif
+	struct drm_crtc *crtc = &mtk_crtc->base;
+	struct mtk_drm_private *priv = crtc->dev->dev_private;
+	unsigned int pending_planes = 0, pending_async_planes = 0;
+	int i;
+
+	mutex_lock(&mtk_crtc->hw_lock);
+	for (i = 0; i < mtk_crtc->layer_nr; i++) {
+		struct drm_plane *plane = &mtk_crtc->planes[i];
+		struct mtk_plane_state *plane_state;
+
+		plane_state = to_mtk_plane_state(plane->state);
+		if (plane_state->pending.dirty) {
+			plane_state->pending.config = true;
+			plane_state->pending.dirty = false;
+			pending_planes |= BIT(i);
+		} else if (plane_state->pending.async_dirty) {
+			plane_state->pending.async_config = true;
+			plane_state->pending.async_dirty = false;
+			pending_async_planes |= BIT(i);
+		}
+	}
+	if (pending_planes)
+		mtk_crtc->pending_planes = true;
+	if (pending_async_planes)
+		mtk_crtc->pending_async_planes = true;
+
+	if (priv->data->shadow_register) {
+		mtk_disp_mutex_acquire(mtk_crtc->mutex);
+		mtk_crtc_ddp_config(crtc, NULL);
+		mtk_disp_mutex_release(mtk_crtc->mutex);
+	}
+#if IS_REACHABLE(CONFIG_MTK_CMDQ)
+	if (mtk_crtc->cmdq_client) {
+		cmdq_handle = cmdq_pkt_create(mtk_crtc->cmdq_client, PAGE_SIZE);
+		cmdq_pkt_clear_event(cmdq_handle, mtk_crtc->cmdq_event);
+		cmdq_pkt_wfe(cmdq_handle, mtk_crtc->cmdq_event);
+		mtk_crtc_ddp_config(crtc, cmdq_handle);
+		cmdq_pkt_flush_async(cmdq_handle, ddp_cmdq_cb, cmdq_handle);
+	}
+#endif
+	mutex_unlock(&mtk_crtc->hw_lock);
 }
 
 int mtk_drm_crtc_plane_check(struct drm_crtc *crtc, struct drm_plane *plane,
@@ -401,7 +503,23 @@ int mtk_drm_crtc_plane_check(struct drm_crtc *crtc, struct drm_plane *plane,
 	struct mtk_ddp_comp *comp;
 
 	comp = mtk_drm_ddp_comp_for_plane(crtc, plane, &local_layer);
-	return mtk_ddp_comp_layer_check(comp, local_layer, state);
+	if (comp)
+		return mtk_ddp_comp_layer_check(comp, local_layer, state);
+	return 0;
+}
+
+void mtk_drm_crtc_async_update(struct drm_crtc *crtc, struct drm_plane *plane,
+			       struct drm_plane_state *new_state)
+{
+	struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
+	const struct drm_plane_helper_funcs *plane_helper_funcs =
+			plane->helper_private;
+
+	if (!mtk_crtc->enabled)
+		return;
+
+	plane_helper_funcs->atomic_update(plane, new_state);
+	mtk_drm_crtc_hw_config(mtk_crtc);
 }
 
 static void mtk_drm_crtc_atomic_enable(struct drm_crtc *crtc,
@@ -451,6 +569,7 @@ static void mtk_drm_crtc_atomic_disable(struct drm_crtc *crtc,
 	}
 	mtk_crtc->pending_planes = true;
 
+	mtk_drm_crtc_hw_config(mtk_crtc);
 	/* Wait for planes to be disabled */
 	drm_crtc_wait_one_vblank(crtc);
 
@@ -482,34 +601,16 @@ static void mtk_drm_crtc_atomic_flush(struct drm_crtc *crtc,
 				      struct drm_crtc_state *old_crtc_state)
 {
 	struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
-	struct mtk_drm_private *priv = crtc->dev->dev_private;
-	unsigned int pending_planes = 0;
 	int i;
 
 	if (mtk_crtc->event)
 		mtk_crtc->pending_needs_vblank = true;
-	for (i = 0; i < mtk_crtc->layer_nr; i++) {
-		struct drm_plane *plane = &mtk_crtc->planes[i];
-		struct mtk_plane_state *plane_state;
-
-		plane_state = to_mtk_plane_state(plane->state);
-		if (plane_state->pending.dirty) {
-			plane_state->pending.config = true;
-			plane_state->pending.dirty = false;
-			pending_planes |= BIT(i);
-		}
-	}
-	if (pending_planes)
-		mtk_crtc->pending_planes = true;
 	if (crtc->state->color_mgmt_changed)
-		for (i = 0; i < mtk_crtc->ddp_comp_nr; i++)
+		for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) {
 			mtk_ddp_gamma_set(mtk_crtc->ddp_comp[i], crtc->state);
-
-	if (priv->data->shadow_register) {
-		mtk_disp_mutex_acquire(mtk_crtc->mutex);
-		mtk_crtc_ddp_config(crtc);
-		mtk_disp_mutex_release(mtk_crtc->mutex);
-	}
+			mtk_ddp_ctm_set(mtk_crtc->ddp_comp[i], crtc->state);
+		}
+	mtk_drm_crtc_hw_config(mtk_crtc);
 }
 
 static const struct drm_crtc_funcs mtk_crtc_funcs = {
@@ -559,8 +660,12 @@ void mtk_crtc_ddp_irq(struct drm_crtc *crtc, struct mtk_ddp_comp *comp)
 	struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
 	struct mtk_drm_private *priv = crtc->dev->dev_private;
 
+#if IS_REACHABLE(CONFIG_MTK_CMDQ)
+	if (!priv->data->shadow_register && !mtk_crtc->cmdq_client)
+#else
 	if (!priv->data->shadow_register)
-		mtk_crtc_ddp_config(crtc);
+#endif
+		mtk_crtc_ddp_config(crtc, NULL);
 
 	mtk_drm_finish_page_flip(mtk_crtc);
 }
@@ -627,6 +732,8 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev,
 	int pipe = priv->num_pipes;
 	int ret;
 	int i;
+	bool has_ctm = false;
+	uint gamma_lut_size = 0;
 
 	if (!path)
 		return 0;
@@ -677,6 +784,14 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev,
 		}
 
 		mtk_crtc->ddp_comp[i] = comp;
+
+		if (comp->funcs) {
+			if (comp->funcs->gamma_set)
+				gamma_lut_size = MTK_LUT_SIZE;
+
+			if (comp->funcs->ctm_set)
+				has_ctm = true;
+		}
 	}
 
 	for (i = 0; i < mtk_crtc->ddp_comp_nr; i++)
@@ -697,9 +812,28 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev,
 				NULL, pipe);
 	if (ret < 0)
 		return ret;
-	drm_mode_crtc_set_gamma_size(&mtk_crtc->base, MTK_LUT_SIZE);
-	drm_crtc_enable_color_mgmt(&mtk_crtc->base, 0, false, MTK_LUT_SIZE);
-	priv->num_pipes++;
 
+	if (gamma_lut_size)
+		drm_mode_crtc_set_gamma_size(&mtk_crtc->base, gamma_lut_size);
+	drm_crtc_enable_color_mgmt(&mtk_crtc->base, 0, has_ctm, gamma_lut_size);
+	priv->num_pipes++;
+	mutex_init(&mtk_crtc->hw_lock);
+
+#if IS_REACHABLE(CONFIG_MTK_CMDQ)
+	mtk_crtc->cmdq_client =
+			cmdq_mbox_create(dev, drm_crtc_index(&mtk_crtc->base),
+					 2000);
+	if (IS_ERR(mtk_crtc->cmdq_client)) {
+		dev_dbg(dev, "mtk_crtc %d failed to create mailbox client, writing register by CPU now\n",
+			drm_crtc_index(&mtk_crtc->base));
+		mtk_crtc->cmdq_client = NULL;
+	}
+	ret = of_property_read_u32_index(dev->of_node, "mediatek,gce-events",
+					 drm_crtc_index(&mtk_crtc->base),
+					 &mtk_crtc->cmdq_event);
+	if (ret)
+		dev_dbg(dev, "mtk_crtc %d failed to get mediatek,gce-events property\n",
+			drm_crtc_index(&mtk_crtc->base));
+#endif
 	return 0;
 }
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.h b/drivers/gpu/drm/mediatek/mtk_drm_crtc.h
index 6afe1c19557a..a2b4677a451c 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.h
+++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.h
@@ -21,5 +21,7 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev,
 			unsigned int path_len);
 int mtk_drm_crtc_plane_check(struct drm_crtc *crtc, struct drm_plane *plane,
 			     struct mtk_plane_state *state);
+void mtk_drm_crtc_async_update(struct drm_crtc *crtc, struct drm_plane *plane,
+			       struct drm_plane_state *plane_state);
 
 #endif /* MTK_DRM_CRTC_H */
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
index 7f21307cda75..1f5a112bb034 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
@@ -12,7 +12,7 @@
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
-
+#include <linux/soc/mediatek/mtk-cmdq.h>
 #include "mtk_drm_drv.h"
 #include "mtk_drm_plane.h"
 #include "mtk_drm_ddp_comp.h"
@@ -37,7 +37,15 @@
 #define CCORR_EN				BIT(0)
 #define DISP_CCORR_CFG				0x0020
 #define CCORR_RELAY_MODE			BIT(0)
+#define CCORR_ENGINE_EN				BIT(1)
+#define CCORR_GAMMA_OFF				BIT(2)
+#define CCORR_WGAMUT_SRC_CLIP			BIT(3)
 #define DISP_CCORR_SIZE				0x0030
+#define DISP_CCORR_COEF_0			0x0080
+#define DISP_CCORR_COEF_1			0x0084
+#define DISP_CCORR_COEF_2			0x0088
+#define DISP_CCORR_COEF_3			0x008C
+#define DISP_CCORR_COEF_4			0x0090
 
 #define DISP_DITHER_EN				0x0000
 #define DITHER_EN				BIT(0)
@@ -76,36 +84,84 @@
 #define DITHER_ADD_LSHIFT_G(x)			(((x) & 0x7) << 4)
 #define DITHER_ADD_RSHIFT_G(x)			(((x) & 0x7) << 0)
 
+void mtk_ddp_write(struct cmdq_pkt *cmdq_pkt, unsigned int value,
+		   struct mtk_ddp_comp *comp, unsigned int offset)
+{
+#if IS_REACHABLE(CONFIG_MTK_CMDQ)
+	if (cmdq_pkt)
+		cmdq_pkt_write(cmdq_pkt, comp->subsys,
+			       comp->regs_pa + offset, value);
+	else
+#endif
+		writel(value, comp->regs + offset);
+}
+
+void mtk_ddp_write_relaxed(struct cmdq_pkt *cmdq_pkt, unsigned int value,
+			   struct mtk_ddp_comp *comp,
+			   unsigned int offset)
+{
+#if IS_REACHABLE(CONFIG_MTK_CMDQ)
+	if (cmdq_pkt)
+		cmdq_pkt_write(cmdq_pkt, comp->subsys,
+			       comp->regs_pa + offset, value);
+	else
+#endif
+		writel_relaxed(value, comp->regs + offset);
+}
+
+void mtk_ddp_write_mask(struct cmdq_pkt *cmdq_pkt,
+			unsigned int value,
+			struct mtk_ddp_comp *comp,
+			unsigned int offset,
+			unsigned int mask)
+{
+#if IS_REACHABLE(CONFIG_MTK_CMDQ)
+	if (cmdq_pkt) {
+		cmdq_pkt_write_mask(cmdq_pkt, comp->subsys,
+				    comp->regs_pa + offset, value, mask);
+	} else {
+#endif
+		u32 tmp = readl(comp->regs + offset);
+
+		tmp = (tmp & ~mask) | (value & mask);
+		writel(tmp, comp->regs + offset);
+#if IS_REACHABLE(CONFIG_MTK_CMDQ)
+	}
+#endif
+}
+
 void mtk_dither_set(struct mtk_ddp_comp *comp, unsigned int bpc,
-		    unsigned int CFG)
+		    unsigned int CFG, struct cmdq_pkt *cmdq_pkt)
 {
 	/* If bpc equal to 0, the dithering function didn't be enabled */
 	if (bpc == 0)
 		return;
 
 	if (bpc >= MTK_MIN_BPC) {
-		writel(0, comp->regs + DISP_DITHER_5);
-		writel(0, comp->regs + DISP_DITHER_7);
-		writel(DITHER_LSB_ERR_SHIFT_R(MTK_MAX_BPC - bpc) |
-		       DITHER_ADD_LSHIFT_R(MTK_MAX_BPC - bpc) |
-		       DITHER_NEW_BIT_MODE,
-		       comp->regs + DISP_DITHER_15);
-		writel(DITHER_LSB_ERR_SHIFT_B(MTK_MAX_BPC - bpc) |
-		       DITHER_ADD_LSHIFT_B(MTK_MAX_BPC - bpc) |
-		       DITHER_LSB_ERR_SHIFT_G(MTK_MAX_BPC - bpc) |
-		       DITHER_ADD_LSHIFT_G(MTK_MAX_BPC - bpc),
-		       comp->regs + DISP_DITHER_16);
-		writel(DISP_DITHERING, comp->regs + CFG);
+		mtk_ddp_write(cmdq_pkt, 0, comp, DISP_DITHER_5);
+		mtk_ddp_write(cmdq_pkt, 0, comp, DISP_DITHER_7);
+		mtk_ddp_write(cmdq_pkt,
+			      DITHER_LSB_ERR_SHIFT_R(MTK_MAX_BPC - bpc) |
+			      DITHER_ADD_LSHIFT_R(MTK_MAX_BPC - bpc) |
+			      DITHER_NEW_BIT_MODE,
+			      comp, DISP_DITHER_15);
+		mtk_ddp_write(cmdq_pkt,
+			      DITHER_LSB_ERR_SHIFT_B(MTK_MAX_BPC - bpc) |
+			      DITHER_ADD_LSHIFT_B(MTK_MAX_BPC - bpc) |
+			      DITHER_LSB_ERR_SHIFT_G(MTK_MAX_BPC - bpc) |
+			      DITHER_ADD_LSHIFT_G(MTK_MAX_BPC - bpc),
+			      comp, DISP_DITHER_16);
+		mtk_ddp_write(cmdq_pkt, DISP_DITHERING, comp, CFG);
 	}
 }
 
 static void mtk_od_config(struct mtk_ddp_comp *comp, unsigned int w,
 			  unsigned int h, unsigned int vrefresh,
-			  unsigned int bpc)
+			  unsigned int bpc, struct cmdq_pkt *cmdq_pkt)
 {
-	writel(w << 16 | h, comp->regs + DISP_OD_SIZE);
-	writel(OD_RELAYMODE, comp->regs + DISP_OD_CFG);
-	mtk_dither_set(comp, bpc, DISP_OD_CFG);
+	mtk_ddp_write(cmdq_pkt, w << 16 | h, comp, DISP_OD_SIZE);
+	mtk_ddp_write(cmdq_pkt, OD_RELAYMODE, comp, DISP_OD_CFG);
+	mtk_dither_set(comp, bpc, DISP_OD_CFG, cmdq_pkt);
 }
 
 static void mtk_od_start(struct mtk_ddp_comp *comp)
@@ -120,9 +176,9 @@ static void mtk_ufoe_start(struct mtk_ddp_comp *comp)
 
 static void mtk_aal_config(struct mtk_ddp_comp *comp, unsigned int w,
 			   unsigned int h, unsigned int vrefresh,
-			   unsigned int bpc)
+			   unsigned int bpc, struct cmdq_pkt *cmdq_pkt)
 {
-	writel(h << 16 | w, comp->regs + DISP_AAL_SIZE);
+	mtk_ddp_write(cmdq_pkt, h << 16 | w, comp, DISP_AAL_SIZE);
 }
 
 static void mtk_aal_start(struct mtk_ddp_comp *comp)
@@ -137,10 +193,10 @@ static void mtk_aal_stop(struct mtk_ddp_comp *comp)
 
 static void mtk_ccorr_config(struct mtk_ddp_comp *comp, unsigned int w,
 			     unsigned int h, unsigned int vrefresh,
-			     unsigned int bpc)
+			     unsigned int bpc, struct cmdq_pkt *cmdq_pkt)
 {
-	writel(h << 16 | w, comp->regs + DISP_CCORR_SIZE);
-	writel(CCORR_RELAY_MODE, comp->regs + DISP_CCORR_CFG);
+	mtk_ddp_write(cmdq_pkt, h << 16 | w, comp, DISP_CCORR_SIZE);
+	mtk_ddp_write(cmdq_pkt, CCORR_ENGINE_EN, comp, DISP_CCORR_CFG);
 }
 
 static void mtk_ccorr_start(struct mtk_ddp_comp *comp)
@@ -153,12 +209,63 @@ static void mtk_ccorr_stop(struct mtk_ddp_comp *comp)
 	writel_relaxed(0x0, comp->regs + DISP_CCORR_EN);
 }
 
+/* Converts a DRM S31.32 value to the HW S1.10 format. */
+static u16 mtk_ctm_s31_32_to_s1_10(u64 in)
+{
+	u16 r;
+
+	/* Sign bit. */
+	r = in & BIT_ULL(63) ? BIT(11) : 0;
+
+	if ((in & GENMASK_ULL(62, 33)) > 0) {
+		/* The identity value 0x100000000 maps to 0x400; */
+		/* magnitudes >= 0x200000000 (2.0) clamp to 0x7ff. */
+		r |= GENMASK(10, 0);
+	} else {
+		/* Keep bits 32..22: 1 integer bit and 10 fraction bits. */
+		r |= (in >> 22) & GENMASK(10, 0);
+	}
+
+	return r;
+}
+
+static void mtk_ccorr_ctm_set(struct mtk_ddp_comp *comp,
+			      struct drm_crtc_state *state)
+{
+	struct drm_property_blob *blob = state->ctm;
+	struct drm_color_ctm *ctm;
+	const u64 *input;
+	uint16_t coeffs[9] = { 0 };
+	int i;
+	struct cmdq_pkt *cmdq_pkt = NULL;
+
+	if (!blob)
+		return;
+
+	ctm = (struct drm_color_ctm *)blob->data;
+	input = ctm->matrix;
+
+	for (i = 0; i < ARRAY_SIZE(coeffs); i++)
+		coeffs[i] = mtk_ctm_s31_32_to_s1_10(input[i]);
+
+	mtk_ddp_write(cmdq_pkt, coeffs[0] << 16 | coeffs[1],
+		      comp, DISP_CCORR_COEF_0);
+	mtk_ddp_write(cmdq_pkt, coeffs[2] << 16 | coeffs[3],
+		      comp, DISP_CCORR_COEF_1);
+	mtk_ddp_write(cmdq_pkt, coeffs[4] << 16 | coeffs[5],
+		      comp, DISP_CCORR_COEF_2);
+	mtk_ddp_write(cmdq_pkt, coeffs[6] << 16 | coeffs[7],
+		      comp, DISP_CCORR_COEF_3);
+	mtk_ddp_write(cmdq_pkt, coeffs[8] << 16,
+		      comp, DISP_CCORR_COEF_4);
+}
+
 static void mtk_dither_config(struct mtk_ddp_comp *comp, unsigned int w,
 			      unsigned int h, unsigned int vrefresh,
-			      unsigned int bpc)
+			      unsigned int bpc, struct cmdq_pkt *cmdq_pkt)
 {
-	writel(h << 16 | w, comp->regs + DISP_DITHER_SIZE);
-	writel(DITHER_RELAY_MODE, comp->regs + DISP_DITHER_CFG);
+	mtk_ddp_write(cmdq_pkt, h << 16 | w, comp, DISP_DITHER_SIZE);
+	mtk_ddp_write(cmdq_pkt, DITHER_RELAY_MODE, comp, DISP_DITHER_CFG);
 }
 
 static void mtk_dither_start(struct mtk_ddp_comp *comp)
@@ -173,10 +280,10 @@ static void mtk_dither_stop(struct mtk_ddp_comp *comp)
 
 static void mtk_gamma_config(struct mtk_ddp_comp *comp, unsigned int w,
 			     unsigned int h, unsigned int vrefresh,
-			     unsigned int bpc)
+			     unsigned int bpc, struct cmdq_pkt *cmdq_pkt)
 {
-	writel(h << 16 | w, comp->regs + DISP_GAMMA_SIZE);
-	mtk_dither_set(comp, bpc, DISP_GAMMA_CFG);
+	mtk_ddp_write(cmdq_pkt, h << 16 | w, comp, DISP_GAMMA_SIZE);
+	mtk_dither_set(comp, bpc, DISP_GAMMA_CFG, cmdq_pkt);
 }
 
 static void mtk_gamma_start(struct mtk_ddp_comp *comp)
@@ -223,6 +330,7 @@ static const struct mtk_ddp_comp_funcs ddp_ccorr = {
 	.config = mtk_ccorr_config,
 	.start = mtk_ccorr_start,
 	.stop = mtk_ccorr_stop,
+	.ctm_set = mtk_ccorr_ctm_set,
 };
 
 static const struct mtk_ddp_comp_funcs ddp_dither = {
@@ -326,6 +434,11 @@ int mtk_ddp_comp_init(struct device *dev, struct device_node *node,
 	enum mtk_ddp_comp_type type;
 	struct device_node *larb_node;
 	struct platform_device *larb_pdev;
+#if IS_REACHABLE(CONFIG_MTK_CMDQ)
+	struct resource res;
+	struct cmdq_client_reg cmdq_reg;
+	int ret;
+#endif
 
 	if (comp_id < 0 || comp_id >= DDP_COMPONENT_ID_MAX)
 		return -EINVAL;
@@ -379,6 +492,19 @@ int mtk_ddp_comp_init(struct device *dev, struct device_node *node,
 
 	comp->larb_dev = &larb_pdev->dev;
 
+#if IS_REACHABLE(CONFIG_MTK_CMDQ)
+	if (of_address_to_resource(node, 0, &res) != 0) {
+		dev_err(dev, "Missing reg in %s node\n", node->full_name);
+		return -EINVAL;
+	}
+	comp->regs_pa = res.start;
+
+	ret = cmdq_dev_get_client_reg(dev, &cmdq_reg, 0);
+	if (ret)
+		dev_dbg(dev, "get mediatek,gce-client-reg fail!\n");
+	else
+		comp->subsys = cmdq_reg.subsys;
+#endif
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h
index 2f1e9e75b8da..debe36395fe7 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h
+++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h
@@ -69,27 +69,29 @@ enum mtk_ddp_comp_id {
 };
 
 struct mtk_ddp_comp;
-
+struct cmdq_pkt;
 struct mtk_ddp_comp_funcs {
 	void (*config)(struct mtk_ddp_comp *comp, unsigned int w,
-		       unsigned int h, unsigned int vrefresh, unsigned int bpc);
+		       unsigned int h, unsigned int vrefresh,
+		       unsigned int bpc, struct cmdq_pkt *cmdq_pkt);
 	void (*start)(struct mtk_ddp_comp *comp);
 	void (*stop)(struct mtk_ddp_comp *comp);
 	void (*enable_vblank)(struct mtk_ddp_comp *comp, struct drm_crtc *crtc);
 	void (*disable_vblank)(struct mtk_ddp_comp *comp);
 	unsigned int (*supported_rotations)(struct mtk_ddp_comp *comp);
 	unsigned int (*layer_nr)(struct mtk_ddp_comp *comp);
-	void (*layer_on)(struct mtk_ddp_comp *comp, unsigned int idx);
-	void (*layer_off)(struct mtk_ddp_comp *comp, unsigned int idx);
 	int (*layer_check)(struct mtk_ddp_comp *comp,
 			   unsigned int idx,
 			   struct mtk_plane_state *state);
 	void (*layer_config)(struct mtk_ddp_comp *comp, unsigned int idx,
-			     struct mtk_plane_state *state);
+			     struct mtk_plane_state *state,
+			     struct cmdq_pkt *cmdq_pkt);
 	void (*gamma_set)(struct mtk_ddp_comp *comp,
 			  struct drm_crtc_state *state);
 	void (*bgclr_in_on)(struct mtk_ddp_comp *comp);
 	void (*bgclr_in_off)(struct mtk_ddp_comp *comp);
+	void (*ctm_set)(struct mtk_ddp_comp *comp,
+			struct drm_crtc_state *state);
 };
 
 struct mtk_ddp_comp {
@@ -99,14 +101,17 @@ struct mtk_ddp_comp {
 	struct device *larb_dev;
 	enum mtk_ddp_comp_id id;
 	const struct mtk_ddp_comp_funcs *funcs;
+	resource_size_t regs_pa;
+	u8 subsys;
 };
 
 static inline void mtk_ddp_comp_config(struct mtk_ddp_comp *comp,
 				       unsigned int w, unsigned int h,
-				       unsigned int vrefresh, unsigned int bpc)
+				       unsigned int vrefresh, unsigned int bpc,
+				       struct cmdq_pkt *cmdq_pkt)
 {
 	if (comp->funcs && comp->funcs->config)
-		comp->funcs->config(comp, w, h, vrefresh, bpc);
+		comp->funcs->config(comp, w, h, vrefresh, bpc, cmdq_pkt);
 }
 
 static inline void mtk_ddp_comp_start(struct mtk_ddp_comp *comp)
@@ -151,20 +156,6 @@ static inline unsigned int mtk_ddp_comp_layer_nr(struct mtk_ddp_comp *comp)
 	return 0;
 }
 
-static inline void mtk_ddp_comp_layer_on(struct mtk_ddp_comp *comp,
-					 unsigned int idx)
-{
-	if (comp->funcs && comp->funcs->layer_on)
-		comp->funcs->layer_on(comp, idx);
-}
-
-static inline void mtk_ddp_comp_layer_off(struct mtk_ddp_comp *comp,
-					  unsigned int idx)
-{
-	if (comp->funcs && comp->funcs->layer_off)
-		comp->funcs->layer_off(comp, idx);
-}
-
 static inline int mtk_ddp_comp_layer_check(struct mtk_ddp_comp *comp,
 					   unsigned int idx,
 					   struct mtk_plane_state *state)
@@ -176,10 +167,11 @@ static inline int mtk_ddp_comp_layer_check(struct mtk_ddp_comp *comp,
 
 static inline void mtk_ddp_comp_layer_config(struct mtk_ddp_comp *comp,
 					     unsigned int idx,
-					     struct mtk_plane_state *state)
+					     struct mtk_plane_state *state,
+					     struct cmdq_pkt *cmdq_pkt)
 {
 	if (comp->funcs && comp->funcs->layer_config)
-		comp->funcs->layer_config(comp, idx, state);
+		comp->funcs->layer_config(comp, idx, state, cmdq_pkt);
 }
 
 static inline void mtk_ddp_gamma_set(struct mtk_ddp_comp *comp,
@@ -201,6 +193,13 @@ static inline void mtk_ddp_comp_bgclr_in_off(struct mtk_ddp_comp *comp)
 		comp->funcs->bgclr_in_off(comp);
 }
 
+static inline void mtk_ddp_ctm_set(struct mtk_ddp_comp *comp,
+				   struct drm_crtc_state *state)
+{
+	if (comp->funcs && comp->funcs->ctm_set)
+		comp->funcs->ctm_set(comp, state);
+}
+
 int mtk_ddp_comp_get_id(struct device_node *node,
 			enum mtk_ddp_comp_type comp_type);
 int mtk_ddp_comp_init(struct device *dev, struct device_node *comp_node,
@@ -209,6 +208,13 @@ int mtk_ddp_comp_init(struct device *dev, struct device_node *comp_node,
 int mtk_ddp_comp_register(struct drm_device *drm, struct mtk_ddp_comp *comp);
 void mtk_ddp_comp_unregister(struct drm_device *drm, struct mtk_ddp_comp *comp);
 void mtk_dither_set(struct mtk_ddp_comp *comp, unsigned int bpc,
-		    unsigned int CFG);
-
+		    unsigned int CFG, struct cmdq_pkt *cmdq_pkt);
+enum mtk_ddp_comp_type mtk_ddp_comp_get_type(enum mtk_ddp_comp_id comp_id);
+void mtk_ddp_write(struct cmdq_pkt *cmdq_pkt, unsigned int value,
+		   struct mtk_ddp_comp *comp, unsigned int offset);
+void mtk_ddp_write_relaxed(struct cmdq_pkt *cmdq_pkt, unsigned int value,
+			   struct mtk_ddp_comp *comp, unsigned int offset);
+void mtk_ddp_write_mask(struct cmdq_pkt *cmdq_pkt, unsigned int value,
+			struct mtk_ddp_comp *comp, unsigned int offset,
+			unsigned int mask);
 #endif /* MTK_DRM_DDP_COMP_H */
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
index 2b1c122066ea..0563c6813333 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
@@ -37,84 +37,9 @@
 #define DRIVER_MAJOR 1
 #define DRIVER_MINOR 0
 
-static void mtk_atomic_schedule(struct mtk_drm_private *private,
-				struct drm_atomic_state *state)
-{
-	private->commit.state = state;
-	schedule_work(&private->commit.work);
-}
-
-static void mtk_atomic_complete(struct mtk_drm_private *private,
-				struct drm_atomic_state *state)
-{
-	struct drm_device *drm = private->drm;
-
-	drm_atomic_helper_wait_for_fences(drm, state, false);
-
-	/*
-	 * Mediatek drm supports runtime PM, so plane registers cannot be
-	 * written when their crtc is disabled.
-	 *
-	 * The comment for drm_atomic_helper_commit states:
-	 *     For drivers supporting runtime PM the recommended sequence is
-	 *
-	 *     drm_atomic_helper_commit_modeset_disables(dev, state);
-	 *     drm_atomic_helper_commit_modeset_enables(dev, state);
-	 *     drm_atomic_helper_commit_planes(dev, state,
-	 *                                     DRM_PLANE_COMMIT_ACTIVE_ONLY);
-	 *
-	 * See the kerneldoc entries for these three functions for more details.
-	 */
-	drm_atomic_helper_commit_modeset_disables(drm, state);
-	drm_atomic_helper_commit_modeset_enables(drm, state);
-	drm_atomic_helper_commit_planes(drm, state,
-					DRM_PLANE_COMMIT_ACTIVE_ONLY);
-
-	drm_atomic_helper_wait_for_vblanks(drm, state);
-
-	drm_atomic_helper_cleanup_planes(drm, state);
-	drm_atomic_state_put(state);
-}
-
-static void mtk_atomic_work(struct work_struct *work)
-{
-	struct mtk_drm_private *private = container_of(work,
-			struct mtk_drm_private, commit.work);
-
-	mtk_atomic_complete(private, private->commit.state);
-}
-
-static int mtk_atomic_commit(struct drm_device *drm,
-			     struct drm_atomic_state *state,
-			     bool async)
-{
-	struct mtk_drm_private *private = drm->dev_private;
-	int ret;
-
-	ret = drm_atomic_helper_prepare_planes(drm, state);
-	if (ret)
-		return ret;
-
-	mutex_lock(&private->commit.lock);
-	flush_work(&private->commit.work);
-
-	ret = drm_atomic_helper_swap_state(state, true);
-	if (ret) {
-		mutex_unlock(&private->commit.lock);
-		drm_atomic_helper_cleanup_planes(drm, state);
-		return ret;
-	}
-
-	drm_atomic_state_get(state);
-	if (async)
-		mtk_atomic_schedule(private, state);
-	else
-		mtk_atomic_complete(private, state);
-
-	mutex_unlock(&private->commit.lock);
-
-	return 0;
-}
+static const struct drm_mode_config_helper_funcs mtk_drm_mode_config_helpers = {
+	.atomic_commit_tail = drm_atomic_helper_commit_tail_rpm,
+};
 
 static struct drm_framebuffer *
 mtk_drm_mode_fb_create(struct drm_device *dev,
@@ -132,7 +57,7 @@ mtk_drm_mode_fb_create(struct drm_device *dev,
 static const struct drm_mode_config_funcs mtk_drm_mode_config_funcs = {
 	.fb_create = mtk_drm_mode_fb_create,
 	.atomic_check = drm_atomic_helper_check,
-	.atomic_commit = mtk_atomic_commit,
+	.atomic_commit = drm_atomic_helper_commit,
 };
 
 static const enum mtk_ddp_comp_id mt2701_mtk_ddp_main[] = {
@@ -250,6 +175,7 @@ static int mtk_drm_kms_init(struct drm_device *drm)
 	drm->mode_config.max_width = 4096;
 	drm->mode_config.max_height = 4096;
 	drm->mode_config.funcs = &mtk_drm_mode_config_funcs;
+	drm->mode_config.helper_private = &mtk_drm_mode_config_helpers;
 
 	ret = component_bind_all(drm->dev, drm);
 	if (ret)
@@ -509,8 +435,6 @@ static int mtk_drm_probe(struct platform_device *pdev)
 	if (!private)
 		return -ENOMEM;
 
-	mutex_init(&private->commit.lock);
-	INIT_WORK(&private->commit.work, mtk_atomic_work);
 	private->data = of_device_get_match_data(dev);
 
 	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.h b/drivers/gpu/drm/mediatek/mtk_drm_drv.h
index e03fea12ff59..17bc99b9f5d4 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_drv.h
+++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.h
@@ -43,13 +43,6 @@ struct mtk_drm_private {
 	struct device_node *comp_node[DDP_COMPONENT_ID_MAX];
 	struct mtk_ddp_comp *ddp_comp[DDP_COMPONENT_ID_MAX];
 	const struct mtk_mmsys_driver_data *data;
-
-	struct {
-		struct drm_atomic_state *state;
-		struct work_struct work;
-		struct mutex lock;
-	} commit;
-
 	struct drm_atomic_state *suspend_state;
 
 	bool dma_parms_allocated;
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c
index f0b0325381e0..914cc7619cd7 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c
@@ -7,6 +7,7 @@
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_fourcc.h>
+#include <drm/drm_atomic_uapi.h>
 #include <drm/drm_plane_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 
@@ -75,6 +76,50 @@ static void mtk_drm_plane_destroy_state(struct drm_plane *plane,
 	kfree(to_mtk_plane_state(state));
 }
 
+/*
+ * Accept an async update only for the CRTC's cursor plane, and only while
+ * the plane's current state (plane->state) exists and has a framebuffer.
+ */
+static int mtk_plane_atomic_async_check(struct drm_plane *plane,
+					struct drm_plane_state *state)
+{
+	struct drm_crtc_state *crtc_state;
+
+	if (plane != state->crtc->cursor)
+		return -EINVAL;
+
+	if (!plane->state || !plane->state->fb)
+		return -EINVAL;
+
+	/* No atomic state attached: the asynchronous cursor update case. */
+	crtc_state = state->state ?
+		drm_atomic_get_existing_crtc_state(state->state, state->crtc) :
+		state->crtc->state;
+
+	return drm_atomic_helper_check_plane_state(plane->state, crtc_state,
+						   DRM_PLANE_HELPER_NO_SCALING,
+						   DRM_PLANE_HELPER_NO_SCALING,
+						   true, true);
+}
+
+/* Copy the new geometry into the current state, then update the CRTC. */
+static void mtk_plane_atomic_async_update(struct drm_plane *plane,
+					  struct drm_plane_state *new_state)
+{
+	struct mtk_plane_state *mtk_state = to_mtk_plane_state(plane->state);
+
+	plane->state->src_x = new_state->src_x;
+	plane->state->src_y = new_state->src_y;
+	plane->state->src_w = new_state->src_w;
+	plane->state->src_h = new_state->src_h;
+	plane->state->crtc_x = new_state->crtc_x;
+	plane->state->crtc_y = new_state->crtc_y;
+	plane->state->crtc_w = new_state->crtc_w;
+	plane->state->crtc_h = new_state->crtc_h;
+	mtk_state->pending.async_dirty = true;
+	mtk_drm_crtc_async_update(new_state->crtc, plane, new_state);
+}
+
 static const struct drm_plane_funcs mtk_plane_funcs = {
 	.update_plane = drm_atomic_helper_update_plane,
 	.disable_plane = drm_atomic_helper_disable_plane,
@@ -163,6 +208,8 @@ static const struct drm_plane_helper_funcs mtk_plane_helper_funcs = {
 	.atomic_check = mtk_plane_atomic_check,
 	.atomic_update = mtk_plane_atomic_update,
 	.atomic_disable = mtk_plane_atomic_disable,
+	.atomic_async_update = mtk_plane_atomic_async_update,
+	.atomic_async_check = mtk_plane_atomic_async_check,
 };
 
 int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane,
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.h b/drivers/gpu/drm/mediatek/mtk_drm_plane.h
index 760885e35b27..d454bece9535 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_plane.h
+++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.h
@@ -22,6 +22,8 @@ struct mtk_plane_pending_state {
 	unsigned int			height;
 	unsigned int			rotation;
 	bool				dirty;
+	bool				async_dirty;
+	bool				async_config;
 };
 
 struct mtk_plane_state {
diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c
index 3b5e016d16c4..5fa1073cf26b 100644
--- a/drivers/gpu/drm/mediatek/mtk_dsi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dsi.c
@@ -230,28 +230,25 @@ static void mtk_dsi_mask(struct mtk_dsi *dsi, u32 offset, u32 mask, u32 data)
 static void mtk_dsi_phy_timconfig(struct mtk_dsi *dsi)
 {
 	u32 timcon0, timcon1, timcon2, timcon3;
-	u32 ui, cycle_time;
+	u32 data_rate_mhz = DIV_ROUND_UP(dsi->data_rate, 1000000);
 	struct mtk_phy_timing *timing = &dsi->phy_timing;
 
-	ui = DIV_ROUND_UP(1000000000, dsi->data_rate);
-	cycle_time = div_u64(8000000000ULL, dsi->data_rate);
+	timing->lpx = (60 * data_rate_mhz / (8 * 1000)) + 1;
+	timing->da_hs_prepare = (80 * data_rate_mhz + 4 * 1000) / 8000;
+	timing->da_hs_zero = (170 * data_rate_mhz + 10 * 1000) / 8000 + 1 -
+			     timing->da_hs_prepare;
+	timing->da_hs_trail = timing->da_hs_prepare + 1;
 
-	timing->lpx = NS_TO_CYCLE(60, cycle_time);
-	timing->da_hs_prepare = NS_TO_CYCLE(50 + 5 * ui, cycle_time);
-	timing->da_hs_zero = NS_TO_CYCLE(110 + 6 * ui, cycle_time);
-	timing->da_hs_trail = NS_TO_CYCLE(77 + 4 * ui, cycle_time);
+	timing->ta_go = 4 * timing->lpx - 2;
+	timing->ta_sure = timing->lpx + 2;
+	timing->ta_get = 4 * timing->lpx;
+	timing->da_hs_exit = 2 * timing->lpx + 1;
 
-	timing->ta_go = 4 * timing->lpx;
-	timing->ta_sure = 3 * timing->lpx / 2;
-	timing->ta_get = 5 * timing->lpx;
-	timing->da_hs_exit = 2 * timing->lpx;
-
-	timing->clk_hs_zero = NS_TO_CYCLE(336, cycle_time);
-	timing->clk_hs_trail = NS_TO_CYCLE(100, cycle_time) + 10;
-
-	timing->clk_hs_prepare = NS_TO_CYCLE(64, cycle_time);
-	timing->clk_hs_post = NS_TO_CYCLE(80 + 52 * ui, cycle_time);
-	timing->clk_hs_exit = 2 * timing->lpx;
+	timing->clk_hs_prepare = 70 * data_rate_mhz / (8 * 1000);
+	timing->clk_hs_post = timing->clk_hs_prepare + 8;
+	timing->clk_hs_trail = timing->clk_hs_prepare;
+	timing->clk_hs_zero = timing->clk_hs_trail * 4;
+	timing->clk_hs_exit = 2 * timing->clk_hs_trail;
 
 	timcon0 = timing->lpx | timing->da_hs_prepare << 8 |
 		  timing->da_hs_zero << 16 | timing->da_hs_trail << 24;
@@ -482,27 +479,39 @@ static void mtk_dsi_config_vdo_timing(struct mtk_dsi *dsi)
 			dsi_tmp_buf_bpp - 10);
 
 	data_phy_cycles = timing->lpx + timing->da_hs_prepare +
-				  timing->da_hs_zero + timing->da_hs_exit + 2;
+			  timing->da_hs_zero + timing->da_hs_exit + 3;
 
 	if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_BURST) {
-		if (vm->hfront_porch * dsi_tmp_buf_bpp >
+		if ((vm->hfront_porch + vm->hback_porch) * dsi_tmp_buf_bpp >
 		    data_phy_cycles * dsi->lanes + 18) {
-			horizontal_frontporch_byte = vm->hfront_porch *
-						     dsi_tmp_buf_bpp -
-						     data_phy_cycles *
-						     dsi->lanes - 18;
+			horizontal_frontporch_byte =
+				vm->hfront_porch * dsi_tmp_buf_bpp -
+				(data_phy_cycles * dsi->lanes + 18) *
+				vm->hfront_porch /
+				(vm->hfront_porch + vm->hback_porch);
+
+			horizontal_backporch_byte =
+				horizontal_backporch_byte -
+				(data_phy_cycles * dsi->lanes + 18) *
+				vm->hback_porch /
+				(vm->hfront_porch + vm->hback_porch);
 		} else {
 			DRM_WARN("HFP less than d-phy, FPS will under 60Hz\n");
 			horizontal_frontporch_byte = vm->hfront_porch *
 						     dsi_tmp_buf_bpp;
 		}
 	} else {
-		if (vm->hfront_porch * dsi_tmp_buf_bpp >
+		if ((vm->hfront_porch + vm->hback_porch) * dsi_tmp_buf_bpp >
 		    data_phy_cycles * dsi->lanes + 12) {
-			horizontal_frontporch_byte = vm->hfront_porch *
-						     dsi_tmp_buf_bpp -
-						     data_phy_cycles *
-						     dsi->lanes - 12;
+			horizontal_frontporch_byte =
+				vm->hfront_porch * dsi_tmp_buf_bpp -
+				(data_phy_cycles * dsi->lanes + 12) *
+				vm->hfront_porch /
+				(vm->hfront_porch + vm->hback_porch);
+			horizontal_backporch_byte = horizontal_backporch_byte -
+				(data_phy_cycles * dsi->lanes + 12) *
+				vm->hback_porch /
+				(vm->hfront_porch + vm->hback_porch);
 		} else {
 			DRM_WARN("HFP less than d-phy, FPS will under 60Hz\n");
 			horizontal_frontporch_byte = vm->hfront_porch *
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
index 7ad14937fcdf..b67f88872726 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
@@ -506,6 +506,14 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
 		goto fail;
 	}
 
+	/*
+	 * Set the ICC path to maximum speed for now by multiplying the fastest
+	 * frequency by the bus width (8). We'll want to scale this later on to
+	 * improve battery life.
+	 */
+	icc_set_bw(gpu->icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
+	icc_set_bw(gpu->ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
+
 	return gpu;
 
 fail:
diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
index b01388a9e89e..253d8d85daad 100644
--- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
@@ -591,6 +591,14 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
 		goto fail;
 	}
 
+	/*
+	 * Set the ICC path to maximum speed for now by multiplying the fastest
+	 * frequency by the bus width (8). We'll want to scale this later on to
+	 * improve battery life.
+	 */
+	icc_set_bw(gpu->icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
+	icc_set_bw(gpu->ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
+
 	return gpu;
 
 fail:
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index b02e2042547f..7d9e63e20ded 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -753,11 +753,18 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
 		gpu->funcs->flush(gpu, gpu->rb[0]);
 		if (!a5xx_idle(gpu, gpu->rb[0]))
 			return -EINVAL;
-	} else {
-		/* Print a warning so if we die, we know why */
+	} else if (ret == -ENODEV) {
+		/*
+		 * This device does not use zap shader (but print a warning
+		 * just in case someone got their dt wrong.. hopefully they
+		 * have a debug UART to realize the error of their ways...
+		 * if you mess this up you are about to crash horribly)
+		 */
 		dev_warn_once(gpu->dev->dev,
 			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
 		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
+	} else {
+		return ret;
 	}
 
 	/* Last step - yield the ringbuffer */
diff --git a/drivers/gpu/drm/msm/adreno/a6xx.xml.h b/drivers/gpu/drm/msm/adreno/a6xx.xml.h
index f44553ec3193..ed78fee2a262 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx.xml.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx.xml.h
@@ -16,11 +16,11 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/envytools/rnndb/adreno/a3xx.xml          (  83840 bytes, from 2018-07-03 19:37:13)
 - /home/robclark/src/envytools/rnndb/adreno/a4xx.xml          ( 112086 bytes, from 2018-07-03 19:37:13)
 - /home/robclark/src/envytools/rnndb/adreno/a5xx.xml          ( 147240 bytes, from 2018-12-02 17:29:54)
-- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml          ( 140790 bytes, from 2018-12-02 17:29:54)
+- /home/smasetty/playarea/envytools/rnndb/adreno/a6xx.xml     ( 161969 bytes, from 2019-11-29 07:18:16)
 - /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml      (  10431 bytes, from 2018-09-14 13:03:07)
 - /home/robclark/src/envytools/rnndb/adreno/ocmem.xml         (   1773 bytes, from 2018-07-03 19:37:13)
 
-Copyright (C) 2013-2018 by the following authors:
+Copyright (C) 2013-2019 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
 - Ilia Mirkin <imirkin@alum.mit.edu> (imirkin)
 
@@ -2519,6 +2519,54 @@ static inline uint32_t A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL(uint32_t val)
 
 #define REG_A6XX_VBIF_PERF_PWR_CNT_HIGH2			0x0000311a
 
+#define REG_A6XX_GBIF_SCACHE_CNTL1				0x00003c02
+
+#define REG_A6XX_GBIF_QSB_SIDE0					0x00003c03
+
+#define REG_A6XX_GBIF_QSB_SIDE1					0x00003c04
+
+#define REG_A6XX_GBIF_QSB_SIDE2					0x00003c05
+
+#define REG_A6XX_GBIF_QSB_SIDE3					0x00003c06
+
+#define REG_A6XX_GBIF_HALT					0x00003c45
+
+#define REG_A6XX_GBIF_HALT_ACK					0x00003c46
+
+#define REG_A6XX_GBIF_PERF_PWR_CNT_EN				0x00003cc0
+
+#define REG_A6XX_GBIF_PERF_CNT_SEL				0x00003cc2
+
+#define REG_A6XX_GBIF_PERF_PWR_CNT_SEL				0x00003cc3
+
+#define REG_A6XX_GBIF_PERF_CNT_LOW0				0x00003cc4
+
+#define REG_A6XX_GBIF_PERF_CNT_LOW1				0x00003cc5
+
+#define REG_A6XX_GBIF_PERF_CNT_LOW2				0x00003cc6
+
+#define REG_A6XX_GBIF_PERF_CNT_LOW3				0x00003cc7
+
+#define REG_A6XX_GBIF_PERF_CNT_HIGH0				0x00003cc8
+
+#define REG_A6XX_GBIF_PERF_CNT_HIGH1				0x00003cc9
+
+#define REG_A6XX_GBIF_PERF_CNT_HIGH2				0x00003cca
+
+#define REG_A6XX_GBIF_PERF_CNT_HIGH3				0x00003ccb
+
+#define REG_A6XX_GBIF_PWR_CNT_LOW0				0x00003ccc
+
+#define REG_A6XX_GBIF_PWR_CNT_LOW1				0x00003ccd
+
+#define REG_A6XX_GBIF_PWR_CNT_LOW2				0x00003cce
+
+#define REG_A6XX_GBIF_PWR_CNT_HIGH0				0x00003ccf
+
+#define REG_A6XX_GBIF_PWR_CNT_HIGH1				0x00003cd0
+
+#define REG_A6XX_GBIF_PWR_CNT_HIGH2				0x00003cd1
+
 #define REG_A6XX_RB_WINDOW_OFFSET2				0x000088d4
 #define A6XX_RB_WINDOW_OFFSET2_WINDOW_OFFSET_DISABLE		0x80000000
 #define A6XX_RB_WINDOW_OFFSET2_X__MASK				0x00007fff
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 85f14feafdec..983afeaee737 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2017-2018 The Linux Foundation. All rights reserved. */
+/* Copyright (c) 2017-2019 The Linux Foundation. All rights reserved. */
 
 #include <linux/clk.h>
 #include <linux/interconnect.h>
@@ -149,6 +149,8 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, unsigned long freq)
 		if (freq == gmu->gpu_freqs[perf_index])
 			break;
 
+	gmu->current_perf_index = perf_index;
+
 	__a6xx_gmu_set_freq(gmu, perf_index);
 }
 
@@ -433,6 +435,8 @@ static void __iomem *a6xx_gmu_get_mmio(struct platform_device *pdev,
 
 static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu)
 {
+	struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
+	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
 	struct platform_device *pdev = to_platform_device(gmu->dev);
 	void __iomem *pdcptr = a6xx_gmu_get_mmio(pdev, "gmu_pdc");
 	void __iomem *seqptr = a6xx_gmu_get_mmio(pdev, "gmu_pdc_seq");
@@ -480,20 +484,34 @@ static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu)
 	pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS1_CMD0_MSGID + 4, 0x10108);
 	pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS1_CMD0_ADDR + 4, 0x30000);
 	pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS1_CMD0_DATA + 4, 0x0);
+
 	pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS1_CMD0_MSGID + 8, 0x10108);
-	pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS1_CMD0_ADDR + 8, 0x30080);
+	if (adreno_is_a618(adreno_gpu))
+		pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS1_CMD0_ADDR + 8, 0x30090);
+	else
+		pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS1_CMD0_ADDR + 8, 0x30080);
 	pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS1_CMD0_DATA + 8, 0x0);
+
 	pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD_ENABLE_BANK, 7);
 	pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD_WAIT_FOR_CMPL_BANK, 0);
 	pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CONTROL, 0);
 	pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_MSGID, 0x10108);
 	pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_ADDR, 0x30010);
 	pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_DATA, 2);
+
 	pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_MSGID + 4, 0x10108);
 	pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_ADDR + 4, 0x30000);
-	pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_DATA + 4, 0x3);
+	if (adreno_is_a618(adreno_gpu))
+		pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_DATA + 4, 0x2);
+	else
+		pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_DATA + 4, 0x3);
+
+
 	pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_MSGID + 8, 0x10108);
-	pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_ADDR + 8, 0x30080);
+	if (adreno_is_a618(adreno_gpu))
+		pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_ADDR + 8, 0x30090);
+	else
+		pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_ADDR + 8, 0x30080);
 	pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_DATA + 8, 0x3);
 
 	/* Setup GPU PDC */
@@ -741,8 +759,8 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)
 	gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_MASK, ~A6XX_HFI_IRQ_MASK);
 	enable_irq(gmu->hfi_irq);
 
-	/* Set the GPU to the highest power frequency */
-	__a6xx_gmu_set_freq(gmu, gmu->nr_gpu_freqs - 1);
+	/* Set the GPU to the current freq */
+	__a6xx_gmu_set_freq(gmu, gmu->current_perf_index);
 
 	/*
 	 * "enable" the GX power domain which won't actually do anything but it
@@ -1166,6 +1184,8 @@ static int a6xx_gmu_pwrlevels_probe(struct a6xx_gmu *gmu)
 	gmu->nr_gpu_freqs = a6xx_gmu_build_freq_table(&gpu->pdev->dev,
 		gmu->gpu_freqs, ARRAY_SIZE(gmu->gpu_freqs));
 
+	gmu->current_perf_index = gmu->nr_gpu_freqs - 1;
+
 	/* Build the list of RPMh votes that we'll send to the GMU */
 	return a6xx_gmu_rpmh_votes_init(gmu);
 }
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
index 39a26dd63674..2af91ed7ed0c 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
@@ -63,6 +63,9 @@ struct a6xx_gmu {
 	struct clk_bulk_data *clocks;
 	struct clk *core_clk;
 
+	/* current performance index set externally */
+	int current_perf_index;
+
 	int nr_gpu_freqs;
 	unsigned long gpu_freqs[16];
 	u32 gx_arc_votes[16];
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index dc8ec2c94301..daf07800cde0 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2017-2018 The Linux Foundation. All rights reserved. */
+/* Copyright (c) 2017-2019 The Linux Foundation. All rights reserved. */
 
 
 #include "msm_gem.h"
@@ -378,6 +378,18 @@ static int a6xx_hw_init(struct msm_gpu *gpu)
 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 	int ret;
 
+	/*
+	 * During a previous slumber, GBIF halt is asserted to ensure
+	 * that no further transactions can go through the GPU before
+	 * the GPU headswitch is turned off.
+	 *
+	 * This halt is deasserted once the headswitch goes off, but in
+	 * case the headswitch did not go off, clear the GBIF halt here
+	 * so that GPU wake-up does not fail because of halted GPU
+	 * transactions.
+	 */
+	gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
+
 	/* Make sure the GMU keeps the GPU on while we set it up */
 	a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
 
@@ -406,12 +418,17 @@ static int a6xx_hw_init(struct msm_gpu *gpu)
 	gpu_write(gpu, REG_A6XX_TPL1_ADDR_MODE_CNTL, 0x1);
 	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
 
-	/* enable hardware clockgating */
-	a6xx_set_hwcg(gpu, true);
+	/*
+	 * enable hardware clockgating
+	 * For now enable clock gating only for a630
+	 */
+	if (adreno_is_a630(adreno_gpu))
+		a6xx_set_hwcg(gpu, true);
 
-	/* VBIF start */
-	gpu_write(gpu, REG_A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
+	/* VBIF/GBIF start */
 	gpu_write(gpu, REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3);
+	if (adreno_is_a630(adreno_gpu))
+		gpu_write(gpu, REG_A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
 
 	/* Make all blocks contribute to the GPU BUSY perf counter */
 	gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);
@@ -537,12 +554,19 @@ static int a6xx_hw_init(struct msm_gpu *gpu)
 		a6xx_flush(gpu, gpu->rb[0]);
 		if (!a6xx_idle(gpu, gpu->rb[0]))
 			return -EINVAL;
-	} else {
-		/* Print a warning so if we die, we know why */
+	} else if (ret == -ENODEV) {
+		/*
+		 * This device does not use zap shader (but print a warning
+		 * just in case someone got their dt wrong.. hopefully they
+		 * have a debug UART to realize the error of their ways...
+		 * if you mess this up you are about to crash horribly)
+		 */
 		dev_warn_once(gpu->dev->dev,
 			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
 		gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
 		ret = 0;
+	} else {
+		return ret;
 	}
 
 out:
@@ -724,6 +748,39 @@ static const u32 a6xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A6XX_CP_RB_CNTL),
 };
 
+#define GBIF_CLIENT_HALT_MASK             BIT(0)
+#define GBIF_ARB_HALT_MASK                BIT(1)
+
+/* Assert the VBIF/GBIF halt, wait for the ack, then release the halt. */
+static void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu)
+{
+	struct msm_gpu *gpu = &adreno_gpu->base;
+
+	if (!a6xx_has_gbif(adreno_gpu)) {
+		gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0xf);
+		spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) &
+								0xf) == 0xf);
+		gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0);
+		return;
+	}
+
+	/* Halt new client requests on GBIF */
+	gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
+	spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
+			(GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK);
+
+	/* Halt all AXI requests on GBIF */
+	gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK);
+	spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
+			(GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK);
+
+	/*
+	 * GMU needs DDR access in slumber path. Deassert GBIF halt now
+	 * to allow for GMU to access system memory.
+	 */
+	gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
+}
+
 static int a6xx_pm_resume(struct msm_gpu *gpu)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -748,6 +805,16 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu)
 
 	devfreq_suspend_device(gpu->devfreq.devfreq);
 
+	/*
+	 * Make sure the GMU is idle before continuing (because some transitions
+	 * may use VBIF)
+	 */
+	a6xx_gmu_wait_for_idle(&a6xx_gpu->gmu);
+
+	/* Clear the VBIF pipe before shutting down */
+	/* FIXME: This accesses the GPU - do we need to make sure it is on? */
+	a6xx_bus_clear_pending_transactions(adreno_gpu);
+
 	return a6xx_gmu_stop(a6xx_gpu);
 }
 
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
index 64399554f2dd..7239b8b60939 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (c) 2017 The Linux Foundation. All rights reserved. */
+/* Copyright (c) 2017, 2019 The Linux Foundation. All rights reserved. */
 
 #ifndef __A6XX_GPU_H__
 #define __A6XX_GPU_H__
@@ -42,6 +42,13 @@ struct a6xx_gpu {
 #define A6XX_PROTECT_RDONLY(_reg, _len) \
 	((((_len) & 0x3FFF) << 18) | ((_reg) & 0x3FFFF))
 
+/*
+ * Tell whether @gpu uses GBIF; only A630 is treated as a non-GBIF target.
+ */
+static inline bool a6xx_has_gbif(struct adreno_gpu *gpu)
+{
+	return !adreno_is_a630(gpu);
+}
 
 int a6xx_gmu_resume(struct a6xx_gpu *gpu);
 int a6xx_gmu_stop(struct a6xx_gpu *gpu);
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
index 691c1a277d91..d6023ba8033c 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2018 The Linux Foundation. All rights reserved. */
+/* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */
 
 #include <linux/ascii85.h>
 #include "msm_gem.h"
@@ -320,6 +320,7 @@ static void a6xx_get_debugbus(struct msm_gpu *gpu,
 {
 	struct resource *res;
 	void __iomem *cxdbg = NULL;
+	int nr_debugbus_blocks;
 
 	/* Set up the GX debug bus */
 
@@ -374,9 +375,11 @@ static void a6xx_get_debugbus(struct msm_gpu *gpu,
 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
 	}
 
-	a6xx_state->debugbus = state_kcalloc(a6xx_state,
-		ARRAY_SIZE(a6xx_debugbus_blocks),
-		sizeof(*a6xx_state->debugbus));
+	nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
+		(a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);
+
+	a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
+			sizeof(*a6xx_state->debugbus));
 
 	if (a6xx_state->debugbus) {
 		int i;
@@ -388,15 +391,31 @@ static void a6xx_get_debugbus(struct msm_gpu *gpu,
 				&a6xx_state->debugbus[i]);
 
 		a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);
+
+		/*
+		 * GBIF uses the same debugbus as the other GPU blocks, so fall
+		 * back to the default path if the GPU uses GBIF. GBIF also uses
+		 * exactly the same ID as VBIF.
+		 */
+		if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
+			a6xx_get_debugbus_block(gpu, a6xx_state,
+				&a6xx_gbif_debugbus_block,
+				&a6xx_state->debugbus[i]);
+
+			a6xx_state->nr_debugbus += 1;
+		}
 	}
 
-	a6xx_state->vbif_debugbus =
-		state_kcalloc(a6xx_state, 1,
-			sizeof(*a6xx_state->vbif_debugbus));
+	/* Dump the VBIF debugbus on applicable targets */
+	if (!a6xx_has_gbif(to_adreno_gpu(gpu))) {
+		a6xx_state->vbif_debugbus =
+			state_kcalloc(a6xx_state, 1,
+					sizeof(*a6xx_state->vbif_debugbus));
 
-	if (a6xx_state->vbif_debugbus)
-		a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
-			a6xx_state->vbif_debugbus);
+		if (a6xx_state->vbif_debugbus)
+			a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
+					a6xx_state->vbif_debugbus);
+	}
 
 	if (cxdbg) {
 		a6xx_state->cx_debugbus =
@@ -770,14 +789,16 @@ static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
 		&a6xx_state->gmu_registers[1]);
 }
 
+#define A6XX_GBIF_REGLIST_SIZE   1
 static void a6xx_get_registers(struct msm_gpu *gpu,
 		struct a6xx_gpu_state *a6xx_state,
 		struct a6xx_crashdumper *dumper)
 {
 	int i, count = ARRAY_SIZE(a6xx_ahb_reglist) +
 		ARRAY_SIZE(a6xx_reglist) +
-		ARRAY_SIZE(a6xx_hlsq_reglist);
+		ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
 	int index = 0;
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 
 	a6xx_state->registers = state_kcalloc(a6xx_state,
 		count, sizeof(*a6xx_state->registers));
@@ -792,6 +813,15 @@ static void a6xx_get_registers(struct msm_gpu *gpu,
 			a6xx_state, &a6xx_ahb_reglist[i],
 			&a6xx_state->registers[index++]);
 
+	if (a6xx_has_gbif(adreno_gpu))
+		a6xx_get_ahb_gpu_registers(gpu,
+				a6xx_state, &a6xx_gbif_reglist,
+				&a6xx_state->registers[index++]);
+	else
+		a6xx_get_ahb_gpu_registers(gpu,
+				a6xx_state, &a6xx_vbif_reglist,
+				&a6xx_state->registers[index++]);
+
 	for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
 		a6xx_get_crashdumper_registers(gpu,
 			a6xx_state, &a6xx_reglist[i],
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h
index 68cccfa2870a..e67c20c415af 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2018 The Linux Foundation. All rights reserved. */
+/* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */
 
 #ifndef _A6XX_CRASH_DUMP_H_
 #define _A6XX_CRASH_DUMP_H_
@@ -307,11 +307,20 @@ static const u32 a6xx_vbif_registers[] = {
 	0x3410, 0x3410, 0x3800, 0x3801,
 };
 
+static const u32 a6xx_gbif_registers[] = { /* wrapped by a6xx_gbif_reglist */
+	0x3C00, 0x3C0B, 0x3C40, 0x3C47, 0x3CC0, 0x3CD1, 0xE3A, 0xE3A,
+};
+
 static const struct a6xx_registers a6xx_ahb_reglist[] = {
 	REGS(a6xx_ahb_registers, 0, 0),
-	REGS(a6xx_vbif_registers, 0, 0),
 };
 
+static const struct a6xx_registers a6xx_vbif_reglist =
+			REGS(a6xx_vbif_registers, 0, 0);
+
+static const struct a6xx_registers a6xx_gbif_reglist =
+			REGS(a6xx_gbif_registers, 0, 0);
+
 static const u32 a6xx_gmu_gx_registers[] = {
 	/* GMU GX */
 	0x0000, 0x0000, 0x0010, 0x0013, 0x0016, 0x0016, 0x0018, 0x001b,
@@ -422,6 +431,9 @@ static const struct a6xx_debugbus_block {
 	DEBUGBUS(A6XX_DBGBUS_TPL1_3, 0x100),
 };
 
+static const struct a6xx_debugbus_block a6xx_gbif_debugbus_block =
+			DEBUGBUS(A6XX_DBGBUS_VBIF, 0x100);
+
 static const struct a6xx_debugbus_block a6xx_cx_debugbus_blocks[] = {
 	DEBUGBUS(A6XX_DBGBUS_GMU_CX, 0x100),
 	DEBUGBUS(A6XX_DBGBUS_CX, 0x100),
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
index fbbdf86504f5..cb3a6e597d76 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -167,6 +167,17 @@ static const struct adreno_info gpulist[] = {
 		.init = a5xx_gpu_init,
 		.zapfw = "a540_zap.mdt",
 	}, {
+		.rev = ADRENO_REV(6, 1, 8, ANY_ID),
+		.revn = 618,
+		.name = "A618",
+		.fw = {
+			[ADRENO_FW_SQE] = "a630_sqe.fw",
+			[ADRENO_FW_GMU] = "a630_gmu.bin",
+		},
+		.gmem = SZ_512K,
+		.inactive_period = DRM_MSM_INACTIVE_PERIOD,
+		.init = a6xx_gpu_init,
+	}, {
 		.rev = ADRENO_REV(6, 3, 0, ANY_ID),
 		.revn = 630,
 		.name = "A630",
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 0783e4b5486a..7fd29829b2fa 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -26,6 +26,7 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname,
 {
 	struct device *dev = &gpu->pdev->dev;
 	const struct firmware *fw;
+	const char *signed_fwname = NULL;
 	struct device_node *np, *mem_np;
 	struct resource r;
 	phys_addr_t mem_phys;
@@ -58,8 +59,43 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname,
 
 	mem_phys = r.start;
 
-	/* Request the MDT file for the firmware */
-	fw = adreno_request_fw(to_adreno_gpu(gpu), fwname);
+	/*
+	 * Check for a firmware-name property.  This is the new scheme
+	 * to handle firmware that may be signed with device specific
+	 * keys, allowing us to have a different zap fw path for different
+	 * devices.
+	 *
+	 * If the firmware-name property is found, we bypass the
+	 * adreno_request_fw() mechanism, because we don't need to handle
+	 * the /lib/firmware/qcom/... vs /lib/firmware/... case.
+	 *
+	 * If the firmware-name property is not found, for backwards
+	 * compatibility we fall back to the fwname from the gpulist
+	 * table.
+	 */
+	of_property_read_string_index(np, "firmware-name", 0, &signed_fwname);
+	if (signed_fwname) {
+		fwname = signed_fwname;
+		ret = request_firmware_direct(&fw, fwname, gpu->dev->dev);
+		if (ret)
+			fw = ERR_PTR(ret);
+	} else if (fwname) {
+		/* Request the MDT file from the default location: */
+		fw = adreno_request_fw(to_adreno_gpu(gpu), fwname);
+	} else {
+		/*
+		 * For new targets, we require the firmware-name property,
+		 * if a zap-shader is required, rather than falling back
+		 * to a firmware name specified in gpulist.
+		 *
+		 * Because the firmware is signed with a (potentially)
+		 * device specific key, having the name come from gpulist
+		 * was a bad idea, and is only provided for backwards
+		 * compatibility for older targets.
+		 */
+		return -ENODEV;
+	}
+
 	if (IS_ERR(fw)) {
 		DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname);
 		return PTR_ERR(fw);
@@ -95,7 +131,7 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname,
 	 * not.  But since we've already gotten through adreno_request_fw()
 	 * we know which of the two cases it is:
 	 */
-	if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) {
+	if (signed_fwname || (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY)) {
 		ret = qcom_mdt_load(dev, fw, fwname, pasid,
 				mem_region, mem_phys, mem_size, NULL);
 	} else {
@@ -146,14 +182,6 @@ int adreno_zap_shader_load(struct msm_gpu *gpu, u32 pasid)
 		return -EPROBE_DEFER;
 	}
 
-	/* Each GPU has a target specific zap shader firmware name to use */
-	if (!adreno_gpu->info->zapfw) {
-		zap_available = false;
-		DRM_DEV_ERROR(&pdev->dev,
-			"Zap shader firmware file not specified for this target\n");
-		return -ENODEV;
-	}
-
 	return zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw, pasid);
 }
 
@@ -826,7 +854,7 @@ static int adreno_get_legacy_pwrlevels(struct device *dev)
 
 	node = of_get_compatible_child(dev->of_node, "qcom,gpu-pwrlevels");
 	if (!node) {
-		DRM_DEV_ERROR(dev, "Could not find the GPU powerlevels\n");
+		DRM_DEV_DEBUG(dev, "Could not find the GPU powerlevels\n");
 		return -ENXIO;
 	}
 
@@ -887,10 +915,21 @@ static int adreno_get_pwrlevels(struct device *dev,
 	DBG("fast_rate=%u, slow_rate=27000000", gpu->fast_rate);
 
 	/* Check for an interconnect path for the bus */
-	gpu->icc_path = of_icc_get(dev, NULL);
+	gpu->icc_path = of_icc_get(dev, "gfx-mem");
+	if (!gpu->icc_path) {
+		/*
+		 * Keep compatibility with device trees that don't have an
+		 * interconnect-names property.
+		 */
+		gpu->icc_path = of_icc_get(dev, NULL);
+	}
 	if (IS_ERR(gpu->icc_path))
 		gpu->icc_path = NULL;
 
+	gpu->ocmem_icc_path = of_icc_get(dev, "ocmem");
+	if (IS_ERR(gpu->ocmem_icc_path))
+		gpu->ocmem_icc_path = NULL;
+
 	return 0;
 }
 
@@ -977,6 +1016,7 @@ void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu)
 		release_firmware(adreno_gpu->fw[i]);
 
 	icc_put(gpu->icc_path);
+	icc_put(gpu->ocmem_icc_path);
 
 	msm_gpu_cleanup(&adreno_gpu->base);
 }
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index e71a7570ef72..9ff4e550e7bd 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -3,7 +3,7 @@
  * Copyright (C) 2013 Red Hat
  * Author: Rob Clark <robdclark@gmail.com>
  *
- * Copyright (c) 2014,2017 The Linux Foundation. All rights reserved.
+ * Copyright (c) 2014,2017, 2019 The Linux Foundation. All rights reserved.
  */
 
 #ifndef __ADRENO_GPU_H__
@@ -227,6 +227,16 @@ static inline int adreno_is_a540(struct adreno_gpu *gpu)
 	return gpu->revn == 540;
 }
 
+static inline int adreno_is_a618(struct adreno_gpu *gpu)
+{
+	return gpu->revn == 618;
+}
+
+static inline int adreno_is_a630(struct adreno_gpu *gpu)
+{
+	return gpu->revn == 630;
+}
+
 int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value);
 const struct firmware *adreno_request_fw(struct adreno_gpu *adreno_gpu,
 		const char *fwname);
@@ -330,10 +340,7 @@ OUT_PKT7(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
 static inline bool adreno_reg_check(struct adreno_gpu *gpu,
 		enum adreno_regs offset_name)
 {
-	if (offset_name >= REG_ADRENO_REGISTER_MAX ||
-			!gpu->reg_offsets[offset_name]) {
-		BUG();
-	}
+	BUG_ON(offset_name >= REG_ADRENO_REGISTER_MAX || !gpu->reg_offsets[offset_name]);
 
 	/*
 	 * REG_SKIP is a special value that tell us that the register in
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
index f197dce54576..bf513411b243 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
@@ -197,10 +197,6 @@ static void _dpu_crtc_blend_setup(struct drm_crtc *crtc)
 	DPU_DEBUG("%s\n", dpu_crtc->name);
 
 	for (i = 0; i < cstate->num_mixers; i++) {
-		if (!mixer[i].hw_lm || !mixer[i].lm_ctl) {
-			DPU_ERROR("invalid lm or ctl assigned to mixer\n");
-			return;
-		}
 		mixer[i].mixer_op_mode = 0;
 		mixer[i].flush_mask = 0;
 		if (mixer[i].lm_ctl->ops.clear_all_blendstages)
@@ -1113,14 +1109,9 @@ static int _dpu_debugfs_status_show(struct seq_file *s, void *data)
 
 	for (i = 0; i < cstate->num_mixers; ++i) {
 		m = &cstate->mixers[i];
-		if (!m->hw_lm)
-			seq_printf(s, "\tmixer[%d] has no lm\n", i);
-		else if (!m->lm_ctl)
-			seq_printf(s, "\tmixer[%d] has no ctl\n", i);
-		else
-			seq_printf(s, "\tmixer:%d ctl:%d width:%d height:%d\n",
-				m->hw_lm->idx - LM_0, m->lm_ctl->idx - CTL_0,
-				out_width, mode->vdisplay);
+		seq_printf(s, "\tmixer:%d ctl:%d width:%d height:%d\n",
+			m->hw_lm->idx - LM_0, m->lm_ctl->idx - CTL_0,
+			out_width, mode->vdisplay);
 	}
 
 	seq_puts(s, "\n");
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index f96e142c4361..f8ac3bf60fd6 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -58,7 +58,7 @@
 
 #define IDLE_SHORT_TIMEOUT	1
 
-#define MAX_VDISPLAY_SPLIT 1080
+#define MAX_HDISPLAY_SPLIT 1080
 
 /* timeout in frames waiting for frame done */
 #define DPU_ENCODER_FRAME_DONE_TIMEOUT_FRAMES 5
@@ -233,7 +233,7 @@ int dpu_encoder_helper_wait_for_irq(struct dpu_encoder_phys *phys_enc,
 	u32 irq_status;
 	int ret;
 
-	if (!phys_enc || !wait_info || intr_idx >= INTR_IDX_MAX) {
+	if (!wait_info || intr_idx >= INTR_IDX_MAX) {
 		DPU_ERROR("invalid params\n");
 		return -EINVAL;
 	}
@@ -308,7 +308,7 @@ int dpu_encoder_helper_register_irq(struct dpu_encoder_phys *phys_enc,
 	struct dpu_encoder_irq *irq;
 	int ret = 0;
 
-	if (!phys_enc || intr_idx >= INTR_IDX_MAX) {
+	if (intr_idx >= INTR_IDX_MAX) {
 		DPU_ERROR("invalid params\n");
 		return -EINVAL;
 	}
@@ -363,10 +363,6 @@ int dpu_encoder_helper_unregister_irq(struct dpu_encoder_phys *phys_enc,
 	struct dpu_encoder_irq *irq;
 	int ret;
 
-	if (!phys_enc) {
-		DPU_ERROR("invalid encoder\n");
-		return -EINVAL;
-	}
 	irq = &phys_enc->irq[intr_idx];
 
 	/* silently skip irqs that weren't registered */
@@ -415,7 +411,7 @@ void dpu_encoder_get_hw_resources(struct drm_encoder *drm_enc,
 	for (i = 0; i < dpu_enc->num_phys_encs; i++) {
 		struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i];
 
-		if (phys && phys->ops.get_hw_resources)
+		if (phys->ops.get_hw_resources)
 			phys->ops.get_hw_resources(phys, hw_res);
 	}
 }
@@ -438,7 +434,7 @@ static void dpu_encoder_destroy(struct drm_encoder *drm_enc)
 	for (i = 0; i < dpu_enc->num_phys_encs; i++) {
 		struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i];
 
-		if (phys && phys->ops.destroy) {
+		if (phys->ops.destroy) {
 			phys->ops.destroy(phys);
 			--dpu_enc->num_phys_encs;
 			dpu_enc->phys_encs[i] = NULL;
@@ -464,7 +460,7 @@ void dpu_encoder_helper_split_config(
 	struct dpu_hw_mdp *hw_mdptop;
 	struct msm_display_info *disp_info;
 
-	if (!phys_enc || !phys_enc->hw_mdptop || !phys_enc->parent) {
+	if (!phys_enc->hw_mdptop || !phys_enc->parent) {
 		DPU_ERROR("invalid arg(s), encoder %d\n", phys_enc != 0);
 		return;
 	}
@@ -534,8 +530,23 @@ static struct msm_display_topology dpu_encoder_get_topology(
 		if (dpu_enc->phys_encs[i])
 			intf_count++;
 
-	/* User split topology for width > 1080 */
-	topology.num_lm = (mode->vdisplay > MAX_VDISPLAY_SPLIT) ? 2 : 1;
+	/* Datapath topology selection
+	 *
+	 * Dual display
+	 * 2 LM, 2 INTF ( Split display using 2 interfaces)
+	 *
+	 * Single display
+	 * 1 LM, 1 INTF
+	 * 2 LM, 1 INTF (stream merge to support high resolution interfaces)
+	 *
+	 */
+	if (intf_count == 2)
+		topology.num_lm = 2;
+	else if (!dpu_kms->catalog->caps->has_3d_merge)
+		topology.num_lm = 1;
+	else
+		topology.num_lm = (mode->hdisplay > MAX_HDISPLAY_SPLIT) ? 2 : 1;
+
 	topology.num_enc = 0;
 	topology.num_intf = intf_count;
 
@@ -583,10 +594,10 @@ static int dpu_encoder_virt_atomic_check(
 	for (i = 0; i < dpu_enc->num_phys_encs; i++) {
 		struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i];
 
-		if (phys && phys->ops.atomic_check)
+		if (phys->ops.atomic_check)
 			ret = phys->ops.atomic_check(phys, crtc_state,
 					conn_state);
-		else if (phys && phys->ops.mode_fixup)
+		else if (phys->ops.mode_fixup)
 			if (!phys->ops.mode_fixup(phys, mode, adj_mode))
 				ret = -EINVAL;
 
@@ -682,7 +693,7 @@ static void _dpu_encoder_irq_control(struct drm_encoder *drm_enc, bool enable)
 	for (i = 0; i < dpu_enc->num_phys_encs; i++) {
 		struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i];
 
-		if (phys && phys->ops.irq_control)
+		if (phys->ops.irq_control)
 			phys->ops.irq_control(phys, enable);
 	}
 
@@ -1032,46 +1043,43 @@ static void dpu_encoder_virt_mode_set(struct drm_encoder *drm_enc,
 	for (i = 0; i < dpu_enc->num_phys_encs; i++) {
 		struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i];
 
-		if (phys) {
-			if (!dpu_enc->hw_pp[i]) {
-				DPU_ERROR_ENC(dpu_enc, "no pp block assigned"
-					     "at idx: %d\n", i);
-				goto error;
-			}
+		if (!dpu_enc->hw_pp[i]) {
+			DPU_ERROR_ENC(dpu_enc,
+				"no pp block assigned at idx: %d\n", i);
+			goto error;
+		}
 
-			if (!hw_ctl[i]) {
-				DPU_ERROR_ENC(dpu_enc, "no ctl block assigned"
-					     "at idx: %d\n", i);
-				goto error;
-			}
+		if (!hw_ctl[i]) {
+			DPU_ERROR_ENC(dpu_enc,
+				"no ctl block assigned at idx: %d\n", i);
+			goto error;
+		}
 
-			phys->hw_pp = dpu_enc->hw_pp[i];
-			phys->hw_ctl = hw_ctl[i];
+		phys->hw_pp = dpu_enc->hw_pp[i];
+		phys->hw_ctl = hw_ctl[i];
 
-			dpu_rm_init_hw_iter(&hw_iter, drm_enc->base.id,
-					    DPU_HW_BLK_INTF);
-			for (j = 0; j < MAX_CHANNELS_PER_ENC; j++) {
-				struct dpu_hw_intf *hw_intf;
+		dpu_rm_init_hw_iter(&hw_iter, drm_enc->base.id,
+				    DPU_HW_BLK_INTF);
+		for (j = 0; j < MAX_CHANNELS_PER_ENC; j++) {
+			struct dpu_hw_intf *hw_intf;
 
-				if (!dpu_rm_get_hw(&dpu_kms->rm, &hw_iter))
-					break;
+			if (!dpu_rm_get_hw(&dpu_kms->rm, &hw_iter))
+				break;
 
-				hw_intf = (struct dpu_hw_intf *)hw_iter.hw;
-				if (hw_intf->idx == phys->intf_idx)
-					phys->hw_intf = hw_intf;
-			}
+			hw_intf = (struct dpu_hw_intf *)hw_iter.hw;
+			if (hw_intf->idx == phys->intf_idx)
+				phys->hw_intf = hw_intf;
+		}
 
-			if (!phys->hw_intf) {
-				DPU_ERROR_ENC(dpu_enc,
-					      "no intf block assigned at idx: %d\n",
-					      i);
+		if (!phys->hw_intf) {
+			DPU_ERROR_ENC(dpu_enc,
+				      "no intf block assigned at idx: %d\n", i);
 				goto error;
-			}
-
-			phys->connector = conn->state->connector;
-			if (phys->ops.mode_set)
-				phys->ops.mode_set(phys, mode, adj_mode);
 		}
+
+		phys->connector = conn->state->connector;
+		if (phys->ops.mode_set)
+			phys->ops.mode_set(phys, mode, adj_mode);
 	}
 
 	dpu_enc->mode_set_complete = true;
@@ -1203,7 +1211,7 @@ static void dpu_encoder_virt_disable(struct drm_encoder *drm_enc)
 	for (i = 0; i < dpu_enc->num_phys_encs; i++) {
 		struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i];
 
-		if (phys && phys->ops.disable)
+		if (phys->ops.disable)
 			phys->ops.disable(phys);
 	}
 
@@ -1216,8 +1224,7 @@ static void dpu_encoder_virt_disable(struct drm_encoder *drm_enc)
 	dpu_encoder_resource_control(drm_enc, DPU_ENC_RC_EVENT_STOP);
 
 	for (i = 0; i < dpu_enc->num_phys_encs; i++) {
-		if (dpu_enc->phys_encs[i])
-			dpu_enc->phys_encs[i]->connector = NULL;
+		dpu_enc->phys_encs[i]->connector = NULL;
 	}
 
 	DPU_DEBUG_ENC(dpu_enc, "encoder disabled\n");
@@ -1307,7 +1314,7 @@ void dpu_encoder_toggle_vblank_for_crtc(struct drm_encoder *drm_enc,
 	for (i = 0; i < dpu_enc->num_phys_encs; i++) {
 		struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i];
 
-		if (phys && phys->ops.control_vblank_irq)
+		if (phys->ops.control_vblank_irq)
 			phys->ops.control_vblank_irq(phys, enable);
 	}
 }
@@ -1419,7 +1426,7 @@ static void _dpu_encoder_trigger_flush(struct drm_encoder *drm_enc,
 	}
 
 	ctl = phys->hw_ctl;
-	if (!ctl || !ctl->ops.trigger_flush) {
+	if (!ctl->ops.trigger_flush) {
 		DPU_ERROR("missing trigger cb\n");
 		return;
 	}
@@ -1463,13 +1470,8 @@ void dpu_encoder_helper_trigger_start(struct dpu_encoder_phys *phys_enc)
 {
 	struct dpu_hw_ctl *ctl;
 
-	if (!phys_enc) {
-		DPU_ERROR("invalid encoder\n");
-		return;
-	}
-
 	ctl = phys_enc->hw_ctl;
-	if (ctl && ctl->ops.trigger_start) {
+	if (ctl->ops.trigger_start) {
 		ctl->ops.trigger_start(ctl);
 		trace_dpu_enc_trigger_start(DRMID(phys_enc->parent), ctl->idx);
 	}
@@ -1506,14 +1508,10 @@ static void dpu_encoder_helper_hw_reset(struct dpu_encoder_phys *phys_enc)
 	struct dpu_hw_ctl *ctl;
 	int rc;
 
-	if (!phys_enc) {
-		DPU_ERROR("invalid encoder\n");
-		return;
-	}
 	dpu_enc = to_dpu_encoder_virt(phys_enc->parent);
 	ctl = phys_enc->hw_ctl;
 
-	if (!ctl || !ctl->ops.reset)
+	if (!ctl->ops.reset)
 		return;
 
 	DRM_DEBUG_KMS("id:%u ctl %d reset\n", DRMID(phys_enc->parent),
@@ -1550,12 +1548,10 @@ static void _dpu_encoder_kickoff_phys(struct dpu_encoder_virt *dpu_enc)
 	for (i = 0; i < dpu_enc->num_phys_encs; i++) {
 		struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i];
 
-		if (!phys || phys->enable_state == DPU_ENC_DISABLED)
+		if (phys->enable_state == DPU_ENC_DISABLED)
 			continue;
 
 		ctl = phys->hw_ctl;
-		if (!ctl)
-			continue;
 
 		/*
 		 * This is cleared in frame_done worker, which isn't invoked
@@ -1603,17 +1599,15 @@ void dpu_encoder_trigger_kickoff_pending(struct drm_encoder *drm_enc)
 	for (i = 0; i < dpu_enc->num_phys_encs; i++) {
 		phys = dpu_enc->phys_encs[i];
 
-		if (phys && phys->hw_ctl) {
-			ctl = phys->hw_ctl;
-			if (ctl->ops.clear_pending_flush)
-				ctl->ops.clear_pending_flush(ctl);
+		ctl = phys->hw_ctl;
+		if (ctl->ops.clear_pending_flush)
+			ctl->ops.clear_pending_flush(ctl);
 
-			/* update only for command mode primary ctl */
-			if ((phys == dpu_enc->cur_master) &&
-			   (disp_info->capabilities & MSM_DISPLAY_CAP_CMD_MODE)
-			    && ctl->ops.trigger_pending)
-				ctl->ops.trigger_pending(ctl);
-		}
+		/* update only for command mode primary ctl */
+		if ((phys == dpu_enc->cur_master) &&
+		   (disp_info->capabilities & MSM_DISPLAY_CAP_CMD_MODE)
+		    && ctl->ops.trigger_pending)
+			ctl->ops.trigger_pending(ctl);
 	}
 }
 
@@ -1773,12 +1767,10 @@ void dpu_encoder_prepare_for_kickoff(struct drm_encoder *drm_enc)
 	DPU_ATRACE_BEGIN("enc_prepare_for_kickoff");
 	for (i = 0; i < dpu_enc->num_phys_encs; i++) {
 		phys = dpu_enc->phys_encs[i];
-		if (phys) {
-			if (phys->ops.prepare_for_kickoff)
-				phys->ops.prepare_for_kickoff(phys);
-			if (phys->enable_state == DPU_ENC_ERR_NEEDS_HW_RESET)
-				needs_hw_reset = true;
-		}
+		if (phys->ops.prepare_for_kickoff)
+			phys->ops.prepare_for_kickoff(phys);
+		if (phys->enable_state == DPU_ENC_ERR_NEEDS_HW_RESET)
+			needs_hw_reset = true;
 	}
 	DPU_ATRACE_END("enc_prepare_for_kickoff");
 
@@ -1819,7 +1811,7 @@ void dpu_encoder_kickoff(struct drm_encoder *drm_enc)
 	/* allow phys encs to handle any post-kickoff business */
 	for (i = 0; i < dpu_enc->num_phys_encs; i++) {
 		phys = dpu_enc->phys_encs[i];
-		if (phys && phys->ops.handle_post_kickoff)
+		if (phys->ops.handle_post_kickoff)
 			phys->ops.handle_post_kickoff(phys);
 	}
 
@@ -1848,7 +1840,7 @@ void dpu_encoder_prepare_commit(struct drm_encoder *drm_enc)
 
 	for (i = 0; i < dpu_enc->num_phys_encs; i++) {
 		phys = dpu_enc->phys_encs[i];
-		if (phys && phys->ops.prepare_commit)
+		if (phys->ops.prepare_commit)
 			phys->ops.prepare_commit(phys);
 	}
 }
@@ -1863,9 +1855,6 @@ static int _dpu_encoder_status_show(struct seq_file *s, void *data)
 	for (i = 0; i < dpu_enc->num_phys_encs; i++) {
 		struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i];
 
-		if (!phys)
-			continue;
-
 		seq_printf(s, "intf:%d    vsync:%8d     underrun:%8d    ",
 				phys->intf_idx - INTF_0,
 				atomic_read(&phys->vsync_cnt),
@@ -1924,8 +1913,7 @@ static int _dpu_encoder_init_debugfs(struct drm_encoder *drm_enc)
 		dpu_enc->debugfs_root, dpu_enc, &debugfs_status_fops);
 
 	for (i = 0; i < dpu_enc->num_phys_encs; i++)
-		if (dpu_enc->phys_encs[i] &&
-				dpu_enc->phys_encs[i]->ops.late_register)
+		if (dpu_enc->phys_encs[i]->ops.late_register)
 			dpu_enc->phys_encs[i]->ops.late_register(
 					dpu_enc->phys_encs[i],
 					dpu_enc->debugfs_root);
@@ -2094,11 +2082,8 @@ static int dpu_encoder_setup_display(struct dpu_encoder_virt *dpu_enc,
 
 	for (i = 0; i < dpu_enc->num_phys_encs; i++) {
 		struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i];
-
-		if (phys) {
-			atomic_set(&phys->vsync_cnt, 0);
-			atomic_set(&phys->underrun_cnt, 0);
-		}
+		atomic_set(&phys->vsync_cnt, 0);
+		atomic_set(&phys->underrun_cnt, 0);
 	}
 	mutex_unlock(&dpu_enc->enc_lock);
 
@@ -2240,8 +2225,6 @@ int dpu_encoder_wait_for_event(struct drm_encoder *drm_enc,
 
 	for (i = 0; i < dpu_enc->num_phys_encs; i++) {
 		struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i];
-		if (!phys)
-			continue;
 
 		switch (event) {
 		case MSM_ENC_COMMIT_DONE:
@@ -2257,7 +2240,7 @@ int dpu_encoder_wait_for_event(struct drm_encoder *drm_enc,
 			DPU_ERROR_ENC(dpu_enc, "unknown wait event %d\n",
 					event);
 			return -EINVAL;
-		};
+		}
 
 		if (fn_wait) {
 			DPU_ATRACE_BEGIN("wait_for_completion_event");
@@ -2274,7 +2257,6 @@ int dpu_encoder_wait_for_event(struct drm_encoder *drm_enc,
 enum dpu_intf_mode dpu_encoder_get_intf_mode(struct drm_encoder *encoder)
 {
 	struct dpu_encoder_virt *dpu_enc = NULL;
-	int i;
 
 	if (!encoder) {
 		DPU_ERROR("invalid encoder\n");
@@ -2285,12 +2267,8 @@ enum dpu_intf_mode dpu_encoder_get_intf_mode(struct drm_encoder *encoder)
 	if (dpu_enc->cur_master)
 		return dpu_enc->cur_master->intf_mode;
 
-	for (i = 0; i < dpu_enc->num_phys_encs; i++) {
-		struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i];
-
-		if (phys)
-			return phys->intf_mode;
-	}
+	if (dpu_enc->num_phys_encs)
+		return dpu_enc->phys_encs[0]->intf_mode;
 
 	return INTF_MODE_NONE;
 }
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c
index 047960949fbb..39e1e280ba44 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c
@@ -45,8 +45,7 @@ static bool dpu_encoder_phys_cmd_mode_fixup(
 		const struct drm_display_mode *mode,
 		struct drm_display_mode *adj_mode)
 {
-	if (phys_enc)
-		DPU_DEBUG_CMDENC(to_dpu_encoder_phys_cmd(phys_enc), "\n");
+	DPU_DEBUG_CMDENC(to_dpu_encoder_phys_cmd(phys_enc), "\n");
 	return true;
 }
 
@@ -58,11 +57,8 @@ static void _dpu_encoder_phys_cmd_update_intf_cfg(
 	struct dpu_hw_ctl *ctl;
 	struct dpu_hw_intf_cfg intf_cfg = { 0 };
 
-	if (!phys_enc)
-		return;
-
 	ctl = phys_enc->hw_ctl;
-	if (!ctl || !ctl->ops.setup_intf_cfg)
+	if (!ctl->ops.setup_intf_cfg)
 		return;
 
 	intf_cfg.intf = phys_enc->intf_idx;
@@ -79,7 +75,7 @@ static void dpu_encoder_phys_cmd_pp_tx_done_irq(void *arg, int irq_idx)
 	int new_cnt;
 	u32 event = DPU_ENCODER_FRAME_EVENT_DONE;
 
-	if (!phys_enc || !phys_enc->hw_pp)
+	if (!phys_enc->hw_pp)
 		return;
 
 	DPU_ATRACE_BEGIN("pp_done_irq");
@@ -106,7 +102,7 @@ static void dpu_encoder_phys_cmd_pp_rd_ptr_irq(void *arg, int irq_idx)
 	struct dpu_encoder_phys *phys_enc = arg;
 	struct dpu_encoder_phys_cmd *cmd_enc;
 
-	if (!phys_enc || !phys_enc->hw_pp)
+	if (!phys_enc->hw_pp)
 		return;
 
 	DPU_ATRACE_BEGIN("rd_ptr_irq");
@@ -125,9 +121,6 @@ static void dpu_encoder_phys_cmd_ctl_start_irq(void *arg, int irq_idx)
 {
 	struct dpu_encoder_phys *phys_enc = arg;
 
-	if (!phys_enc || !phys_enc->hw_ctl)
-		return;
-
 	DPU_ATRACE_BEGIN("ctl_start_irq");
 
 	atomic_add_unless(&phys_enc->pending_ctlstart_cnt, -1, 0);
@@ -141,9 +134,6 @@ static void dpu_encoder_phys_cmd_underrun_irq(void *arg, int irq_idx)
 {
 	struct dpu_encoder_phys *phys_enc = arg;
 
-	if (!phys_enc)
-		return;
-
 	if (phys_enc->parent_ops->handle_underrun_virt)
 		phys_enc->parent_ops->handle_underrun_virt(phys_enc->parent,
 			phys_enc);
@@ -179,7 +169,7 @@ static void dpu_encoder_phys_cmd_mode_set(
 	struct dpu_encoder_phys_cmd *cmd_enc =
 		to_dpu_encoder_phys_cmd(phys_enc);
 
-	if (!phys_enc || !mode || !adj_mode) {
+	if (!mode || !adj_mode) {
 		DPU_ERROR("invalid args\n");
 		return;
 	}
@@ -198,7 +188,7 @@ static int _dpu_encoder_phys_cmd_handle_ppdone_timeout(
 	u32 frame_event = DPU_ENCODER_FRAME_EVENT_ERROR;
 	bool do_log = false;
 
-	if (!phys_enc || !phys_enc->hw_pp || !phys_enc->hw_ctl)
+	if (!phys_enc->hw_pp)
 		return -EINVAL;
 
 	cmd_enc->pp_timeout_report_cnt++;
@@ -247,11 +237,6 @@ static int _dpu_encoder_phys_cmd_wait_for_idle(
 	struct dpu_encoder_wait_info wait_info;
 	int ret;
 
-	if (!phys_enc) {
-		DPU_ERROR("invalid encoder\n");
-		return -EINVAL;
-	}
-
 	wait_info.wq = &phys_enc->pending_kickoff_wq;
 	wait_info.atomic_cnt = &phys_enc->pending_kickoff_cnt;
 	wait_info.timeout_ms = KICKOFF_TIMEOUT_MS;
@@ -273,7 +258,7 @@ static int dpu_encoder_phys_cmd_control_vblank_irq(
 	int ret = 0;
 	int refcount;
 
-	if (!phys_enc || !phys_enc->hw_pp) {
+	if (!phys_enc->hw_pp) {
 		DPU_ERROR("invalid encoder\n");
 		return -EINVAL;
 	}
@@ -314,9 +299,6 @@ end:
 static void dpu_encoder_phys_cmd_irq_control(struct dpu_encoder_phys *phys_enc,
 		bool enable)
 {
-	if (!phys_enc)
-		return;
-
 	trace_dpu_enc_phys_cmd_irq_ctrl(DRMID(phys_enc->parent),
 			phys_enc->hw_pp->idx - PINGPONG_0,
 			enable, atomic_read(&phys_enc->vblank_refcount));
@@ -351,7 +333,7 @@ static void dpu_encoder_phys_cmd_tearcheck_config(
 	u32 vsync_hz;
 	struct dpu_kms *dpu_kms;
 
-	if (!phys_enc || !phys_enc->hw_pp) {
+	if (!phys_enc->hw_pp) {
 		DPU_ERROR("invalid encoder\n");
 		return;
 	}
@@ -428,8 +410,7 @@ static void _dpu_encoder_phys_cmd_pingpong_config(
 	struct dpu_encoder_phys_cmd *cmd_enc =
 		to_dpu_encoder_phys_cmd(phys_enc);
 
-	if (!phys_enc || !phys_enc->hw_ctl || !phys_enc->hw_pp
-			|| !phys_enc->hw_ctl->ops.setup_intf_cfg) {
+	if (!phys_enc->hw_pp || !phys_enc->hw_ctl->ops.setup_intf_cfg) {
 		DPU_ERROR("invalid arg(s), enc %d\n", phys_enc != 0);
 		return;
 	}
@@ -458,7 +439,7 @@ static void dpu_encoder_phys_cmd_enable_helper(
 	struct dpu_hw_ctl *ctl;
 	u32 flush_mask = 0;
 
-	if (!phys_enc || !phys_enc->hw_ctl || !phys_enc->hw_pp) {
+	if (!phys_enc->hw_pp) {
 		DPU_ERROR("invalid arg(s), encoder %d\n", phys_enc != 0);
 		return;
 	}
@@ -480,7 +461,7 @@ static void dpu_encoder_phys_cmd_enable(struct dpu_encoder_phys *phys_enc)
 	struct dpu_encoder_phys_cmd *cmd_enc =
 		to_dpu_encoder_phys_cmd(phys_enc);
 
-	if (!phys_enc || !phys_enc->hw_pp) {
+	if (!phys_enc->hw_pp) {
 		DPU_ERROR("invalid phys encoder\n");
 		return;
 	}
@@ -499,8 +480,7 @@ static void dpu_encoder_phys_cmd_enable(struct dpu_encoder_phys *phys_enc)
 static void _dpu_encoder_phys_cmd_connect_te(
 		struct dpu_encoder_phys *phys_enc, bool enable)
 {
-	if (!phys_enc || !phys_enc->hw_pp ||
-			!phys_enc->hw_pp->ops.connect_external_te)
+	if (!phys_enc->hw_pp || !phys_enc->hw_pp->ops.connect_external_te)
 		return;
 
 	trace_dpu_enc_phys_cmd_connect_te(DRMID(phys_enc->parent), enable);
@@ -518,7 +498,7 @@ static int dpu_encoder_phys_cmd_get_line_count(
 {
 	struct dpu_hw_pingpong *hw_pp;
 
-	if (!phys_enc || !phys_enc->hw_pp)
+	if (!phys_enc->hw_pp)
 		return -EINVAL;
 
 	if (!dpu_encoder_phys_cmd_is_master(phys_enc))
@@ -536,7 +516,7 @@ static void dpu_encoder_phys_cmd_disable(struct dpu_encoder_phys *phys_enc)
 	struct dpu_encoder_phys_cmd *cmd_enc =
 		to_dpu_encoder_phys_cmd(phys_enc);
 
-	if (!phys_enc || !phys_enc->hw_pp) {
+	if (!phys_enc->hw_pp) {
 		DPU_ERROR("invalid encoder\n");
 		return;
 	}
@@ -559,10 +539,6 @@ static void dpu_encoder_phys_cmd_destroy(struct dpu_encoder_phys *phys_enc)
 	struct dpu_encoder_phys_cmd *cmd_enc =
 		to_dpu_encoder_phys_cmd(phys_enc);
 
-	if (!phys_enc) {
-		DPU_ERROR("invalid encoder\n");
-		return;
-	}
 	kfree(cmd_enc);
 }
 
@@ -580,7 +556,7 @@ static void dpu_encoder_phys_cmd_prepare_for_kickoff(
 			to_dpu_encoder_phys_cmd(phys_enc);
 	int ret;
 
-	if (!phys_enc || !phys_enc->hw_pp) {
+	if (!phys_enc->hw_pp) {
 		DPU_ERROR("invalid encoder\n");
 		return;
 	}
@@ -614,11 +590,6 @@ static int _dpu_encoder_phys_cmd_wait_for_ctl_start(
 	struct dpu_encoder_wait_info wait_info;
 	int ret;
 
-	if (!phys_enc || !phys_enc->hw_ctl) {
-		DPU_ERROR("invalid argument(s)\n");
-		return -EINVAL;
-	}
-
 	wait_info.wq = &phys_enc->pending_kickoff_wq;
 	wait_info.atomic_cnt = &phys_enc->pending_ctlstart_cnt;
 	wait_info.timeout_ms = KICKOFF_TIMEOUT_MS;
@@ -639,9 +610,6 @@ static int dpu_encoder_phys_cmd_wait_for_tx_complete(
 {
 	int rc;
 
-	if (!phys_enc)
-		return -EINVAL;
-
 	rc = _dpu_encoder_phys_cmd_wait_for_idle(phys_enc);
 	if (rc) {
 		DRM_ERROR("failed wait_for_idle: id:%u ret:%d intf:%d\n",
@@ -658,9 +626,6 @@ static int dpu_encoder_phys_cmd_wait_for_commit_done(
 	int rc = 0;
 	struct dpu_encoder_phys_cmd *cmd_enc;
 
-	if (!phys_enc)
-		return -EINVAL;
-
 	cmd_enc = to_dpu_encoder_phys_cmd(phys_enc);
 
 	/* only required for master controller */
@@ -681,9 +646,6 @@ static int dpu_encoder_phys_cmd_wait_for_vblank(
 	struct dpu_encoder_phys_cmd *cmd_enc;
 	struct dpu_encoder_wait_info wait_info;
 
-	if (!phys_enc)
-		return -EINVAL;
-
 	cmd_enc = to_dpu_encoder_phys_cmd(phys_enc);
 
 	/* only required for master controller */
@@ -715,9 +677,6 @@ static void dpu_encoder_phys_cmd_handle_post_kickoff(
 static void dpu_encoder_phys_cmd_trigger_start(
 		struct dpu_encoder_phys *phys_enc)
 {
-	if (!phys_enc)
-		return;
-
 	dpu_encoder_helper_trigger_start(phys_enc);
 }
 
@@ -816,6 +775,4 @@ struct dpu_encoder_phys *dpu_encoder_phys_cmd_init(
 	DPU_DEBUG_CMDENC(cmd_enc, "created\n");
 
 	return phys_enc;
-
-	return ERR_PTR(ret);
 }
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
index 3123ef873cdf..c71c18de5966 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
@@ -220,8 +220,7 @@ static bool dpu_encoder_phys_vid_mode_fixup(
 		const struct drm_display_mode *mode,
 		struct drm_display_mode *adj_mode)
 {
-	if (phys_enc)
-		DPU_DEBUG_VIDENC(phys_enc, "\n");
+	DPU_DEBUG_VIDENC(phys_enc, "\n");
 
 	/*
 	 * Modifying mode has consequences when the mode comes back to us
@@ -239,7 +238,7 @@ static void dpu_encoder_phys_vid_setup_timing_engine(
 	unsigned long lock_flags;
 	struct dpu_hw_intf_cfg intf_cfg = { 0 };
 
-	if (!phys_enc || !phys_enc->hw_ctl->ops.setup_intf_cfg) {
+	if (!phys_enc->hw_ctl->ops.setup_intf_cfg) {
 		DPU_ERROR("invalid encoder %d\n", phys_enc != 0);
 		return;
 	}
@@ -280,6 +279,14 @@ static void dpu_encoder_phys_vid_setup_timing_engine(
 	phys_enc->hw_intf->ops.setup_timing_gen(phys_enc->hw_intf,
 			&timing_params, fmt);
 	phys_enc->hw_ctl->ops.setup_intf_cfg(phys_enc->hw_ctl, &intf_cfg);
+
+	/* setup which pp blk will connect to this intf */
+	if (phys_enc->hw_intf->ops.bind_pingpong_blk)
+		phys_enc->hw_intf->ops.bind_pingpong_blk(
+				phys_enc->hw_intf,
+				true,
+				phys_enc->hw_pp->idx);
+
 	spin_unlock_irqrestore(phys_enc->enc_spinlock, lock_flags);
 
 	programmable_fetch_config(phys_enc, &timing_params);
@@ -293,12 +300,7 @@ static void dpu_encoder_phys_vid_vblank_irq(void *arg, int irq_idx)
 	u32 flush_register = 0;
 	int new_cnt = -1, old_cnt = -1;
 
-	if (!phys_enc)
-		return;
-
 	hw_ctl = phys_enc->hw_ctl;
-	if (!hw_ctl)
-		return;
 
 	DPU_ATRACE_BEGIN("vblank_irq");
 
@@ -314,7 +316,7 @@ static void dpu_encoder_phys_vid_vblank_irq(void *arg, int irq_idx)
 	 * so we need to double-check with hw that it accepted the flush bits
 	 */
 	spin_lock_irqsave(phys_enc->enc_spinlock, lock_flags);
-	if (hw_ctl && hw_ctl->ops.get_flush_register)
+	if (hw_ctl->ops.get_flush_register)
 		flush_register = hw_ctl->ops.get_flush_register(hw_ctl);
 
 	if (!(flush_register & hw_ctl->ops.get_pending_flush(hw_ctl)))
@@ -335,9 +337,6 @@ static void dpu_encoder_phys_vid_underrun_irq(void *arg, int irq_idx)
 {
 	struct dpu_encoder_phys *phys_enc = arg;
 
-	if (!phys_enc)
-		return;
-
 	if (phys_enc->parent_ops->handle_underrun_virt)
 		phys_enc->parent_ops->handle_underrun_virt(phys_enc->parent,
 			phys_enc);
@@ -374,11 +373,6 @@ static void dpu_encoder_phys_vid_mode_set(
 		struct drm_display_mode *mode,
 		struct drm_display_mode *adj_mode)
 {
-	if (!phys_enc) {
-		DPU_ERROR("invalid encoder/kms\n");
-		return;
-	}
-
 	if (adj_mode) {
 		phys_enc->cached_mode = *adj_mode;
 		drm_mode_debug_printmodeline(adj_mode);
@@ -395,11 +389,6 @@ static int dpu_encoder_phys_vid_control_vblank_irq(
 	int ret = 0;
 	int refcount;
 
-	if (!phys_enc) {
-		DPU_ERROR("invalid encoder\n");
-		return -EINVAL;
-	}
-
 	refcount = atomic_read(&phys_enc->vblank_refcount);
 
 	/* Slave encoders don't report vblank */
@@ -435,6 +424,7 @@ static void dpu_encoder_phys_vid_enable(struct dpu_encoder_phys *phys_enc)
 {
 	struct dpu_hw_ctl *ctl;
 	u32 flush_mask = 0;
+	u32 intf_flush_mask = 0;
 
 	ctl = phys_enc->hw_ctl;
 
@@ -459,10 +449,18 @@ static void dpu_encoder_phys_vid_enable(struct dpu_encoder_phys *phys_enc)
 	ctl->ops.get_bitmask_intf(ctl, &flush_mask, phys_enc->hw_intf->idx);
 	ctl->ops.update_pending_flush(ctl, flush_mask);
 
+	if (ctl->ops.get_bitmask_active_intf)
+		ctl->ops.get_bitmask_active_intf(ctl, &intf_flush_mask,
+			phys_enc->hw_intf->idx);
+
+	if (ctl->ops.update_pending_intf_flush)
+		ctl->ops.update_pending_intf_flush(ctl, intf_flush_mask);
+
 skip_flush:
 	DPU_DEBUG_VIDENC(phys_enc,
-			 "update pending flush ctl %d flush_mask %x\n",
-			 ctl->idx - CTL_0, flush_mask);
+		"update pending flush ctl %d flush_mask 0x%x intf_mask 0x%x\n",
+		ctl->idx - CTL_0, flush_mask, intf_flush_mask);
+
 
 	/* ctl_flush & timing engine enable will be triggered by framework */
 	if (phys_enc->enable_state == DPU_ENC_DISABLED)
@@ -471,11 +469,6 @@ skip_flush:
 
 static void dpu_encoder_phys_vid_destroy(struct dpu_encoder_phys *phys_enc)
 {
-	if (!phys_enc) {
-		DPU_ERROR("invalid encoder\n");
-		return;
-	}
-
 	DPU_DEBUG_VIDENC(phys_enc, "\n");
 	kfree(phys_enc);
 }
@@ -493,11 +486,6 @@ static int dpu_encoder_phys_vid_wait_for_vblank(
 	struct dpu_encoder_wait_info wait_info;
 	int ret;
 
-	if (!phys_enc) {
-		pr_err("invalid encoder\n");
-		return -EINVAL;
-	}
-
 	wait_info.wq = &phys_enc->pending_kickoff_wq;
 	wait_info.atomic_cnt = &phys_enc->pending_kickoff_cnt;
 	wait_info.timeout_ms = KICKOFF_TIMEOUT_MS;
@@ -543,13 +531,8 @@ static void dpu_encoder_phys_vid_prepare_for_kickoff(
 	struct dpu_hw_ctl *ctl;
 	int rc;
 
-	if (!phys_enc) {
-		DPU_ERROR("invalid encoder/parameters\n");
-		return;
-	}
-
 	ctl = phys_enc->hw_ctl;
-	if (!ctl || !ctl->ops.wait_reset_status)
+	if (!ctl->ops.wait_reset_status)
 		return;
 
 	/*
@@ -569,12 +552,12 @@ static void dpu_encoder_phys_vid_disable(struct dpu_encoder_phys *phys_enc)
 	unsigned long lock_flags;
 	int ret;
 
-	if (!phys_enc || !phys_enc->parent || !phys_enc->parent->dev) {
+	if (!phys_enc->parent || !phys_enc->parent->dev) {
 		DPU_ERROR("invalid encoder/device\n");
 		return;
 	}
 
-	if (!phys_enc->hw_intf || !phys_enc->hw_ctl) {
+	if (!phys_enc->hw_intf) {
 		DPU_ERROR("invalid hw_intf %d hw_ctl %d\n",
 				phys_enc->hw_intf != 0, phys_enc->hw_ctl != 0);
 		return;
@@ -639,9 +622,6 @@ static void dpu_encoder_phys_vid_irq_control(struct dpu_encoder_phys *phys_enc,
 {
 	int ret;
 
-	if (!phys_enc)
-		return;
-
 	trace_dpu_enc_phys_vid_irq_ctrl(DRMID(phys_enc->parent),
 			    phys_enc->hw_intf->idx - INTF_0,
 			    enable,
@@ -662,9 +642,6 @@ static void dpu_encoder_phys_vid_irq_control(struct dpu_encoder_phys *phys_enc,
 static int dpu_encoder_phys_vid_get_line_count(
 		struct dpu_encoder_phys *phys_enc)
 {
-	if (!phys_enc)
-		return -EINVAL;
-
 	if (!dpu_encoder_phys_vid_is_master(phys_enc))
 		return -EINVAL;
 
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c
index 24ab6249083a..528632690f1e 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c
@@ -489,12 +489,28 @@ static const struct dpu_format dpu_format_map_ubwc[] = {
 		true, 4, DPU_FORMAT_FLAG_COMPRESSED,
 		DPU_FETCH_UBWC, 2, DPU_TILE_HEIGHT_UBWC),
 
+	/* ARGB8888 and ABGR8888 purposely have the same color
+	 * ordering.  The hardware only supports ABGR8888 UBWC
+	 * natively.
+	 */
+	INTERLEAVED_RGB_FMT_TILED(ARGB8888,
+		COLOR_8BIT, COLOR_8BIT, COLOR_8BIT, COLOR_8BIT,
+		C2_R_Cr, C0_G_Y, C1_B_Cb, C3_ALPHA, 4,
+		true, 4, DPU_FORMAT_FLAG_COMPRESSED,
+		DPU_FETCH_UBWC, 2, DPU_TILE_HEIGHT_UBWC),
+
 	INTERLEAVED_RGB_FMT_TILED(XBGR8888,
 		COLOR_8BIT, COLOR_8BIT, COLOR_8BIT, COLOR_8BIT,
 		C2_R_Cr, C0_G_Y, C1_B_Cb, C3_ALPHA, 4,
 		false, 4, DPU_FORMAT_FLAG_COMPRESSED,
 		DPU_FETCH_UBWC, 2, DPU_TILE_HEIGHT_UBWC),
 
+	INTERLEAVED_RGB_FMT_TILED(XRGB8888,
+		COLOR_8BIT, COLOR_8BIT, COLOR_8BIT, COLOR_8BIT,
+		C2_R_Cr, C0_G_Y, C1_B_Cb, C3_ALPHA, 4,
+		false, 4, DPU_FORMAT_FLAG_COMPRESSED,
+		DPU_FETCH_UBWC, 2, DPU_TILE_HEIGHT_UBWC),
+
 	INTERLEAVED_RGB_FMT_TILED(ABGR2101010,
 		COLOR_8BIT, COLOR_8BIT, COLOR_8BIT, COLOR_8BIT,
 		C2_R_Cr, C0_G_Y, C1_B_Cb, C3_ALPHA, 4,
@@ -550,7 +566,9 @@ static int _dpu_format_get_media_color_ubwc(const struct dpu_format *fmt)
 {
 	static const struct dpu_media_color_map dpu_media_ubwc_map[] = {
 		{DRM_FORMAT_ABGR8888, COLOR_FMT_RGBA8888_UBWC},
+		{DRM_FORMAT_ARGB8888, COLOR_FMT_RGBA8888_UBWC},
 		{DRM_FORMAT_XBGR8888, COLOR_FMT_RGBA8888_UBWC},
+		{DRM_FORMAT_XRGB8888, COLOR_FMT_RGBA8888_UBWC},
 		{DRM_FORMAT_ABGR2101010, COLOR_FMT_RGBA1010102_UBWC},
 		{DRM_FORMAT_XBGR2101010, COLOR_FMT_RGBA1010102_UBWC},
 		{DRM_FORMAT_BGR565, COLOR_FMT_RGB565_UBWC},
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
index 04c8c44f5b9c..c567917541e8 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
@@ -11,11 +11,17 @@
 #include "dpu_hw_catalog_format.h"
 #include "dpu_kms.h"
 
-#define VIG_SDM845_MASK \
-	(BIT(DPU_SSPP_SRC) | BIT(DPU_SSPP_SCALER_QSEED3) | BIT(DPU_SSPP_QOS) |\
+#define VIG_MASK \
+	(BIT(DPU_SSPP_SRC) | BIT(DPU_SSPP_QOS) |\
 	BIT(DPU_SSPP_CSC_10BIT) | BIT(DPU_SSPP_CDP) | BIT(DPU_SSPP_QOS_8LVL) |\
 	BIT(DPU_SSPP_TS_PREFILL) | BIT(DPU_SSPP_EXCL_RECT))
 
+#define VIG_SDM845_MASK \
+	(VIG_MASK | BIT(DPU_SSPP_SCALER_QSEED3))
+
+#define VIG_SC7180_MASK \
+	(VIG_MASK | BIT(DPU_SSPP_SCALER_QSEED4))
+
 #define DMA_SDM845_MASK \
 	(BIT(DPU_SSPP_SRC) | BIT(DPU_SSPP_QOS) | BIT(DPU_SSPP_QOS_8LVL) |\
 	BIT(DPU_SSPP_TS_PREFILL) | BIT(DPU_SSPP_TS_PREFILL_REC1) |\
@@ -27,6 +33,9 @@
 #define MIXER_SDM845_MASK \
 	(BIT(DPU_MIXER_SOURCESPLIT) | BIT(DPU_DIM_LAYER))
 
+#define MIXER_SC7180_MASK \
+	(BIT(DPU_DIM_LAYER))
+
 #define PINGPONG_SDM845_MASK BIT(DPU_PINGPONG_DITHER)
 
 #define PINGPONG_SDM845_SPLIT_MASK \
@@ -58,9 +67,20 @@ static const struct dpu_caps sdm845_dpu_caps = {
 	.has_src_split = true,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
+	.has_3d_merge = true,
+};
+
+static const struct dpu_caps sc7180_dpu_caps = {
+	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
+	.max_mixer_blendstages = 0x9,
+	.qseed_type = DPU_SSPP_SCALER_QSEED4,
+	.smart_dma_rev = DPU_SSPP_SMART_DMA_V2,
+	.ubwc_version = DPU_HW_UBWC_VER_20,
+	.has_dim_layer = true,
+	.has_idle_pc = true,
 };
 
-static struct dpu_mdp_cfg sdm845_mdp[] = {
+static const struct dpu_mdp_cfg sdm845_mdp[] = {
 	{
 	.name = "top_0", .id = MDP_TOP,
 	.base = 0x0, .len = 0x45C,
@@ -85,10 +105,27 @@ static struct dpu_mdp_cfg sdm845_mdp[] = {
 	},
 };
 
+static const struct dpu_mdp_cfg sc7180_mdp[] = {
+	{
+	.name = "top_0", .id = MDP_TOP,
+	.base = 0x0, .len = 0x494,
+	.features = 0,
+	.highest_bank_bit = 0x3,
+	.clk_ctrls[DPU_CLK_CTRL_VIG0] = {
+		.reg_off = 0x2AC, .bit_off = 0},
+	.clk_ctrls[DPU_CLK_CTRL_DMA0] = {
+		.reg_off = 0x2AC, .bit_off = 8},
+	.clk_ctrls[DPU_CLK_CTRL_DMA1] = {
+		.reg_off = 0x2B4, .bit_off = 8},
+	.clk_ctrls[DPU_CLK_CTRL_CURSOR0] = {
+		.reg_off = 0x2BC, .bit_off = 8},
+	},
+};
+
 /*************************************************************
  * CTL sub blocks config
  *************************************************************/
-static struct dpu_ctl_cfg sdm845_ctl[] = {
+static const struct dpu_ctl_cfg sdm845_ctl[] = {
 	{
 	.name = "ctl_0", .id = CTL_0,
 	.base = 0x1000, .len = 0xE4,
@@ -116,6 +153,24 @@ static struct dpu_ctl_cfg sdm845_ctl[] = {
 	},
 };
 
+static const struct dpu_ctl_cfg sc7180_ctl[] = {
+	{
+	.name = "ctl_0", .id = CTL_0,
+	.base = 0x1000, .len = 0xE4,
+	.features = BIT(DPU_CTL_ACTIVE_CFG)
+	},
+	{
+	.name = "ctl_1", .id = CTL_1,
+	.base = 0x1200, .len = 0xE4,
+	.features = BIT(DPU_CTL_ACTIVE_CFG)
+	},
+	{
+	.name = "ctl_2", .id = CTL_2,
+	.base = 0x1400, .len = 0xE4,
+	.features = BIT(DPU_CTL_ACTIVE_CFG)
+	},
+};
+
 /*************************************************************
  * SSPP sub blocks config
  *************************************************************/
@@ -128,7 +183,7 @@ static const struct dpu_sspp_blks_common sdm845_sspp_common = {
 	.maxvdeciexp = MAX_VERT_DECIMATION,
 };
 
-#define _VIG_SBLK(num, sdma_pri) \
+#define _VIG_SBLK(num, sdma_pri, qseed_ver) \
 	{ \
 	.common = &sdm845_sspp_common, \
 	.maxdwnscale = MAX_DOWNSCALE_RATIO, \
@@ -137,7 +192,7 @@ static const struct dpu_sspp_blks_common sdm845_sspp_common = {
 	.src_blk = {.name = STRCAT("sspp_src_", num), \
 		.id = DPU_SSPP_SRC, .base = 0x00, .len = 0x150,}, \
 	.scaler_blk = {.name = STRCAT("sspp_scaler", num), \
-		.id = DPU_SSPP_SCALER_QSEED3, \
+		.id = qseed_ver, \
 		.base = 0xa00, .len = 0xa0,}, \
 	.csc_blk = {.name = STRCAT("sspp_csc", num), \
 		.id = DPU_SSPP_CSC_10BIT, \
@@ -162,10 +217,14 @@ static const struct dpu_sspp_blks_common sdm845_sspp_common = {
 	.virt_num_formats = ARRAY_SIZE(plane_formats), \
 	}
 
-static const struct dpu_sspp_sub_blks sdm845_vig_sblk_0 = _VIG_SBLK("0", 5);
-static const struct dpu_sspp_sub_blks sdm845_vig_sblk_1 = _VIG_SBLK("1", 6);
-static const struct dpu_sspp_sub_blks sdm845_vig_sblk_2 = _VIG_SBLK("2", 7);
-static const struct dpu_sspp_sub_blks sdm845_vig_sblk_3 = _VIG_SBLK("3", 8);
+static const struct dpu_sspp_sub_blks sdm845_vig_sblk_0 =
+				_VIG_SBLK("0", 5, DPU_SSPP_SCALER_QSEED3);
+static const struct dpu_sspp_sub_blks sdm845_vig_sblk_1 =
+				_VIG_SBLK("1", 6, DPU_SSPP_SCALER_QSEED3);
+static const struct dpu_sspp_sub_blks sdm845_vig_sblk_2 =
+				_VIG_SBLK("2", 7, DPU_SSPP_SCALER_QSEED3);
+static const struct dpu_sspp_sub_blks sdm845_vig_sblk_3 =
+				_VIG_SBLK("3", 8, DPU_SSPP_SCALER_QSEED3);
 
 static const struct dpu_sspp_sub_blks sdm845_dma_sblk_0 = _DMA_SBLK("8", 1);
 static const struct dpu_sspp_sub_blks sdm845_dma_sblk_1 = _DMA_SBLK("9", 2);
@@ -184,7 +243,7 @@ static const struct dpu_sspp_sub_blks sdm845_dma_sblk_3 = _DMA_SBLK("11", 4);
 	.clk_ctrl = _clkctrl \
 	}
 
-static struct dpu_sspp_cfg sdm845_sspp[] = {
+static const struct dpu_sspp_cfg sdm845_sspp[] = {
 	SSPP_BLK("sspp_0", SSPP_VIG0, 0x4000, VIG_SDM845_MASK,
 		sdm845_vig_sblk_0, 0,  SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG0),
 	SSPP_BLK("sspp_1", SSPP_VIG1, 0x6000, VIG_SDM845_MASK,
@@ -203,9 +262,26 @@ static struct dpu_sspp_cfg sdm845_sspp[] = {
 		sdm845_dma_sblk_3, 13, SSPP_TYPE_DMA, DPU_CLK_CTRL_CURSOR1),
 };
 
+static const struct dpu_sspp_sub_blks sc7180_vig_sblk_0 =
+				_VIG_SBLK("0", 4, DPU_SSPP_SCALER_QSEED4);
+
+static const struct dpu_sspp_cfg sc7180_sspp[] = {
+	SSPP_BLK("sspp_0", SSPP_VIG0, 0x4000, VIG_SC7180_MASK,
+		sc7180_vig_sblk_0, 0,  SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG0),
+	SSPP_BLK("sspp_8", SSPP_DMA0, 0x24000,  DMA_SDM845_MASK,
+		sdm845_dma_sblk_0, 1, SSPP_TYPE_DMA, DPU_CLK_CTRL_DMA0),
+	SSPP_BLK("sspp_9", SSPP_DMA1, 0x26000,  DMA_SDM845_MASK,
+		sdm845_dma_sblk_1, 5, SSPP_TYPE_DMA, DPU_CLK_CTRL_DMA1),
+	SSPP_BLK("sspp_10", SSPP_DMA2, 0x28000,  DMA_CURSOR_SDM845_MASK,
+		sdm845_dma_sblk_2, 9, SSPP_TYPE_DMA, DPU_CLK_CTRL_CURSOR0),
+};
+
 /*************************************************************
  * MIXER sub blocks config
  *************************************************************/
+
+/* SDM845 */
+
 static const struct dpu_lm_sub_blks sdm845_lm_sblk = {
 	.maxwidth = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
 	.maxblendstages = 11, /* excluding base layer */
@@ -215,23 +291,46 @@ static const struct dpu_lm_sub_blks sdm845_lm_sblk = {
 	},
 };
 
-#define LM_BLK(_name, _id, _base, _pp, _lmpair) \
+#define LM_BLK(_name, _id, _base, _fmask, _sblk, _pp, _lmpair) \
 	{ \
 	.name = _name, .id = _id, \
 	.base = _base, .len = 0x320, \
-	.features = MIXER_SDM845_MASK, \
-	.sblk = &sdm845_lm_sblk, \
+	.features = _fmask, \
+	.sblk = _sblk, \
 	.pingpong = _pp, \
 	.lm_pair_mask = (1 << _lmpair) \
 	}
 
-static struct dpu_lm_cfg sdm845_lm[] = {
-	LM_BLK("lm_0", LM_0, 0x44000, PINGPONG_0, LM_1),
-	LM_BLK("lm_1", LM_1, 0x45000, PINGPONG_1, LM_0),
-	LM_BLK("lm_2", LM_2, 0x46000, PINGPONG_2, LM_5),
-	LM_BLK("lm_3", LM_3, 0x0, PINGPONG_MAX, 0),
-	LM_BLK("lm_4", LM_4, 0x0, PINGPONG_MAX, 0),
-	LM_BLK("lm_5", LM_5, 0x49000, PINGPONG_3, LM_2),
+static const struct dpu_lm_cfg sdm845_lm[] = {
+	LM_BLK("lm_0", LM_0, 0x44000, MIXER_SDM845_MASK,
+		&sdm845_lm_sblk, PINGPONG_0, LM_1),
+	LM_BLK("lm_1", LM_1, 0x45000, MIXER_SDM845_MASK,
+		&sdm845_lm_sblk, PINGPONG_1, LM_0),
+	LM_BLK("lm_2", LM_2, 0x46000, MIXER_SDM845_MASK,
+		&sdm845_lm_sblk, PINGPONG_2, LM_5),
+	LM_BLK("lm_3", LM_3, 0x0, MIXER_SDM845_MASK,
+		&sdm845_lm_sblk, PINGPONG_MAX, 0),
+	LM_BLK("lm_4", LM_4, 0x0, MIXER_SDM845_MASK,
+		&sdm845_lm_sblk, PINGPONG_MAX, 0),
+	LM_BLK("lm_5", LM_5, 0x49000, MIXER_SDM845_MASK,
+		&sdm845_lm_sblk, PINGPONG_3, LM_2),
+};
+
+/* SC7180 */
+
+static const struct dpu_lm_sub_blks sc7180_lm_sblk = {
+	.maxwidth = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
+	.maxblendstages = 7, /* excluding base layer */
+	.blendstage_base = { /* offsets relative to mixer base */
+		0x20, 0x38, 0x50, 0x68, 0x80, 0x98, 0xb0
+	},
+};
+
+static const struct dpu_lm_cfg sc7180_lm[] = {
+	LM_BLK("lm_0", LM_0, 0x44000, MIXER_SC7180_MASK,
+		&sc7180_lm_sblk, PINGPONG_0, LM_1),
+	LM_BLK("lm_1", LM_1, 0x45000, MIXER_SC7180_MASK,
+		&sc7180_lm_sblk, PINGPONG_1, LM_0),
 };
 
 /*************************************************************
@@ -264,13 +363,18 @@ static const struct dpu_pingpong_sub_blks sdm845_pp_sblk = {
 	.sblk = &sdm845_pp_sblk \
 	}
 
-static struct dpu_pingpong_cfg sdm845_pp[] = {
+static const struct dpu_pingpong_cfg sdm845_pp[] = {
 	PP_BLK_TE("pingpong_0", PINGPONG_0, 0x70000),
 	PP_BLK_TE("pingpong_1", PINGPONG_1, 0x70800),
 	PP_BLK("pingpong_2", PINGPONG_2, 0x71000),
 	PP_BLK("pingpong_3", PINGPONG_3, 0x71800),
 };
 
+static const struct dpu_pingpong_cfg sc7180_pp[] = {
+	PP_BLK_TE("pingpong_0", PINGPONG_0, 0x70000),
+	PP_BLK_TE("pingpong_1", PINGPONG_1, 0x70800),
+};
+
 /*************************************************************
  * INTF sub blocks config
  *************************************************************/
@@ -278,26 +382,32 @@ static struct dpu_pingpong_cfg sdm845_pp[] = {
 	{\
 	.name = _name, .id = _id, \
 	.base = _base, .len = 0x280, \
+	.features = BIT(DPU_CTL_ACTIVE_CFG), \
 	.type = _type, \
 	.controller_id = _ctrl_id, \
 	.prog_fetch_lines_worst_case = 24 \
 	}
 
-static struct dpu_intf_cfg sdm845_intf[] = {
+static const struct dpu_intf_cfg sdm845_intf[] = {
 	INTF_BLK("intf_0", INTF_0, 0x6A000, INTF_DP, 0),
 	INTF_BLK("intf_1", INTF_1, 0x6A800, INTF_DSI, 0),
 	INTF_BLK("intf_2", INTF_2, 0x6B000, INTF_DSI, 1),
 	INTF_BLK("intf_3", INTF_3, 0x6B800, INTF_DP, 1),
 };
 
+static const struct dpu_intf_cfg sc7180_intf[] = {
+	INTF_BLK("intf_0", INTF_0, 0x6A000, INTF_DP, 0),
+	INTF_BLK("intf_1", INTF_1, 0x6A800, INTF_DSI, 0),
+};
+
 /*************************************************************
  * VBIF sub blocks config
  *************************************************************/
 /* VBIF QOS remap */
-static u32 sdm845_rt_pri_lvl[] = {3, 3, 4, 4, 5, 5, 6, 6};
-static u32 sdm845_nrt_pri_lvl[] = {3, 3, 3, 3, 3, 3, 3, 3};
+static const u32 sdm845_rt_pri_lvl[] = {3, 3, 4, 4, 5, 5, 6, 6};
+static const u32 sdm845_nrt_pri_lvl[] = {3, 3, 3, 3, 3, 3, 3, 3};
 
-static struct dpu_vbif_cfg sdm845_vbif[] = {
+static const struct dpu_vbif_cfg sdm845_vbif[] = {
 	{
 	.name = "vbif_0", .id = VBIF_0,
 	.base = 0, .len = 0x1040,
@@ -316,7 +426,7 @@ static struct dpu_vbif_cfg sdm845_vbif[] = {
 	},
 };
 
-static struct dpu_reg_dma_cfg sdm845_regdma = {
+static const struct dpu_reg_dma_cfg sdm845_regdma = {
 	.base = 0x0, .version = 0x1, .trigger_sel_off = 0x119c
 };
 
@@ -325,7 +435,7 @@ static struct dpu_reg_dma_cfg sdm845_regdma = {
  *************************************************************/
 
 /* SSPP QOS LUTs */
-static struct dpu_qos_lut_entry sdm845_qos_linear[] = {
+static const struct dpu_qos_lut_entry sdm845_qos_linear[] = {
 	{.fl = 4, .lut = 0x357},
 	{.fl = 5, .lut = 0x3357},
 	{.fl = 6, .lut = 0x23357},
@@ -340,7 +450,11 @@ static struct dpu_qos_lut_entry sdm845_qos_linear[] = {
 	{.fl = 0, .lut = 0x11222222223357}
 };
 
-static struct dpu_qos_lut_entry sdm845_qos_macrotile[] = {
+static const struct dpu_qos_lut_entry sc7180_qos_linear[] = {
+	{.fl = 0, .lut = 0x0011222222335777},
+};
+
+static const struct dpu_qos_lut_entry sdm845_qos_macrotile[] = {
 	{.fl = 10, .lut = 0x344556677},
 	{.fl = 11, .lut = 0x3344556677},
 	{.fl = 12, .lut = 0x23344556677},
@@ -349,11 +463,19 @@ static struct dpu_qos_lut_entry sdm845_qos_macrotile[] = {
 	{.fl = 0, .lut = 0x112233344556677},
 };
 
-static struct dpu_qos_lut_entry sdm845_qos_nrt[] = {
+static const struct dpu_qos_lut_entry sc7180_qos_macrotile[] = {
+	{.fl = 0, .lut = 0x0011223344556677},
+};
+
+static const struct dpu_qos_lut_entry sdm845_qos_nrt[] = {
+	{.fl = 0, .lut = 0x0},
+};
+
+static const struct dpu_qos_lut_entry sc7180_qos_nrt[] = {
 	{.fl = 0, .lut = 0x0},
 };
 
-static struct dpu_perf_cfg sdm845_perf_data = {
+static const struct dpu_perf_cfg sdm845_perf_data = {
 	.max_bw_low = 6800000,
 	.max_bw_high = 6800000,
 	.min_core_ib = 2400000,
@@ -392,6 +514,30 @@ static struct dpu_perf_cfg sdm845_perf_data = {
 	},
 };
 
+static const struct dpu_perf_cfg sc7180_perf_data = {
+	.max_bw_low = 3900000,
+	.max_bw_high = 5500000,
+	.min_core_ib = 2400000,
+	.min_llcc_ib = 800000,
+	.min_dram_ib = 800000,
+	.danger_lut_tbl = {0xff, 0xffff, 0x0},
+	.qos_lut_tbl = {
+		{.nentry = ARRAY_SIZE(sc7180_qos_linear),
+		.entries = sc7180_qos_linear
+		},
+		{.nentry = ARRAY_SIZE(sc7180_qos_macrotile),
+		.entries = sc7180_qos_macrotile
+		},
+		{.nentry = ARRAY_SIZE(sc7180_qos_nrt),
+		.entries = sc7180_qos_nrt
+		},
+	},
+	.cdp_cfg = {
+		{.rd_enable = 1, .wr_enable = 1},
+		{.rd_enable = 1, .wr_enable = 0}
+	},
+};
+
 /*************************************************************
  * Hardware catalog init
  *************************************************************/
@@ -421,12 +567,43 @@ static void sdm845_cfg_init(struct dpu_mdss_cfg *dpu_cfg)
 		.reg_dma_count = 1,
 		.dma_cfg = sdm845_regdma,
 		.perf = sdm845_perf_data,
+		.mdss_irqs = 0x3ff,
+	};
+}
+
+/*
+ * sc7180_cfg_init(): populate sc7180 dpu sub-blocks reg offsets
+ * and instance counts.
+ */
+static void sc7180_cfg_init(struct dpu_mdss_cfg *dpu_cfg)
+{
+	*dpu_cfg = (struct dpu_mdss_cfg){
+		.caps = &sc7180_dpu_caps,
+		.mdp_count = ARRAY_SIZE(sc7180_mdp),
+		.mdp = sc7180_mdp,
+		.ctl_count = ARRAY_SIZE(sc7180_ctl),
+		.ctl = sc7180_ctl,
+		.sspp_count = ARRAY_SIZE(sc7180_sspp),
+		.sspp = sc7180_sspp,
+		.mixer_count = ARRAY_SIZE(sc7180_lm),
+		.mixer = sc7180_lm,
+		.pingpong_count = ARRAY_SIZE(sc7180_pp),
+		.pingpong = sc7180_pp,
+		.intf_count = ARRAY_SIZE(sc7180_intf),
+		.intf = sc7180_intf,
+		.vbif_count = ARRAY_SIZE(sdm845_vbif),
+		.vbif = sdm845_vbif,
+		.reg_dma_count = 1,
+		.dma_cfg = sdm845_regdma,
+		.perf = sc7180_perf_data,
+		.mdss_irqs = 0x3f,
 	};
 }
 
-static struct dpu_mdss_hw_cfg_handler cfg_handler[] = {
+static const struct dpu_mdss_hw_cfg_handler cfg_handler[] = {
 	{ .hw_rev = DPU_HW_VER_400, .cfg_init = sdm845_cfg_init},
 	{ .hw_rev = DPU_HW_VER_401, .cfg_init = sdm845_cfg_init},
+	{ .hw_rev = DPU_HW_VER_620, .cfg_init = sc7180_cfg_init},
 };
 
 void dpu_hw_catalog_deinit(struct dpu_mdss_cfg *dpu_cfg)
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
index ec76b8687a98..09df7d87dd43 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
@@ -38,6 +38,7 @@
 #define DPU_HW_VER_401	DPU_HW_VER(4, 0, 1) /* sdm845 v2.0 */
 #define DPU_HW_VER_410	DPU_HW_VER(4, 1, 0) /* sdm670 v1.0 */
 #define DPU_HW_VER_500	DPU_HW_VER(5, 0, 0) /* sdm855 v1.0 */
+#define DPU_HW_VER_620	DPU_HW_VER(6, 2, 0) /* sc7180 v1.0 */
 
 
 #define IS_MSM8996_TARGET(rev) IS_DPU_MAJOR_MINOR_SAME((rev), DPU_HW_VER_170)
@@ -45,6 +46,7 @@
 #define IS_SDM845_TARGET(rev) IS_DPU_MAJOR_MINOR_SAME((rev), DPU_HW_VER_400)
 #define IS_SDM670_TARGET(rev) IS_DPU_MAJOR_MINOR_SAME((rev), DPU_HW_VER_410)
 #define IS_SDM855_TARGET(rev) IS_DPU_MAJOR_MINOR_SAME((rev), DPU_HW_VER_500)
+#define IS_SC7180_TARGET(rev) IS_DPU_MAJOR_MINOR_SAME((rev), DPU_HW_VER_620)
 
 
 #define DPU_HW_BLK_NAME_LEN	16
@@ -92,6 +94,7 @@ enum {
  * @DPU_SSPP_SRC             Src and fetch part of the pipes,
  * @DPU_SSPP_SCALER_QSEED2,  QSEED2 algorithm support
  * @DPU_SSPP_SCALER_QSEED3,  QSEED3 alogorithm support
+ * @DPU_SSPP_SCALER_QSEED4,  QSEED4 algorithm support
  * @DPU_SSPP_SCALER_RGB,     RGB Scaler, supported by RGB pipes
  * @DPU_SSPP_CSC,            Support of Color space converion
  * @DPU_SSPP_CSC_10BIT,      Support of 10-bit Color space conversion
@@ -110,6 +113,7 @@ enum {
 	DPU_SSPP_SRC = 0x1,
 	DPU_SSPP_SCALER_QSEED2,
 	DPU_SSPP_SCALER_QSEED3,
+	DPU_SSPP_SCALER_QSEED4,
 	DPU_SSPP_SCALER_RGB,
 	DPU_SSPP_CSC,
 	DPU_SSPP_CSC_10BIT,
@@ -166,6 +170,7 @@ enum {
  */
 enum {
 	DPU_CTL_SPLIT_DISPLAY = 0x1,
+	DPU_CTL_ACTIVE_CFG,
 	DPU_CTL_MAX
 };
 
@@ -269,7 +274,7 @@ struct dpu_qos_lut_entry {
  */
 struct dpu_qos_lut_tbl {
 	u32 nentry;
-	struct dpu_qos_lut_entry *entries;
+	const struct dpu_qos_lut_entry *entries;
 };
 
 /**
@@ -283,6 +288,7 @@ struct dpu_qos_lut_tbl {
  * @has_src_split      source split feature status
  * @has_dim_layer      dim layer feature status
  * @has_idle_pc        indicate if idle power collapse feature is supported
+ * @has_3d_merge       indicate if 3D merge is supported
  */
 struct dpu_caps {
 	u32 max_mixer_width;
@@ -293,6 +299,7 @@ struct dpu_caps {
 	bool has_src_split;
 	bool has_dim_layer;
 	bool has_idle_pc;
+	bool has_3d_merge;
 };
 
 /**
@@ -320,6 +327,7 @@ struct dpu_sspp_blks_common {
  * @maxupscale:  maxupscale ratio supported
  * @smart_dma_priority: hw priority of rect1 of multirect pipe
  * @max_per_pipe_bw: maximum allowable bandwidth of this pipe in kBps
+ * @qseed_ver: qseed version
  * @src_blk:
  * @scaler_blk:
  * @csc_blk:
@@ -340,6 +348,7 @@ struct dpu_sspp_sub_blks {
 	u32 maxupscale;
 	u32 smart_dma_priority;
 	u32 max_per_pipe_bw;
+	u32 qseed_ver;
 	struct dpu_src_blk src_blk;
 	struct dpu_scaler_blk scaler_blk;
 	struct dpu_pp_blk csc_blk;
@@ -511,7 +520,7 @@ struct dpu_vbif_dynamic_ot_cfg {
  */
 struct dpu_vbif_dynamic_ot_tbl {
 	u32 count;
-	struct dpu_vbif_dynamic_ot_cfg *cfg;
+	const struct dpu_vbif_dynamic_ot_cfg *cfg;
 };
 
 /**
@@ -521,7 +530,7 @@ struct dpu_vbif_dynamic_ot_tbl {
  */
 struct dpu_vbif_qos_tbl {
 	u32 npriority_lvl;
-	u32 *priority_lvl;
+	const u32 *priority_lvl;
 };
 
 /**
@@ -646,6 +655,7 @@ struct dpu_perf_cfg {
  * @dma_formats        Supported formats for dma pipe
  * @cursor_formats     Supported formats for cursor pipe
  * @vig_formats        Supported formats for vig pipe
+ * @mdss_irqs:         Bitmap with the irqs supported by the target
  */
 struct dpu_mdss_cfg {
 	u32 hwversion;
@@ -653,25 +663,25 @@ struct dpu_mdss_cfg {
 	const struct dpu_caps *caps;
 
 	u32 mdp_count;
-	struct dpu_mdp_cfg *mdp;
+	const struct dpu_mdp_cfg *mdp;
 
 	u32 ctl_count;
-	struct dpu_ctl_cfg *ctl;
+	const struct dpu_ctl_cfg *ctl;
 
 	u32 sspp_count;
-	struct dpu_sspp_cfg *sspp;
+	const struct dpu_sspp_cfg *sspp;
 
 	u32 mixer_count;
-	struct dpu_lm_cfg *mixer;
+	const struct dpu_lm_cfg *mixer;
 
 	u32 pingpong_count;
-	struct dpu_pingpong_cfg *pingpong;
+	const struct dpu_pingpong_cfg *pingpong;
 
 	u32 intf_count;
-	struct dpu_intf_cfg *intf;
+	const struct dpu_intf_cfg *intf;
 
 	u32 vbif_count;
-	struct dpu_vbif_cfg *vbif;
+	const struct dpu_vbif_cfg *vbif;
 
 	u32 reg_dma_count;
 	struct dpu_reg_dma_cfg dma_cfg;
@@ -681,9 +691,11 @@ struct dpu_mdss_cfg {
 	/* Add additional block data structures here */
 
 	struct dpu_perf_cfg perf;
-	struct dpu_format_extended *dma_formats;
-	struct dpu_format_extended *cursor_formats;
-	struct dpu_format_extended *vig_formats;
+	const struct dpu_format_extended *dma_formats;
+	const struct dpu_format_extended *cursor_formats;
+	const struct dpu_format_extended *vig_formats;
+
+	unsigned long mdss_irqs;
 };
 
 struct dpu_mdss_hw_cfg_handler {
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog_format.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog_format.h
index bb6112c949ae..3766f0fd0bf0 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog_format.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog_format.h
@@ -6,8 +6,12 @@
 
 static const uint32_t qcom_compressed_supported_formats[] = {
 	DRM_FORMAT_ABGR8888,
+	DRM_FORMAT_ARGB8888,
 	DRM_FORMAT_XBGR8888,
+	DRM_FORMAT_XRGB8888,
 	DRM_FORMAT_BGR565,
+
+	DRM_FORMAT_NV12,
 };
 
 static const uint32_t plane_formats[] = {
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c
index 179e8d52cadb..831e5f7a9b7f 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c
@@ -22,14 +22,18 @@
 #define   CTL_PREPARE                   0x0d0
 #define   CTL_SW_RESET                  0x030
 #define   CTL_LAYER_EXTN_OFFSET         0x40
+#define   CTL_INTF_ACTIVE               0x0F4
+#define   CTL_INTF_FLUSH                0x110
+#define   CTL_INTF_MASTER               0x134
 
 #define CTL_MIXER_BORDER_OUT            BIT(24)
 #define CTL_FLUSH_MASK_CTL              BIT(17)
 
 #define DPU_REG_RESET_TIMEOUT_US        2000
+#define  INTF_IDX       31
 
-static struct dpu_ctl_cfg *_ctl_offset(enum dpu_ctl ctl,
-		struct dpu_mdss_cfg *m,
+static const struct dpu_ctl_cfg *_ctl_offset(enum dpu_ctl ctl,
+		const struct dpu_mdss_cfg *m,
 		void __iomem *addr,
 		struct dpu_hw_blk_reg_map *b)
 {
@@ -100,11 +104,27 @@ static inline void dpu_hw_ctl_update_pending_flush(struct dpu_hw_ctl *ctx,
 	ctx->pending_flush_mask |= flushbits;
 }
 
+static inline void dpu_hw_ctl_update_pending_intf_flush(struct dpu_hw_ctl *ctx,
+		u32 flushbits)
+{
+	ctx->pending_intf_flush_mask |= flushbits;
+}
+
 static u32 dpu_hw_ctl_get_pending_flush(struct dpu_hw_ctl *ctx)
 {
 	return ctx->pending_flush_mask;
 }
 
+static inline void dpu_hw_ctl_trigger_flush_v1(struct dpu_hw_ctl *ctx)
+{
+
+	if (ctx->pending_flush_mask & BIT(INTF_IDX))
+		DPU_REG_WRITE(&ctx->hw, CTL_INTF_FLUSH,
+				ctx->pending_intf_flush_mask);
+
+	DPU_REG_WRITE(&ctx->hw, CTL_FLUSH, ctx->pending_flush_mask);
+}
+
 static inline void dpu_hw_ctl_trigger_flush(struct dpu_hw_ctl *ctx)
 {
 	trace_dpu_hw_ctl_trigger_pending_flush(ctx->pending_flush_mask,
@@ -222,6 +242,36 @@ static int dpu_hw_ctl_get_bitmask_intf(struct dpu_hw_ctl *ctx,
 	return 0;
 }
 
+static int dpu_hw_ctl_get_bitmask_intf_v1(struct dpu_hw_ctl *ctx,
+		u32 *flushbits, enum dpu_intf intf)
+{
+	switch (intf) {
+	case INTF_0:
+	case INTF_1:
+		*flushbits |= BIT(INTF_IDX);
+		break;
+	default:
+		return 0;
+	}
+	return 0;
+}
+
+static int dpu_hw_ctl_active_get_bitmask_intf(struct dpu_hw_ctl *ctx,
+		u32 *flushbits, enum dpu_intf intf)
+{
+	switch (intf) {
+	case INTF_0:
+		*flushbits |= BIT(0);
+		break;
+	case INTF_1:
+		*flushbits |= BIT(1);
+		break;
+	default:
+		return 0;
+	}
+	return 0;
+}
+
 static u32 dpu_hw_ctl_poll_reset_status(struct dpu_hw_ctl *ctx, u32 timeout_us)
 {
 	struct dpu_hw_blk_reg_map *c = &ctx->hw;
@@ -422,6 +472,24 @@ exit:
 	DPU_REG_WRITE(c, CTL_LAYER_EXT3(lm), mixercfg_ext3);
 }
 
+
+/* v1 interface setup: program CTL_TOP, then mark cfg->intf as active. */
+static void dpu_hw_ctl_intf_cfg_v1(struct dpu_hw_ctl *ctx,
+		struct dpu_hw_intf_cfg *cfg)
+{
+	struct dpu_hw_blk_reg_map *regs = &ctx->hw;
+	u32 top = 0;
+	u32 active;
+
+	if (cfg->intf_mode_sel == DPU_CTL_MODE_SEL_CMD)
+		top = BIT(17);
+
+	/* read-modify-write so bits already set in the register survive */
+	active = DPU_REG_READ(regs, CTL_INTF_ACTIVE) | BIT(cfg->intf - INTF_0);
+	DPU_REG_WRITE(regs, CTL_TOP, top);
+	DPU_REG_WRITE(regs, CTL_INTF_ACTIVE, active);
+}
+
 static void dpu_hw_ctl_intf_cfg(struct dpu_hw_ctl *ctx,
 		struct dpu_hw_intf_cfg *cfg)
 {
@@ -455,31 +523,41 @@ static void dpu_hw_ctl_intf_cfg(struct dpu_hw_ctl *ctx,
 static void _setup_ctl_ops(struct dpu_hw_ctl_ops *ops,
 		unsigned long cap)
 {
+	if (cap & BIT(DPU_CTL_ACTIVE_CFG)) {
+		ops->trigger_flush = dpu_hw_ctl_trigger_flush_v1;
+		ops->setup_intf_cfg = dpu_hw_ctl_intf_cfg_v1;
+		ops->get_bitmask_intf = dpu_hw_ctl_get_bitmask_intf_v1;
+		ops->get_bitmask_active_intf =
+			dpu_hw_ctl_active_get_bitmask_intf;
+		ops->update_pending_intf_flush =
+			dpu_hw_ctl_update_pending_intf_flush;
+	} else {
+		ops->trigger_flush = dpu_hw_ctl_trigger_flush;
+		ops->setup_intf_cfg = dpu_hw_ctl_intf_cfg;
+		ops->get_bitmask_intf = dpu_hw_ctl_get_bitmask_intf;
+	}
 	ops->clear_pending_flush = dpu_hw_ctl_clear_pending_flush;
 	ops->update_pending_flush = dpu_hw_ctl_update_pending_flush;
 	ops->get_pending_flush = dpu_hw_ctl_get_pending_flush;
-	ops->trigger_flush = dpu_hw_ctl_trigger_flush;
 	ops->get_flush_register = dpu_hw_ctl_get_flush_register;
 	ops->trigger_start = dpu_hw_ctl_trigger_start;
 	ops->trigger_pending = dpu_hw_ctl_trigger_pending;
-	ops->setup_intf_cfg = dpu_hw_ctl_intf_cfg;
 	ops->reset = dpu_hw_ctl_reset_control;
 	ops->wait_reset_status = dpu_hw_ctl_wait_reset_status;
 	ops->clear_all_blendstages = dpu_hw_ctl_clear_all_blendstages;
 	ops->setup_blendstage = dpu_hw_ctl_setup_blendstage;
 	ops->get_bitmask_sspp = dpu_hw_ctl_get_bitmask_sspp;
 	ops->get_bitmask_mixer = dpu_hw_ctl_get_bitmask_mixer;
-	ops->get_bitmask_intf = dpu_hw_ctl_get_bitmask_intf;
 };
 
 static struct dpu_hw_blk_ops dpu_hw_ops;
 
 struct dpu_hw_ctl *dpu_hw_ctl_init(enum dpu_ctl idx,
 		void __iomem *addr,
-		struct dpu_mdss_cfg *m)
+		const struct dpu_mdss_cfg *m)
 {
 	struct dpu_hw_ctl *c;
-	struct dpu_ctl_cfg *cfg;
+	const struct dpu_ctl_cfg *cfg;
 
 	c = kzalloc(sizeof(*c), GFP_KERNEL);
 	if (!c)
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h
index d3ae939ef9f8..09e1263c72e2 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h
@@ -91,6 +91,15 @@ struct dpu_hw_ctl_ops {
 		u32 flushbits);
 
 	/**
+	 * OR in the given flushbits to the cached pending_intf_flush_mask
+	 * No effect on hardware
+	 * @ctx       : ctl path ctx pointer
+	 * @flushbits : module flushmask
+	 */
+	void (*update_pending_intf_flush)(struct dpu_hw_ctl *ctx,
+		u32 flushbits);
+
+	/**
 	 * Write the value of the pending_flush_mask to hardware
 	 * @ctx       : ctl path ctx pointer
 	 */
@@ -130,11 +139,24 @@ struct dpu_hw_ctl_ops {
 	uint32_t (*get_bitmask_mixer)(struct dpu_hw_ctl *ctx,
 		enum dpu_lm blk);
 
+	/**
+	 * Query the value of the intf flush mask
+	 * No effect on hardware
+	 * @ctx       : ctl path ctx pointer
+	 */
 	int (*get_bitmask_intf)(struct dpu_hw_ctl *ctx,
 		u32 *flushbits,
 		enum dpu_intf blk);
 
 	/**
+	 * Query the value of the intf active flush mask
+	 * No effect on hardware
+	 * @ctx       : ctl path ctx pointer
+	 */
+	int (*get_bitmask_active_intf)(struct dpu_hw_ctl *ctx,
+		u32 *flushbits, enum dpu_intf blk);
+
+	/**
 	 * Set all blend stages to disabled
 	 * @ctx       : ctl path ctx pointer
 	 */
@@ -159,6 +181,7 @@ struct dpu_hw_ctl_ops {
  * @mixer_count: number of mixers
  * @mixer_hw_caps: mixer hardware capabilities
  * @pending_flush_mask: storage for pending ctl_flush managed via ops
+ * @pending_intf_flush_mask: pending INTF flush
  * @ops: operation list
  */
 struct dpu_hw_ctl {
@@ -171,6 +194,7 @@ struct dpu_hw_ctl {
 	int mixer_count;
 	const struct dpu_lm_cfg *mixer_hw_caps;
 	u32 pending_flush_mask;
+	u32 pending_intf_flush_mask;
 
 	/* ops */
 	struct dpu_hw_ctl_ops ops;
@@ -195,7 +219,7 @@ static inline struct dpu_hw_ctl *to_dpu_hw_ctl(struct dpu_hw_blk *hw)
  */
 struct dpu_hw_ctl *dpu_hw_ctl_init(enum dpu_ctl idx,
 		void __iomem *addr,
-		struct dpu_mdss_cfg *m);
+		const struct dpu_mdss_cfg *m);
 
 /**
  * dpu_hw_ctl_destroy(): Destroys ctl driver context
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c
index 8bfa7d0eede6..d84a84f7fe1a 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c
@@ -800,8 +800,8 @@ static void dpu_hw_intr_dispatch_irq(struct dpu_hw_intr *intr,
 		start_idx = reg_idx * 32;
 		end_idx = start_idx + 32;
 
-		if (start_idx >= ARRAY_SIZE(dpu_irq_map) ||
-				end_idx > ARRAY_SIZE(dpu_irq_map))
+		if (!test_bit(reg_idx, &intr->irq_mask) ||
+			start_idx >= ARRAY_SIZE(dpu_irq_map))
 			continue;
 
 		/*
@@ -955,8 +955,11 @@ static int dpu_hw_intr_clear_irqs(struct dpu_hw_intr *intr)
 	if (!intr)
 		return -EINVAL;
 
-	for (i = 0; i < ARRAY_SIZE(dpu_intr_set); i++)
-		DPU_REG_WRITE(&intr->hw, dpu_intr_set[i].clr_off, 0xffffffff);
+	for (i = 0; i < ARRAY_SIZE(dpu_intr_set); i++) {
+		if (test_bit(i, &intr->irq_mask))
+			DPU_REG_WRITE(&intr->hw,
+					dpu_intr_set[i].clr_off, 0xffffffff);
+	}
 
 	/* ensure register writes go through */
 	wmb();
@@ -971,8 +974,11 @@ static int dpu_hw_intr_disable_irqs(struct dpu_hw_intr *intr)
 	if (!intr)
 		return -EINVAL;
 
-	for (i = 0; i < ARRAY_SIZE(dpu_intr_set); i++)
-		DPU_REG_WRITE(&intr->hw, dpu_intr_set[i].en_off, 0x00000000);
+	for (i = 0; i < ARRAY_SIZE(dpu_intr_set); i++) {
+		if (test_bit(i, &intr->irq_mask))
+			DPU_REG_WRITE(&intr->hw,
+					dpu_intr_set[i].en_off, 0x00000000);
+	}
 
 	/* ensure register writes go through */
 	wmb();
@@ -991,6 +997,9 @@ static void dpu_hw_intr_get_interrupt_statuses(struct dpu_hw_intr *intr)
 
 	spin_lock_irqsave(&intr->irq_lock, irq_flags);
 	for (i = 0; i < ARRAY_SIZE(dpu_intr_set); i++) {
+		if (!test_bit(i, &intr->irq_mask))
+			continue;
+
 		/* Read interrupt status */
 		intr->save_irq_status[i] = DPU_REG_READ(&intr->hw,
 				dpu_intr_set[i].status_off);
@@ -1115,6 +1124,7 @@ struct dpu_hw_intr *dpu_hw_intr_init(void __iomem *addr,
 		return ERR_PTR(-ENOMEM);
 	}
 
+	intr->irq_mask = m->mdss_irqs;
 	spin_lock_init(&intr->irq_lock);
 
 	return intr;
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h
index 4edcf402dc46..fc9c98617281 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h
@@ -187,6 +187,7 @@ struct dpu_hw_intr {
 	u32 *save_irq_status;
 	u32 irq_idx_tbl_size;
 	spinlock_t irq_lock;
+	unsigned long irq_mask;
 };
 
 /**
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c
index dcd87cda13fe..efe9a5719c6b 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c
@@ -56,8 +56,10 @@
 #define   INTF_FRAME_COUNT              0x0AC
 #define   INTF_LINE_COUNT               0x0B0
 
-static struct dpu_intf_cfg *_intf_offset(enum dpu_intf intf,
-		struct dpu_mdss_cfg *m,
+#define   INTF_MUX                      0x25C
+
+static const struct dpu_intf_cfg *_intf_offset(enum dpu_intf intf,
+		const struct dpu_mdss_cfg *m,
 		void __iomem *addr,
 		struct dpu_hw_blk_reg_map *b)
 {
@@ -218,6 +220,30 @@ static void dpu_hw_intf_setup_prg_fetch(
 	DPU_REG_WRITE(c, INTF_CONFIG, fetch_enable);
 }
 
+/*
+ * Route a pingpong block to this interface via the low nibble of INTF_MUX.
+ * With @enable set the nibble holds the index of @pp relative to
+ * PINGPONG_0 (masked to 3 bits); otherwise it is set to 0xf. All other
+ * register bits are preserved. A NULL @intf is silently ignored.
+ */
+static void dpu_hw_intf_bind_pingpong_blk(
+		struct dpu_hw_intf *intf,
+		bool enable,
+		const enum dpu_pingpong pp)
+{
+	const u32 sel = enable ? ((pp - PINGPONG_0) & 0x7) : 0xf;
+	struct dpu_hw_blk_reg_map *regs;
+	u32 mux;
+
+	if (!intf)
+		return;
+
+	regs = &intf->hw;
+
+	mux = DPU_REG_READ(regs, INTF_MUX) & ~0xf;
+	DPU_REG_WRITE(regs, INTF_MUX, mux | sel);
+}
+
 static void dpu_hw_intf_get_status(
 		struct dpu_hw_intf *intf,
 		struct intf_status *s)
@@ -254,16 +280,18 @@ static void _setup_intf_ops(struct dpu_hw_intf_ops *ops,
 	ops->get_status = dpu_hw_intf_get_status;
 	ops->enable_timing = dpu_hw_intf_enable_timing_engine;
 	ops->get_line_count = dpu_hw_intf_get_line_count;
+	if (cap & BIT(DPU_CTL_ACTIVE_CFG))
+		ops->bind_pingpong_blk = dpu_hw_intf_bind_pingpong_blk;
 }
 
 static struct dpu_hw_blk_ops dpu_hw_ops;
 
 struct dpu_hw_intf *dpu_hw_intf_init(enum dpu_intf idx,
 		void __iomem *addr,
-		struct dpu_mdss_cfg *m)
+		const struct dpu_mdss_cfg *m)
 {
 	struct dpu_hw_intf *c;
-	struct dpu_intf_cfg *cfg;
+	const struct dpu_intf_cfg *cfg;
 
 	c = kzalloc(sizeof(*c), GFP_KERNEL);
 	if (!c)
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h
index b03acc225c9b..85468981632d 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h
@@ -52,6 +52,8 @@ struct intf_status {
  * @ enable_timing: enable/disable timing engine
  * @ get_status: returns if timing engine is enabled or not
  * @ get_line_count: reads current vertical line counter
+ * @bind_pingpong_blk: enable/disable the connection with pingpong which will
+ *                     feed pixels to this interface
  */
 struct dpu_hw_intf_ops {
 	void (*setup_timing_gen)(struct dpu_hw_intf *intf,
@@ -68,6 +70,10 @@ struct dpu_hw_intf_ops {
 			struct intf_status *status);
 
 	u32 (*get_line_count)(struct dpu_hw_intf *intf);
+
+	void (*bind_pingpong_blk)(struct dpu_hw_intf *intf,
+			bool enable,
+			const enum dpu_pingpong pp);
 };
 
 struct dpu_hw_intf {
@@ -92,7 +98,7 @@ struct dpu_hw_intf {
  */
 struct dpu_hw_intf *dpu_hw_intf_init(enum dpu_intf idx,
 		void __iomem *addr,
-		struct dpu_mdss_cfg *m);
+		const struct dpu_mdss_cfg *m);
 
 /**
  * dpu_hw_intf_destroy(): Destroys INTF driver context
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c
index 5bc39baa746a..37becd43bd54 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c
@@ -24,8 +24,8 @@
 #define LM_BLEND0_FG_ALPHA               0x04
 #define LM_BLEND0_BG_ALPHA               0x08
 
-static struct dpu_lm_cfg *_lm_offset(enum dpu_lm mixer,
-		struct dpu_mdss_cfg *m,
+static const struct dpu_lm_cfg *_lm_offset(enum dpu_lm mixer,
+		const struct dpu_mdss_cfg *m,
 		void __iomem *addr,
 		struct dpu_hw_blk_reg_map *b)
 {
@@ -147,12 +147,13 @@ static void dpu_hw_lm_setup_color3(struct dpu_hw_mixer *ctx,
 	DPU_REG_WRITE(c, LM_OP_MODE, op_mode);
 }
 
-static void _setup_mixer_ops(struct dpu_mdss_cfg *m,
+static void _setup_mixer_ops(const struct dpu_mdss_cfg *m,
 		struct dpu_hw_lm_ops *ops,
 		unsigned long features)
 {
 	ops->setup_mixer_out = dpu_hw_lm_setup_out;
-	if (IS_SDM845_TARGET(m->hwversion) || IS_SDM670_TARGET(m->hwversion))
+	if (IS_SDM845_TARGET(m->hwversion) || IS_SDM670_TARGET(m->hwversion)
+	    || IS_SC7180_TARGET(m->hwversion))
 		ops->setup_blend_config = dpu_hw_lm_setup_blend_config_sdm845;
 	else
 		ops->setup_blend_config = dpu_hw_lm_setup_blend_config;
@@ -164,10 +165,10 @@ static struct dpu_hw_blk_ops dpu_hw_ops;
 
 struct dpu_hw_mixer *dpu_hw_lm_init(enum dpu_lm idx,
 		void __iomem *addr,
-		struct dpu_mdss_cfg *m)
+		const struct dpu_mdss_cfg *m)
 {
 	struct dpu_hw_mixer *c;
-	struct dpu_lm_cfg *cfg;
+	const struct dpu_lm_cfg *cfg;
 
 	c = kzalloc(sizeof(*c), GFP_KERNEL);
 	if (!c)
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h
index 147ace31cfc2..4a6b2de19ef6 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h
@@ -91,7 +91,7 @@ static inline struct dpu_hw_mixer *to_dpu_hw_mixer(struct dpu_hw_blk *hw)
  */
 struct dpu_hw_mixer *dpu_hw_lm_init(enum dpu_lm idx,
 		void __iomem *addr,
-		struct dpu_mdss_cfg *m);
+		const struct dpu_mdss_cfg *m);
 
 /**
  * dpu_hw_lm_destroy(): Destroys layer mixer driver context
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c
index 5dbaba9fd180..d110a40f0e73 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c
@@ -28,8 +28,8 @@
 #define PP_FBC_BUDGET_CTL               0x038
 #define PP_FBC_LOSSY_MODE               0x03C
 
-static struct dpu_pingpong_cfg *_pingpong_offset(enum dpu_pingpong pp,
-		struct dpu_mdss_cfg *m,
+static const struct dpu_pingpong_cfg *_pingpong_offset(enum dpu_pingpong pp,
+		const struct dpu_mdss_cfg *m,
 		void __iomem *addr,
 		struct dpu_hw_blk_reg_map *b)
 {
@@ -195,10 +195,10 @@ static struct dpu_hw_blk_ops dpu_hw_ops;
 
 struct dpu_hw_pingpong *dpu_hw_pingpong_init(enum dpu_pingpong idx,
 		void __iomem *addr,
-		struct dpu_mdss_cfg *m)
+		const struct dpu_mdss_cfg *m)
 {
 	struct dpu_hw_pingpong *c;
-	struct dpu_pingpong_cfg *cfg;
+	const struct dpu_pingpong_cfg *cfg;
 
 	c = kzalloc(sizeof(*c), GFP_KERNEL);
 	if (!c)
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h
index 58bdb9279aa8..3d6f46b1db30 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h
@@ -106,7 +106,7 @@ struct dpu_hw_pingpong {
  */
 struct dpu_hw_pingpong *dpu_hw_pingpong_init(enum dpu_pingpong idx,
 		void __iomem *addr,
-		struct dpu_mdss_cfg *m);
+		const struct dpu_mdss_cfg *m);
 
 /**
  * dpu_hw_pingpong_destroy - destroys pingpong driver context
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c
index 4f8b813aab81..82c5dbfdabc7 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c
@@ -132,6 +132,7 @@
 /* traffic shaper clock in Hz */
 #define TS_CLK			19200000
 
+
 static int _sspp_subblk_offset(struct dpu_hw_pipe *ctx,
 		int s_id,
 		u32 *idx)
@@ -657,7 +658,8 @@ static void _setup_layer_ops(struct dpu_hw_pipe *c,
 		test_bit(DPU_SSPP_SMART_DMA_V2, &c->cap->features))
 		c->ops.setup_multirect = dpu_hw_sspp_setup_multirect;
 
-	if (test_bit(DPU_SSPP_SCALER_QSEED3, &features)) {
+	if (test_bit(DPU_SSPP_SCALER_QSEED3, &features) ||
+			test_bit(DPU_SSPP_SCALER_QSEED4, &features)) {
 		c->ops.setup_scaler = _dpu_hw_sspp_setup_scaler3;
 		c->ops.get_scaler_ver = _dpu_hw_sspp_get_scaler3_ver;
 	}
@@ -666,7 +668,7 @@ static void _setup_layer_ops(struct dpu_hw_pipe *c,
 		c->ops.setup_cdp = dpu_hw_sspp_setup_cdp;
 }
 
-static struct dpu_sspp_cfg *_sspp_offset(enum dpu_sspp sspp,
+static const struct dpu_sspp_cfg *_sspp_offset(enum dpu_sspp sspp,
 		void __iomem *addr,
 		struct dpu_mdss_cfg *catalog,
 		struct dpu_hw_blk_reg_map *b)
@@ -696,7 +698,7 @@ struct dpu_hw_pipe *dpu_hw_sspp_init(enum dpu_sspp idx,
 		bool is_virtual_pipe)
 {
 	struct dpu_hw_pipe *hw_pipe;
-	struct dpu_sspp_cfg *cfg;
+	const struct dpu_sspp_cfg *cfg;
 
 	if (!addr || !catalog)
 		return ERR_PTR(-EINVAL);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h
index a3680b482b41..85b018a9b03c 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h
@@ -27,7 +27,8 @@ struct dpu_hw_pipe;
  */
 #define DPU_SSPP_SCALER ((1UL << DPU_SSPP_SCALER_RGB) | \
 	(1UL << DPU_SSPP_SCALER_QSEED2) | \
-	(1UL << DPU_SSPP_SCALER_QSEED3))
+	 (1UL << DPU_SSPP_SCALER_QSEED3) | \
+	  (1UL << DPU_SSPP_SCALER_QSEED4))
 
 /**
  * Component indices
@@ -373,7 +374,7 @@ struct dpu_hw_pipe {
 	struct dpu_hw_blk base;
 	struct dpu_hw_blk_reg_map hw;
 	struct dpu_mdss_cfg *catalog;
-	struct dpu_mdp_cfg *mdp;
+	const struct dpu_mdp_cfg *mdp;
 
 	/* Pipe */
 	enum dpu_sspp idx;
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.c
index 27fbeb504362..078afc5f5882 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.c
@@ -93,19 +93,12 @@ int msm_dss_enable_clk(struct dss_clk *clk_arry, int num_clk, int enable)
 			DEV_DBG("%pS->%s: enable '%s'\n",
 				__builtin_return_address(0), __func__,
 				clk_arry[i].clk_name);
-			if (clk_arry[i].clk) {
-				rc = clk_prepare_enable(clk_arry[i].clk);
-				if (rc)
-					DEV_ERR("%pS->%s: %s en fail. rc=%d\n",
-						__builtin_return_address(0),
-						__func__,
-						clk_arry[i].clk_name, rc);
-			} else {
-				DEV_ERR("%pS->%s: '%s' is not available\n",
-					__builtin_return_address(0), __func__,
-					clk_arry[i].clk_name);
-				rc = -EPERM;
-			}
+			rc = clk_prepare_enable(clk_arry[i].clk);
+			if (rc)
+				DEV_ERR("%pS->%s: %s en fail. rc=%d\n",
+					__builtin_return_address(0),
+					__func__,
+					clk_arry[i].clk_name, rc);
 
 			if (rc && i) {
 				msm_dss_enable_clk(&clk_arry[i - 1],
@@ -119,12 +112,7 @@ int msm_dss_enable_clk(struct dss_clk *clk_arry, int num_clk, int enable)
 				__builtin_return_address(0), __func__,
 				clk_arry[i].clk_name);
 
-			if (clk_arry[i].clk)
-				clk_disable_unprepare(clk_arry[i].clk);
-			else
-				DEV_ERR("%pS->%s: '%s' is not available\n",
-					__builtin_return_address(0), __func__,
-					clk_arry[i].clk_name);
+			clk_disable_unprepare(clk_arry[i].clk);
 		}
 	}
 
@@ -187,6 +175,7 @@ int msm_dss_parse_clock(struct platform_device *pdev,
 			continue;
 		mp->clk_config[i].rate = rate;
 		mp->clk_config[i].type = DSS_CLK_PCLK;
+		mp->clk_config[i].max_rate = rate;
 	}
 
 	mp->num_clk = num_clk;
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
index 6c92f0fbeac9..cb08fafb1dc1 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
@@ -1059,6 +1059,7 @@ static const struct dev_pm_ops dpu_pm_ops = {
 
 static const struct of_device_id dpu_dt_match[] = {
 	{ .compatible = "qcom,sdm845-dpu", },
+	{ .compatible = "qcom,sc7180-dpu", },
 	{}
 };
 MODULE_DEVICE_TABLE(of, dpu_dt_match);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
index 58d5acbcfc5c..3b9c33e694bf 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
@@ -53,8 +53,13 @@ enum {
 	R_MAX
 };
 
+/*
+ * Default Preload Values
+ */
 #define DPU_QSEED3_DEFAULT_PRELOAD_H 0x4
 #define DPU_QSEED3_DEFAULT_PRELOAD_V 0x3
+#define DPU_QSEED4_DEFAULT_PRELOAD_V 0x2
+#define DPU_QSEED4_DEFAULT_PRELOAD_H 0x4
 
 #define DEFAULT_REFRESH_RATE	60
 
@@ -477,8 +482,16 @@ static void _dpu_plane_setup_scaler3(struct dpu_plane *pdpu,
 			scale_cfg->src_width[i] /= chroma_subsmpl_h;
 			scale_cfg->src_height[i] /= chroma_subsmpl_v;
 		}
-		scale_cfg->preload_x[i] = DPU_QSEED3_DEFAULT_PRELOAD_H;
-		scale_cfg->preload_y[i] = DPU_QSEED3_DEFAULT_PRELOAD_V;
+
+		if (pdpu->pipe_hw->cap->features &
+			BIT(DPU_SSPP_SCALER_QSEED4)) {
+			scale_cfg->preload_x[i] = DPU_QSEED4_DEFAULT_PRELOAD_H;
+			scale_cfg->preload_y[i] = DPU_QSEED4_DEFAULT_PRELOAD_V;
+		} else {
+			scale_cfg->preload_x[i] = DPU_QSEED3_DEFAULT_PRELOAD_H;
+			scale_cfg->preload_y[i] = DPU_QSEED3_DEFAULT_PRELOAD_V;
+		}
+
 		pstate->pixel_ext.num_ext_pxls_top[i] =
 			scale_cfg->src_height[i];
 		pstate->pixel_ext.num_ext_pxls_left[i] =
@@ -738,7 +751,7 @@ done:
 	} else {
 		pstate[R0]->multirect_index = DPU_SSPP_RECT_0;
 		pstate[R1]->multirect_index = DPU_SSPP_RECT_1;
-	};
+	}
 
 	DPU_DEBUG_PLANE(dpu_plane[R0], "R0: %d - %d\n",
 		pstate[R0]->multirect_mode, pstate[R0]->multirect_index);
@@ -858,7 +871,7 @@ static int dpu_plane_atomic_check(struct drm_plane *plane,
 					  pdpu->pipe_sblk->maxupscale << 16,
 					  true, true);
 	if (ret) {
-		DPU_ERROR_PLANE(pdpu, "Check plane state failed (%d)\n", ret);
+		DPU_DEBUG_PLANE(pdpu, "Check plane state failed (%d)\n", ret);
 		return ret;
 	}
 	if (!state->visible)
@@ -884,13 +897,13 @@ static int dpu_plane_atomic_check(struct drm_plane *plane,
 		(!(pdpu->features & DPU_SSPP_SCALER) ||
 		 !(pdpu->features & (BIT(DPU_SSPP_CSC)
 		 | BIT(DPU_SSPP_CSC_10BIT))))) {
-		DPU_ERROR_PLANE(pdpu,
+		DPU_DEBUG_PLANE(pdpu,
 				"plane doesn't have scaler/csc for yuv\n");
 		return -EINVAL;
 
 	/* check src bounds */
 	} else if (!dpu_plane_validate_src(&src, &fb_rect, min_src_size)) {
-		DPU_ERROR_PLANE(pdpu, "invalid source " DRM_RECT_FMT "\n",
+		DPU_DEBUG_PLANE(pdpu, "invalid source " DRM_RECT_FMT "\n",
 				DRM_RECT_ARG(&src));
 		return -E2BIG;
 
@@ -899,19 +912,19 @@ static int dpu_plane_atomic_check(struct drm_plane *plane,
 		   (src.x1 & 0x1 || src.y1 & 0x1 ||
 		    drm_rect_width(&src) & 0x1 ||
 		    drm_rect_height(&src) & 0x1)) {
-		DPU_ERROR_PLANE(pdpu, "invalid yuv source " DRM_RECT_FMT "\n",
+		DPU_DEBUG_PLANE(pdpu, "invalid yuv source " DRM_RECT_FMT "\n",
 				DRM_RECT_ARG(&src));
 		return -EINVAL;
 
 	/* min dst support */
 	} else if (drm_rect_width(&dst) < 0x1 || drm_rect_height(&dst) < 0x1) {
-		DPU_ERROR_PLANE(pdpu, "invalid dest rect " DRM_RECT_FMT "\n",
+		DPU_DEBUG_PLANE(pdpu, "invalid dest rect " DRM_RECT_FMT "\n",
 				DRM_RECT_ARG(&dst));
 		return -EINVAL;
 
 	/* check decimated source width */
 	} else if (drm_rect_width(&src) > max_linewidth) {
-		DPU_ERROR_PLANE(pdpu, "invalid src " DRM_RECT_FMT " line:%u\n",
+		DPU_DEBUG_PLANE(pdpu, "invalid src " DRM_RECT_FMT " line:%u\n",
 				DRM_RECT_ARG(&src), max_linewidth);
 		return -E2BIG;
 	}
@@ -1337,7 +1350,8 @@ static int _dpu_plane_init_debugfs(struct drm_plane *plane)
 			pdpu->debugfs_root, &pdpu->debugfs_src);
 
 	if (cfg->features & BIT(DPU_SSPP_SCALER_QSEED3) ||
-			cfg->features & BIT(DPU_SSPP_SCALER_QSEED2)) {
+			cfg->features & BIT(DPU_SSPP_SCALER_QSEED2) ||
+			cfg->features & BIT(DPU_SSPP_SCALER_QSEED4)) {
 		dpu_debugfs_setup_regset32(&pdpu->debugfs_scaler,
 				sblk->scaler_blk.base + cfg->base,
 				sblk->scaler_blk.len,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
index ddc8412731af..23f5b1433b35 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
@@ -141,11 +141,11 @@ int dpu_rm_destroy(struct dpu_rm *rm)
 
 static int _dpu_rm_hw_blk_create(
 		struct dpu_rm *rm,
-		struct dpu_mdss_cfg *cat,
+		const struct dpu_mdss_cfg *cat,
 		void __iomem *mmio,
 		enum dpu_hw_blk_type type,
 		uint32_t id,
-		void *hw_catalog_info)
+		const void *hw_catalog_info)
 {
 	struct dpu_rm_hw_blk *blk;
 	void *hw;
@@ -215,7 +215,7 @@ int dpu_rm_init(struct dpu_rm *rm,
 
 	/* Interrogate HW catalog and create tracking items for hw blocks */
 	for (i = 0; i < cat->mixer_count; i++) {
-		struct dpu_lm_cfg *lm = &cat->mixer[i];
+		const struct dpu_lm_cfg *lm = &cat->mixer[i];
 
 		if (lm->pingpong == PINGPONG_MAX) {
 			DPU_DEBUG("skip mixer %d without pingpong\n", lm->id);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c
index 991f4c8f8a12..93ab36bd8df3 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c
@@ -299,7 +299,7 @@ void dpu_debugfs_vbif_init(struct dpu_kms *dpu_kms, struct dentry *debugfs_root)
 	entry = debugfs_create_dir("vbif", debugfs_root);
 
 	for (i = 0; i < dpu_kms->catalog->vbif_count; i++) {
-		struct dpu_vbif_cfg *vbif = &dpu_kms->catalog->vbif[i];
+		const struct dpu_vbif_cfg *vbif = &dpu_kms->catalog->vbif[i];
 
 		snprintf(vbif_name, sizeof(vbif_name), "%d", vbif->id);
 
@@ -318,7 +318,7 @@ void dpu_debugfs_vbif_init(struct dpu_kms *dpu_kms, struct dentry *debugfs_root)
 			(u32 *)&vbif->default_ot_wr_limit);
 
 		for (j = 0; j < vbif->dynamic_ot_rd_tbl.count; j++) {
-			struct dpu_vbif_dynamic_ot_cfg *cfg =
+			const struct dpu_vbif_dynamic_ot_cfg *cfg =
 					&vbif->dynamic_ot_rd_tbl.cfg[j];
 
 			snprintf(vbif_name, sizeof(vbif_name),
@@ -332,7 +332,7 @@ void dpu_debugfs_vbif_init(struct dpu_kms *dpu_kms, struct dentry *debugfs_root)
 		}
 
 		for (j = 0; j < vbif->dynamic_ot_wr_tbl.count; j++) {
-			struct dpu_vbif_dynamic_ot_cfg *cfg =
+			const struct dpu_vbif_dynamic_ot_cfg *cfg =
 					&vbif->dynamic_ot_wr_tbl.cfg[j];
 
 			snprintf(vbif_name, sizeof(vbif_name),
diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c
index 772f0753ed38..aaf2f26f8505 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c
@@ -121,7 +121,7 @@ static void mdp4_dsi_encoder_enable(struct drm_encoder *encoder)
 	if (mdp4_dsi_encoder->enabled)
 		return;
 
-	 mdp4_crtc_set_config(encoder->crtc,
+	mdp4_crtc_set_config(encoder->crtc,
 			MDP4_DMA_CONFIG_PACK_ALIGN_MSB |
 			MDP4_DMA_CONFIG_DEFLKR_EN |
 			MDP4_DMA_CONFIG_DITHER_EN |
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c
index 1f48f64539a2..e3c4c250238b 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c
@@ -902,7 +902,7 @@ struct mdp5_cfg_handler *mdp5_cfg_init(struct mdp5_kms *mdp5_kms,
 				major, minor);
 		ret = -ENXIO;
 		goto fail;
-	};
+	}
 
 	/* only after mdp5_cfg global pointer's init can we access the hw */
 	for (i = 0; i < num_handlers; i++) {
diff --git a/drivers/gpu/drm/msm/dsi/dsi.h b/drivers/gpu/drm/msm/dsi/dsi.h
index eff1a4c61258..4de771d6f0be 100644
--- a/drivers/gpu/drm/msm/dsi/dsi.h
+++ b/drivers/gpu/drm/msm/dsi/dsi.h
@@ -178,6 +178,8 @@ int msm_dsi_host_modeset_init(struct mipi_dsi_host *host,
 int msm_dsi_host_init(struct msm_dsi *msm_dsi);
 int msm_dsi_runtime_suspend(struct device *dev);
 int msm_dsi_runtime_resume(struct device *dev);
+int dsi_link_clk_set_rate_6g(struct msm_dsi_host *msm_host);
+int dsi_link_clk_set_rate_v2(struct msm_dsi_host *msm_host);
 int dsi_link_clk_enable_6g(struct msm_dsi_host *msm_host);
 int dsi_link_clk_enable_v2(struct msm_dsi_host *msm_host);
 void dsi_link_clk_disable_6g(struct msm_dsi_host *msm_host);
diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.c b/drivers/gpu/drm/msm/dsi/dsi_cfg.c
index 86ad3fdf207d..813d69deb5e8 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_cfg.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.c
@@ -153,6 +153,10 @@ static const char * const dsi_sdm845_bus_clk_names[] = {
 	"iface", "bus",
 };
 
+static const char * const dsi_sc7180_bus_clk_names[] = {
+	"iface", "bus",
+};
+
 static const struct msm_dsi_config sdm845_dsi_cfg = {
 	.io_offset = DSI_6G_REG_SHIFT,
 	.reg_cfg = {
@@ -167,7 +171,22 @@ static const struct msm_dsi_config sdm845_dsi_cfg = {
 	.num_dsi = 2,
 };
 
+static const struct msm_dsi_config sc7180_dsi_cfg = {
+	.io_offset = DSI_6G_REG_SHIFT,
+	.reg_cfg = {
+		.num = 1,
+		.regs = {
+			{"vdda", 21800, 4 },	/* 1.2 V */
+		},
+	},
+	.bus_clk_names = dsi_sc7180_bus_clk_names,
+	.num_bus_clks = ARRAY_SIZE(dsi_sc7180_bus_clk_names),
+	.io_start = { 0xae94000 },
+	.num_dsi = 1,
+};
+
 static const struct msm_dsi_host_cfg_ops msm_dsi_v2_host_ops = {
+	.link_clk_set_rate = dsi_link_clk_set_rate_v2,
 	.link_clk_enable = dsi_link_clk_enable_v2,
 	.link_clk_disable = dsi_link_clk_disable_v2,
 	.clk_init_ver = dsi_clk_init_v2,
@@ -179,6 +198,7 @@ static const struct msm_dsi_host_cfg_ops msm_dsi_v2_host_ops = {
 };
 
 static const struct msm_dsi_host_cfg_ops msm_dsi_6g_host_ops = {
+	.link_clk_set_rate = dsi_link_clk_set_rate_6g,
 	.link_clk_enable = dsi_link_clk_enable_6g,
 	.link_clk_disable = dsi_link_clk_disable_6g,
 	.clk_init_ver = NULL,
@@ -190,6 +210,7 @@ static const struct msm_dsi_host_cfg_ops msm_dsi_6g_host_ops = {
 };
 
 static const struct msm_dsi_host_cfg_ops msm_dsi_6g_v2_host_ops = {
+	.link_clk_set_rate = dsi_link_clk_set_rate_6g,
 	.link_clk_enable = dsi_link_clk_enable_6g,
 	.link_clk_disable = dsi_link_clk_disable_6g,
 	.clk_init_ver = dsi_clk_init_6g_v2,
@@ -223,6 +244,9 @@ static const struct msm_dsi_cfg_handler dsi_cfg_handlers[] = {
 		&msm8998_dsi_cfg, &msm_dsi_6g_v2_host_ops},
 	{MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_2_1,
 		&sdm845_dsi_cfg, &msm_dsi_6g_v2_host_ops},
+	{MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_4_1,
+		&sc7180_dsi_cfg, &msm_dsi_6g_v2_host_ops},
+
 };
 
 const struct msm_dsi_cfg_handler *msm_dsi_cfg_get(u32 major, u32 minor)
diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.h b/drivers/gpu/drm/msm/dsi/dsi_cfg.h
index 50a37ceb6a25..217e24a65178 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_cfg.h
+++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.h
@@ -20,6 +20,7 @@
 #define MSM_DSI_6G_VER_MINOR_V1_4_2	0x10040002
 #define MSM_DSI_6G_VER_MINOR_V2_2_0	0x20000000
 #define MSM_DSI_6G_VER_MINOR_V2_2_1	0x20020001
+#define MSM_DSI_6G_VER_MINOR_V2_4_1	0x20040001
 
 #define MSM_DSI_V2_VER_MINOR_8064	0x0
 
@@ -35,6 +36,7 @@ struct msm_dsi_config {
 };
 
 struct msm_dsi_host_cfg_ops {
+	int (*link_clk_set_rate)(struct msm_dsi_host *msm_host);
 	int (*link_clk_enable)(struct msm_dsi_host *msm_host);
 	void (*link_clk_disable)(struct msm_dsi_host *msm_host);
 	int (*clk_init_ver)(struct msm_dsi_host *msm_host);
diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c
index 458cec82ae13..11ae5b8444c3 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_host.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_host.c
@@ -505,7 +505,7 @@ int msm_dsi_runtime_resume(struct device *dev)
 	return dsi_bus_clk_enable(msm_host);
 }
 
-int dsi_link_clk_enable_6g(struct msm_dsi_host *msm_host)
+int dsi_link_clk_set_rate_6g(struct msm_dsi_host *msm_host)
 {
 	int ret;
 
@@ -515,13 +515,13 @@ int dsi_link_clk_enable_6g(struct msm_dsi_host *msm_host)
 	ret = clk_set_rate(msm_host->byte_clk, msm_host->byte_clk_rate);
 	if (ret) {
 		pr_err("%s: Failed to set rate byte clk, %d\n", __func__, ret);
-		goto error;
+		return ret;
 	}
 
 	ret = clk_set_rate(msm_host->pixel_clk, msm_host->pixel_clk_rate);
 	if (ret) {
 		pr_err("%s: Failed to set rate pixel clk, %d\n", __func__, ret);
-		goto error;
+		return ret;
 	}
 
 	if (msm_host->byte_intf_clk) {
@@ -530,10 +530,18 @@ int dsi_link_clk_enable_6g(struct msm_dsi_host *msm_host)
 		if (ret) {
 			pr_err("%s: Failed to set rate byte intf clk, %d\n",
 			       __func__, ret);
-			goto error;
+			return ret;
 		}
 	}
 
+	return 0;
+}
+
+
+int dsi_link_clk_enable_6g(struct msm_dsi_host *msm_host)
+{
+	int ret;
+
 	ret = clk_prepare_enable(msm_host->esc_clk);
 	if (ret) {
 		pr_err("%s: Failed to enable dsi esc clk\n", __func__);
@@ -573,7 +581,7 @@ error:
 	return ret;
 }
 
-int dsi_link_clk_enable_v2(struct msm_dsi_host *msm_host)
+int dsi_link_clk_set_rate_v2(struct msm_dsi_host *msm_host)
 {
 	int ret;
 
@@ -584,27 +592,34 @@ int dsi_link_clk_enable_v2(struct msm_dsi_host *msm_host)
 	ret = clk_set_rate(msm_host->byte_clk, msm_host->byte_clk_rate);
 	if (ret) {
 		pr_err("%s: Failed to set rate byte clk, %d\n", __func__, ret);
-		goto error;
+		return ret;
 	}
 
 	ret = clk_set_rate(msm_host->esc_clk, msm_host->esc_clk_rate);
 	if (ret) {
 		pr_err("%s: Failed to set rate esc clk, %d\n", __func__, ret);
-		goto error;
+		return ret;
 	}
 
 	ret = clk_set_rate(msm_host->src_clk, msm_host->src_clk_rate);
 	if (ret) {
 		pr_err("%s: Failed to set rate src clk, %d\n", __func__, ret);
-		goto error;
+		return ret;
 	}
 
 	ret = clk_set_rate(msm_host->pixel_clk, msm_host->pixel_clk_rate);
 	if (ret) {
 		pr_err("%s: Failed to set rate pixel clk, %d\n", __func__, ret);
-		goto error;
+		return ret;
 	}
 
+	return 0;
+}
+
+int dsi_link_clk_enable_v2(struct msm_dsi_host *msm_host)
+{
+	int ret;
+
 	ret = clk_prepare_enable(msm_host->byte_clk);
 	if (ret) {
 		pr_err("%s: Failed to enable dsi byte clk\n", __func__);
@@ -818,7 +833,7 @@ static void dsi_ctrl_config(struct msm_dsi_host *msm_host, bool enable,
 	u32 flags = msm_host->mode_flags;
 	enum mipi_dsi_pixel_format mipi_fmt = msm_host->format;
 	const struct msm_dsi_cfg_handler *cfg_hnd = msm_host->cfg_hnd;
-	u32 data = 0;
+	u32 data = 0, lane_ctrl = 0;
 
 	if (!enable) {
 		dsi_write(msm_host, REG_DSI_CTRL, 0);
@@ -906,9 +921,11 @@ static void dsi_ctrl_config(struct msm_dsi_host *msm_host, bool enable,
 	dsi_write(msm_host, REG_DSI_LANE_SWAP_CTRL,
 		  DSI_LANE_SWAP_CTRL_DLN_SWAP_SEL(msm_host->dlane_swap));
 
-	if (!(flags & MIPI_DSI_CLOCK_NON_CONTINUOUS))
+	if (!(flags & MIPI_DSI_CLOCK_NON_CONTINUOUS)) {
+		lane_ctrl = dsi_read(msm_host, REG_DSI_LANE_CTRL);
 		dsi_write(msm_host, REG_DSI_LANE_CTRL,
-			DSI_LANE_CTRL_CLKLN_HS_FORCE_REQUEST);
+			lane_ctrl | DSI_LANE_CTRL_CLKLN_HS_FORCE_REQUEST);
+	}
 
 	data |= DSI_CTRL_ENABLE;
 
@@ -1996,6 +2013,7 @@ int msm_dsi_host_xfer_prepare(struct mipi_dsi_host *host,
 	 * mdp clock need to be enabled to receive dsi interrupt
 	 */
 	pm_runtime_get_sync(&msm_host->pdev->dev);
+	cfg_hnd->ops->link_clk_set_rate(msm_host);
 	cfg_hnd->ops->link_clk_enable(msm_host);
 
 	/* TODO: vote for bus bandwidth */
@@ -2344,7 +2362,9 @@ int msm_dsi_host_power_on(struct mipi_dsi_host *host,
 	}
 
 	pm_runtime_get_sync(&msm_host->pdev->dev);
-	ret = cfg_hnd->ops->link_clk_enable(msm_host);
+	ret = cfg_hnd->ops->link_clk_set_rate(msm_host);
+	if (!ret)
+		ret = cfg_hnd->ops->link_clk_enable(msm_host);
 	if (ret) {
 		pr_err("%s: failed to enable link clocks. ret=%d\n",
 		       __func__, ret);
diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c
index 0fc29f1be8cc..104115d112eb 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_manager.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c
@@ -432,20 +432,8 @@ static void dsi_mgr_bridge_pre_enable(struct drm_bridge *bridge)
 		}
 	}
 
-	if (panel) {
-		ret = drm_panel_enable(panel);
-		if (ret) {
-			pr_err("%s: enable panel %d failed, %d\n", __func__, id,
-									ret);
-			goto panel_en_fail;
-		}
-	}
-
 	return;
 
-panel_en_fail:
-	if (is_dual_dsi && msm_dsi1)
-		msm_dsi_host_disable(msm_dsi1->host);
 host1_en_fail:
 	msm_dsi_host_disable(host);
 host_en_fail:
@@ -464,12 +452,51 @@ phy_en_fail:
 
 static void dsi_mgr_bridge_enable(struct drm_bridge *bridge)
 {
-	DBG("");
+	int id = dsi_mgr_bridge_get_id(bridge);
+	struct msm_dsi *msm_dsi = dsi_mgr_get_dsi(id);
+	struct drm_panel *panel = msm_dsi->panel;
+	bool is_dual_dsi = IS_DUAL_DSI();
+	int ret;
+
+	DBG("id=%d", id);
+	if (!msm_dsi_device_connected(msm_dsi))
+		return;
+
+	/* Do nothing with the host if it is slave-DSI in case of dual DSI */
+	if (is_dual_dsi && !IS_MASTER_DSI_LINK(id))
+		return;
+
+	if (panel) {
+		ret = drm_panel_enable(panel);
+		if (ret) {
+			pr_err("%s: enable panel %d failed, %d\n", __func__, id,
+									ret);
+		}
+	}
 }
 
 static void dsi_mgr_bridge_disable(struct drm_bridge *bridge)
 {
-	DBG("");
+	int id = dsi_mgr_bridge_get_id(bridge);
+	struct msm_dsi *msm_dsi = dsi_mgr_get_dsi(id);
+	struct drm_panel *panel = msm_dsi->panel;
+	bool is_dual_dsi = IS_DUAL_DSI();
+	int ret;
+
+	DBG("id=%d", id);
+	if (!msm_dsi_device_connected(msm_dsi))
+		return;
+
+	/* Do nothing with the host if it is slave-DSI in case of dual DSI */
+	if (is_dual_dsi && !IS_MASTER_DSI_LINK(id))
+		return;
+
+	if (panel) {
+		ret = drm_panel_disable(panel);
+		if (ret)
+			pr_err("%s: Panel %d OFF failed, %d\n", __func__, id,
+									ret);
+	}
 }
 
 static void dsi_mgr_bridge_post_disable(struct drm_bridge *bridge)
@@ -495,13 +522,6 @@ static void dsi_mgr_bridge_post_disable(struct drm_bridge *bridge)
 	if (is_dual_dsi && !IS_MASTER_DSI_LINK(id))
 		goto disable_phy;
 
-	if (panel) {
-		ret = drm_panel_disable(panel);
-		if (ret)
-			pr_err("%s: Panel %d OFF failed, %d\n", __func__, id,
-									ret);
-	}
-
 	ret = msm_dsi_host_disable(host);
 	if (ret)
 		pr_err("%s: host %d disable failed, %d\n", __func__, id, ret);
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_connector.c b/drivers/gpu/drm/msm/hdmi/hdmi_connector.c
index 59702684d576..58707a1f3878 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_connector.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_connector.c
@@ -101,7 +101,7 @@ static int gpio_config(struct hdmi *hdmi, bool on)
 
 				gpiod_set_value_cansleep(gpio.gpiod, value);
 			}
-		};
+		}
 
 		DBG("gpio off");
 	}
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index c84f0a8b3f2c..f50fefb87040 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -1192,7 +1192,8 @@ static int add_display_components(struct device *dev,
 	 * the interfaces to our components list.
 	 */
 	if (of_device_is_compatible(dev->of_node, "qcom,mdss") ||
-	    of_device_is_compatible(dev->of_node, "qcom,sdm845-mdss")) {
+	    of_device_is_compatible(dev->of_node, "qcom,sdm845-mdss") ||
+	    of_device_is_compatible(dev->of_node, "qcom,sc7180-mdss")) {
 		ret = of_platform_populate(dev->of_node, NULL, NULL, dev);
 		if (ret) {
 			DRM_DEV_ERROR(dev, "failed to populate children devices\n");
@@ -1317,6 +1318,7 @@ static const struct of_device_id dt_match[] = {
 	{ .compatible = "qcom,mdp4", .data = (void *)KMS_MDP4 },
 	{ .compatible = "qcom,mdss", .data = (void *)KMS_MDP5 },
 	{ .compatible = "qcom,sdm845-mdss", .data = (void *)KMS_DPU },
+	{ .compatible = "qcom,sc7180-mdss", .data = (void *)KMS_DPU },
 	{}
 };
 MODULE_DEVICE_TABLE(of, dt_match);
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index ab8f0f9c9dc8..be5bc2e8425c 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -111,8 +111,15 @@ struct msm_gpu {
 	struct clk *ebi1_clk, *core_clk, *rbbmtimer_clk;
 	uint32_t fast_rate;
 
+	/* The gfx-mem interconnect path that's used by all GPU types. */
 	struct icc_path *icc_path;
 
+	/*
+	 * Second interconnect path for some A3xx and all A4xx GPUs to the
+	 * On Chip MEMory (OCMEM).
+	 */
+	struct icc_path *ocmem_icc_path;
+
 	/* Hang and Inactivity Detection:
 	 */
 #define DRM_MSM_INACTIVE_PERIOD   66 /* in ms (roughly four frames) */
diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.h b/drivers/gpu/drm/rockchip/cdn-dp-core.h
index 83c4586665b4..81ac9b658a70 100644
--- a/drivers/gpu/drm/rockchip/cdn-dp-core.h
+++ b/drivers/gpu/drm/rockchip/cdn-dp-core.h
@@ -95,7 +95,7 @@ struct cdn_dp_device {
 	struct cdn_dp_port *port[MAX_PHY];
 	u8 ports;
 	u8 max_lanes;
-	u8 max_rate;
+	unsigned int max_rate;
 	u8 lanes;
 	int active_port;
 
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 2e3a058fc239..ec79e8e5ad3c 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -45,7 +45,7 @@
  * @guilty: atomic_t set to 1 when a job on this queue
  *          is found to be guilty causing a timeout
  *
- * Note: the rq_list should have atleast one element to schedule
+ * Note: the sched_list should have at least one element to schedule
  *       the entity
  *
  * Returns 0 on success or a negative error code on failure.
@@ -130,7 +130,7 @@ static struct drm_sched_rq *
 drm_sched_entity_get_free_sched(struct drm_sched_entity *entity)
 {
 	struct drm_sched_rq *rq = NULL;
-	unsigned int min_jobs = UINT_MAX, num_jobs;
+	unsigned int min_score = UINT_MAX, num_score;
 	int i;
 
 	for (i = 0; i < entity->num_sched_list; ++i) {
@@ -141,9 +141,9 @@ drm_sched_entity_get_free_sched(struct drm_sched_entity *entity)
 			continue;
 		}
 
-		num_jobs = atomic_read(&sched->num_jobs);
-		if (num_jobs < min_jobs) {
-			min_jobs = num_jobs;
+		num_score = atomic_read(&sched->score);
+		if (num_score < min_score) {
+			min_score = num_score;
 			rq = &entity->sched_list[i]->sched_rq[entity->priority];
 		}
 	}
@@ -498,7 +498,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
 	bool first;
 
 	trace_drm_sched_job(sched_job, entity);
-	atomic_inc(&entity->rq->sched->num_jobs);
+	atomic_inc(&entity->rq->sched->score);
 	WRITE_ONCE(entity->last_user, current->group_leader);
 	first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);
 
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 3fad5876a13f..71ce6215956f 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -92,6 +92,7 @@ void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
 	if (!list_empty(&entity->list))
 		return;
 	spin_lock(&rq->lock);
+	atomic_inc(&rq->sched->score);
 	list_add_tail(&entity->list, &rq->entities);
 	spin_unlock(&rq->lock);
 }
@@ -110,6 +111,7 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
 	if (list_empty(&entity->list))
 		return;
 	spin_lock(&rq->lock);
+	atomic_dec(&rq->sched->score);
 	list_del_init(&entity->list);
 	if (rq->current_entity == entity)
 		rq->current_entity = NULL;
@@ -655,7 +657,7 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
 	struct drm_gpu_scheduler *sched = s_fence->sched;
 
 	atomic_dec(&sched->hw_rq_count);
-	atomic_dec(&sched->num_jobs);
+	atomic_dec(&sched->score);
 
 	trace_drm_sched_process_job(s_fence);
 
@@ -830,7 +832,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
 	spin_lock_init(&sched->job_list_lock);
 	atomic_set(&sched->hw_rq_count, 0);
 	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
-	atomic_set(&sched->num_jobs, 0);
+	atomic_set(&sched->score, 0);
 	atomic64_set(&sched->job_id_count, 0);
 
 	/* Each scheduler will run on a seperate kernel thread */
diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
index a7c4654445c7..68d4644ac2dc 100644
--- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
+++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
@@ -685,8 +685,6 @@ static void sun4i_hdmi_unbind(struct device *dev, struct device *master,
 	struct sun4i_hdmi *hdmi = dev_get_drvdata(dev);
 
 	cec_unregister_adapter(hdmi->cec_adap);
-	drm_connector_cleanup(&hdmi->connector);
-	drm_encoder_cleanup(&hdmi->encoder);
 	i2c_del_adapter(hdmi->i2c);
 	i2c_put_adapter(hdmi->ddc_i2c);
 	clk_disable_unprepare(hdmi->mod_clk);
diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c
index 42651d737c55..c81cdce6ed55 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
@@ -489,7 +489,7 @@ static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon,
 
 	WARN_ON(!tcon->quirks->has_channel_0);
 
-	tcon->dclk_min_div = 1;
+	tcon->dclk_min_div = tcon->quirks->dclk_min_div;
 	tcon->dclk_max_div = 127;
 	sun4i_tcon0_mode_set_common(tcon, mode);
 
@@ -1426,12 +1426,14 @@ static int sun8i_r40_tcon_tv_set_mux(struct sun4i_tcon *tcon,
 static const struct sun4i_tcon_quirks sun4i_a10_quirks = {
 	.has_channel_0		= true,
 	.has_channel_1		= true,
+	.dclk_min_div		= 4,
 	.set_mux		= sun4i_a10_tcon_set_mux,
 };
 
 static const struct sun4i_tcon_quirks sun5i_a13_quirks = {
 	.has_channel_0		= true,
 	.has_channel_1		= true,
+	.dclk_min_div		= 4,
 	.set_mux		= sun5i_a13_tcon_set_mux,
 };
 
@@ -1440,6 +1442,7 @@ static const struct sun4i_tcon_quirks sun6i_a31_quirks = {
 	.has_channel_1		= true,
 	.has_lvds_alt		= true,
 	.needs_de_be_mux	= true,
+	.dclk_min_div		= 1,
 	.set_mux		= sun6i_tcon_set_mux,
 };
 
@@ -1447,11 +1450,13 @@ static const struct sun4i_tcon_quirks sun6i_a31s_quirks = {
 	.has_channel_0		= true,
 	.has_channel_1		= true,
 	.needs_de_be_mux	= true,
+	.dclk_min_div		= 1,
 };
 
 static const struct sun4i_tcon_quirks sun7i_a20_quirks = {
 	.has_channel_0		= true,
 	.has_channel_1		= true,
+	.dclk_min_div		= 4,
 	/* Same display pipeline structure as A10 */
 	.set_mux		= sun4i_a10_tcon_set_mux,
 };
@@ -1459,11 +1464,13 @@ static const struct sun4i_tcon_quirks sun7i_a20_quirks = {
 static const struct sun4i_tcon_quirks sun8i_a33_quirks = {
 	.has_channel_0		= true,
 	.has_lvds_alt		= true,
+	.dclk_min_div		= 1,
 };
 
 static const struct sun4i_tcon_quirks sun8i_a83t_lcd_quirks = {
 	.supports_lvds		= true,
 	.has_channel_0		= true,
+	.dclk_min_div		= 1,
 };
 
 static const struct sun4i_tcon_quirks sun8i_a83t_tv_quirks = {
@@ -1477,11 +1484,13 @@ static const struct sun4i_tcon_quirks sun8i_r40_tv_quirks = {
 
 static const struct sun4i_tcon_quirks sun8i_v3s_quirks = {
 	.has_channel_0		= true,
+	.dclk_min_div		= 1,
 };
 
 static const struct sun4i_tcon_quirks sun9i_a80_tcon_lcd_quirks = {
-	.has_channel_0	= true,
-	.needs_edp_reset = true,
+	.has_channel_0		= true,
+	.needs_edp_reset	= true,
+	.dclk_min_div		= 1,
 };
 
 static const struct sun4i_tcon_quirks sun9i_a80_tcon_tv_quirks = {
diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.h b/drivers/gpu/drm/sun4i/sun4i_tcon.h
index f9f1fe80b206..a62ec826ae71 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.h
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.h
@@ -224,6 +224,7 @@ struct sun4i_tcon_quirks {
 	bool	needs_de_be_mux; /* sun6i needs mux to select backend */
 	bool    needs_edp_reset; /* a80 edp reset needed for tcon0 access */
 	bool	supports_lvds;   /* Does the TCON support an LVDS output? */
+	u8	dclk_min_div;	/* minimum divider for TCON0 DCLK */
 
 	/* callback to handle tcon muxing options */
 	int	(*set_mux)(struct sun4i_tcon *, const struct drm_encoder *);
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 714af052fbef..7c70fd31a4c2 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -1727,6 +1727,7 @@ static void tegra_crtc_atomic_disable(struct drm_crtc *crtc,
 {
 	struct tegra_dc *dc = to_tegra_dc(crtc);
 	u32 value;
+	int err;
 
 	if (!tegra_dc_idle(dc)) {
 		tegra_dc_stop(dc);
@@ -1773,7 +1774,9 @@ static void tegra_crtc_atomic_disable(struct drm_crtc *crtc,
 
 	spin_unlock_irq(&crtc->dev->event_lock);
 
-	pm_runtime_put_sync(dc->dev);
+	err = host1x_client_suspend(&dc->client);
+	if (err < 0)
+		dev_err(dc->dev, "failed to suspend: %d\n", err);
 }
 
 static void tegra_crtc_atomic_enable(struct drm_crtc *crtc,
@@ -1783,8 +1786,13 @@ static void tegra_crtc_atomic_enable(struct drm_crtc *crtc,
 	struct tegra_dc_state *state = to_dc_state(crtc->state);
 	struct tegra_dc *dc = to_tegra_dc(crtc);
 	u32 value;
+	int err;
 
-	pm_runtime_get_sync(dc->dev);
+	err = host1x_client_resume(&dc->client);
+	if (err < 0) {
+		dev_err(dc->dev, "failed to resume: %d\n", err);
+		return;
+	}
 
 	/* initialize display controller */
 	if (dc->syncpt) {
@@ -1996,7 +2004,7 @@ static bool tegra_dc_has_window_groups(struct tegra_dc *dc)
 
 static int tegra_dc_init(struct host1x_client *client)
 {
-	struct drm_device *drm = dev_get_drvdata(client->parent);
+	struct drm_device *drm = dev_get_drvdata(client->host);
 	unsigned long flags = HOST1X_SYNCPT_CLIENT_MANAGED;
 	struct tegra_dc *dc = host1x_client_to_dc(client);
 	struct tegra_drm *tegra = drm->dev_private;
@@ -2012,6 +2020,15 @@ static int tegra_dc_init(struct host1x_client *client)
 	if (!tegra_dc_has_window_groups(dc))
 		return 0;
 
+	/*
+	 * Set the display hub as the host1x client parent for the display
+	 * controller. This is needed for the runtime reference counting that
+	 * ensures the display hub is always powered when any of the display
+	 * controllers are.
+	 */
+	if (dc->soc->has_nvdisplay)
+		client->parent = &tegra->hub->client;
+
 	dc->syncpt = host1x_syncpt_request(client, flags);
 	if (!dc->syncpt)
 		dev_warn(dc->dev, "failed to allocate syncpoint\n");
@@ -2077,9 +2094,9 @@ static int tegra_dc_init(struct host1x_client *client)
 
 	/*
 	 * Inherit the DMA parameters (such as maximum segment size) from the
-	 * parent device.
+	 * parent host1x device.
 	 */
-	client->dev->dma_parms = client->parent->dma_parms;
+	client->dev->dma_parms = client->host->dma_parms;
 
 	return 0;
 
@@ -2121,9 +2138,74 @@ static int tegra_dc_exit(struct host1x_client *client)
 	return 0;
 }
 
+static int tegra_dc_runtime_suspend(struct host1x_client *client)
+{
+	struct tegra_dc *dc = host1x_client_to_dc(client);
+	struct device *dev = client->dev;
+	int err;
+
+	err = reset_control_assert(dc->rst);
+	if (err < 0) {
+		dev_err(dev, "failed to assert reset: %d\n", err);
+		return err;
+	}
+
+	if (dc->soc->has_powergate)
+		tegra_powergate_power_off(dc->powergate);
+
+	clk_disable_unprepare(dc->clk);
+	pm_runtime_put_sync(dev);
+
+	return 0;
+}
+
+static int tegra_dc_runtime_resume(struct host1x_client *client)
+{
+	struct tegra_dc *dc = host1x_client_to_dc(client);
+	struct device *dev = client->dev;
+	int err;
+
+	err = pm_runtime_get_sync(dev);
+	if (err < 0) {
+		dev_err(dev, "failed to get runtime PM: %d\n", err);
+		return err;
+	}
+
+	if (dc->soc->has_powergate) {
+		err = tegra_powergate_sequence_power_up(dc->powergate, dc->clk,
+							dc->rst);
+		if (err < 0) {
+			dev_err(dev, "failed to power partition: %d\n", err);
+			goto put_rpm;
+		}
+	} else {
+		err = clk_prepare_enable(dc->clk);
+		if (err < 0) {
+			dev_err(dev, "failed to enable clock: %d\n", err);
+			goto put_rpm;
+		}
+
+		err = reset_control_deassert(dc->rst);
+		if (err < 0) {
+			dev_err(dev, "failed to deassert reset: %d\n", err);
+			goto disable_clk;
+		}
+	}
+
+	return 0;
+
+disable_clk:
+	clk_disable_unprepare(dc->clk);
+put_rpm:
+	pm_runtime_put_sync(dev);
+	return err;
+}
+
 static const struct host1x_client_ops dc_client_ops = {
 	.init = tegra_dc_init,
 	.exit = tegra_dc_exit,
+	.suspend = tegra_dc_runtime_suspend,
+	.resume = tegra_dc_runtime_resume,
 };
 
 static const struct tegra_dc_soc_info tegra20_dc_soc_info = {
@@ -2535,65 +2617,10 @@ static int tegra_dc_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM
-static int tegra_dc_suspend(struct device *dev)
-{
-	struct tegra_dc *dc = dev_get_drvdata(dev);
-	int err;
-
-	err = reset_control_assert(dc->rst);
-	if (err < 0) {
-		dev_err(dev, "failed to assert reset: %d\n", err);
-		return err;
-	}
-
-	if (dc->soc->has_powergate)
-		tegra_powergate_power_off(dc->powergate);
-
-	clk_disable_unprepare(dc->clk);
-
-	return 0;
-}
-
-static int tegra_dc_resume(struct device *dev)
-{
-	struct tegra_dc *dc = dev_get_drvdata(dev);
-	int err;
-
-	if (dc->soc->has_powergate) {
-		err = tegra_powergate_sequence_power_up(dc->powergate, dc->clk,
-							dc->rst);
-		if (err < 0) {
-			dev_err(dev, "failed to power partition: %d\n", err);
-			return err;
-		}
-	} else {
-		err = clk_prepare_enable(dc->clk);
-		if (err < 0) {
-			dev_err(dev, "failed to enable clock: %d\n", err);
-			return err;
-		}
-
-		err = reset_control_deassert(dc->rst);
-		if (err < 0) {
-			dev_err(dev, "failed to deassert reset: %d\n", err);
-			return err;
-		}
-	}
-
-	return 0;
-}
-#endif
-
-static const struct dev_pm_ops tegra_dc_pm_ops = {
-	SET_RUNTIME_PM_OPS(tegra_dc_suspend, tegra_dc_resume, NULL)
-};
-
 struct platform_driver tegra_dc_driver = {
 	.driver = {
 		.name = "tegra-dc",
 		.of_match_table = tegra_dc_of_match,
-		.pm = &tegra_dc_pm_ops,
 	},
 	.probe = tegra_dc_probe,
 	.remove = tegra_dc_remove,
diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c
index 622cdf1ad246..7dfb50f65067 100644
--- a/drivers/gpu/drm/tegra/dpaux.c
+++ b/drivers/gpu/drm/tegra/dpaux.c
@@ -588,7 +588,7 @@ static int tegra_dpaux_remove(struct platform_device *pdev)
 	/* make sure pads are powered down when not in use */
 	tegra_dpaux_pad_power_down(dpaux);
 
-	pm_runtime_put(&pdev->dev);
+	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 
 	drm_dp_aux_unregister(&dpaux->aux);
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index f455ce71e85d..aa9e49f04988 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -905,7 +905,7 @@ int tegra_drm_unregister_client(struct tegra_drm *tegra,
 int host1x_client_iommu_attach(struct host1x_client *client)
 {
 	struct iommu_domain *domain = iommu_get_domain_for_dev(client->dev);
-	struct drm_device *drm = dev_get_drvdata(client->parent);
+	struct drm_device *drm = dev_get_drvdata(client->host);
 	struct tegra_drm *tegra = drm->dev_private;
 	struct iommu_group *group = NULL;
 	int err;
@@ -941,7 +941,7 @@ int host1x_client_iommu_attach(struct host1x_client *client)
 
 void host1x_client_iommu_detach(struct host1x_client *client)
 {
-	struct drm_device *drm = dev_get_drvdata(client->parent);
+	struct drm_device *drm = dev_get_drvdata(client->host);
 	struct tegra_drm *tegra = drm->dev_private;
 	struct iommu_domain *domain;
 
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index d941553f7a3d..ed99b67deb29 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -144,6 +144,8 @@ int tegra_output_init(struct drm_device *drm, struct tegra_output *output);
 void tegra_output_exit(struct tegra_output *output);
 void tegra_output_find_possible_crtcs(struct tegra_output *output,
 				      struct drm_device *drm);
+int tegra_output_suspend(struct tegra_output *output);
+int tegra_output_resume(struct tegra_output *output);
 
 int tegra_output_connector_get_modes(struct drm_connector *connector);
 enum drm_connector_status
diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c
index a5d47e301c5f..88b9d64c77bf 100644
--- a/drivers/gpu/drm/tegra/dsi.c
+++ b/drivers/gpu/drm/tegra/dsi.c
@@ -840,7 +840,9 @@ static void tegra_dsi_unprepare(struct tegra_dsi *dsi)
 		dev_err(dsi->dev, "failed to disable MIPI calibration: %d\n",
 			err);
 
-	pm_runtime_put(dsi->dev);
+	err = host1x_client_suspend(&dsi->client);
+	if (err < 0)
+		dev_err(dsi->dev, "failed to suspend: %d\n", err);
 }
 
 static void tegra_dsi_encoder_disable(struct drm_encoder *encoder)
@@ -882,11 +884,15 @@ static void tegra_dsi_encoder_disable(struct drm_encoder *encoder)
 	tegra_dsi_unprepare(dsi);
 }
 
-static void tegra_dsi_prepare(struct tegra_dsi *dsi)
+static int tegra_dsi_prepare(struct tegra_dsi *dsi)
 {
 	int err;
 
-	pm_runtime_get_sync(dsi->dev);
+	err = host1x_client_resume(&dsi->client);
+	if (err < 0) {
+		dev_err(dsi->dev, "failed to resume: %d\n", err);
+		return err;
+	}
 
 	err = tegra_mipi_enable(dsi->mipi);
 	if (err < 0)
@@ -899,6 +905,8 @@ static void tegra_dsi_prepare(struct tegra_dsi *dsi)
 
 	if (dsi->slave)
 		tegra_dsi_prepare(dsi->slave);
+
+	return 0;
 }
 
 static void tegra_dsi_encoder_enable(struct drm_encoder *encoder)
@@ -909,8 +917,13 @@ static void tegra_dsi_encoder_enable(struct drm_encoder *encoder)
 	struct tegra_dsi *dsi = to_dsi(output);
 	struct tegra_dsi_state *state;
 	u32 value;
+	int err;
 
-	tegra_dsi_prepare(dsi);
+	err = tegra_dsi_prepare(dsi);
+	if (err < 0) {
+		dev_err(dsi->dev, "failed to prepare: %d\n", err);
+		return;
+	}
 
 	state = tegra_dsi_get_state(dsi);
 
@@ -1030,7 +1043,7 @@ static const struct drm_encoder_helper_funcs tegra_dsi_encoder_helper_funcs = {
 
 static int tegra_dsi_init(struct host1x_client *client)
 {
-	struct drm_device *drm = dev_get_drvdata(client->parent);
+	struct drm_device *drm = dev_get_drvdata(client->host);
 	struct tegra_dsi *dsi = host1x_client_to_dsi(client);
 	int err;
 
@@ -1075,9 +1088,89 @@ static int tegra_dsi_exit(struct host1x_client *client)
 	return 0;
 }
 
+static int tegra_dsi_runtime_suspend(struct host1x_client *client)
+{
+	struct tegra_dsi *dsi = host1x_client_to_dsi(client);
+	struct device *dev = client->dev;
+	int err;
+
+	if (dsi->rst) {
+		err = reset_control_assert(dsi->rst);
+		if (err < 0) {
+			dev_err(dev, "failed to assert reset: %d\n", err);
+			return err;
+		}
+	}
+
+	usleep_range(1000, 2000);
+
+	clk_disable_unprepare(dsi->clk_lp);
+	clk_disable_unprepare(dsi->clk);
+
+	regulator_disable(dsi->vdd);
+	pm_runtime_put_sync(dev);
+
+	return 0;
+}
+
+static int tegra_dsi_runtime_resume(struct host1x_client *client)
+{
+	struct tegra_dsi *dsi = host1x_client_to_dsi(client);
+	struct device *dev = client->dev;
+	int err;
+
+	err = pm_runtime_get_sync(dev);
+	if (err < 0) {
+		dev_err(dev, "failed to get runtime PM: %d\n", err);
+		return err;
+	}
+
+	err = regulator_enable(dsi->vdd);
+	if (err < 0) {
+		dev_err(dev, "failed to enable VDD supply: %d\n", err);
+		goto put_rpm;
+	}
+
+	err = clk_prepare_enable(dsi->clk);
+	if (err < 0) {
+		dev_err(dev, "cannot enable DSI clock: %d\n", err);
+		goto disable_vdd;
+	}
+
+	err = clk_prepare_enable(dsi->clk_lp);
+	if (err < 0) {
+		dev_err(dev, "cannot enable low-power clock: %d\n", err);
+		goto disable_clk;
+	}
+
+	usleep_range(1000, 2000);
+
+	if (dsi->rst) {
+		err = reset_control_deassert(dsi->rst);
+		if (err < 0) {
+			dev_err(dev, "cannot assert reset: %d\n", err);
+			goto disable_clk_lp;
+		}
+	}
+
+	return 0;
+
+disable_clk_lp:
+	clk_disable_unprepare(dsi->clk_lp);
+disable_clk:
+	clk_disable_unprepare(dsi->clk);
+disable_vdd:
+	regulator_disable(dsi->vdd);
+put_rpm:
+	pm_runtime_put_sync(dev);
+	return err;
+}
+
 static const struct host1x_client_ops dsi_client_ops = {
 	.init = tegra_dsi_init,
 	.exit = tegra_dsi_exit,
+	.suspend = tegra_dsi_runtime_suspend,
+	.resume = tegra_dsi_runtime_resume,
 };
 
 static int tegra_dsi_setup_clocks(struct tegra_dsi *dsi)
@@ -1596,79 +1689,6 @@ static int tegra_dsi_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM
-static int tegra_dsi_suspend(struct device *dev)
-{
-	struct tegra_dsi *dsi = dev_get_drvdata(dev);
-	int err;
-
-	if (dsi->rst) {
-		err = reset_control_assert(dsi->rst);
-		if (err < 0) {
-			dev_err(dev, "failed to assert reset: %d\n", err);
-			return err;
-		}
-	}
-
-	usleep_range(1000, 2000);
-
-	clk_disable_unprepare(dsi->clk_lp);
-	clk_disable_unprepare(dsi->clk);
-
-	regulator_disable(dsi->vdd);
-
-	return 0;
-}
-
-static int tegra_dsi_resume(struct device *dev)
-{
-	struct tegra_dsi *dsi = dev_get_drvdata(dev);
-	int err;
-
-	err = regulator_enable(dsi->vdd);
-	if (err < 0) {
-		dev_err(dsi->dev, "failed to enable VDD supply: %d\n", err);
-		return err;
-	}
-
-	err = clk_prepare_enable(dsi->clk);
-	if (err < 0) {
-		dev_err(dev, "cannot enable DSI clock: %d\n", err);
-		goto disable_vdd;
-	}
-
-	err = clk_prepare_enable(dsi->clk_lp);
-	if (err < 0) {
-		dev_err(dev, "cannot enable low-power clock: %d\n", err);
-		goto disable_clk;
-	}
-
-	usleep_range(1000, 2000);
-
-	if (dsi->rst) {
-		err = reset_control_deassert(dsi->rst);
-		if (err < 0) {
-			dev_err(dev, "cannot assert reset: %d\n", err);
-			goto disable_clk_lp;
-		}
-	}
-
-	return 0;
-
-disable_clk_lp:
-	clk_disable_unprepare(dsi->clk_lp);
-disable_clk:
-	clk_disable_unprepare(dsi->clk);
-disable_vdd:
-	regulator_disable(dsi->vdd);
-	return err;
-}
-#endif
-
-static const struct dev_pm_ops tegra_dsi_pm_ops = {
-	SET_RUNTIME_PM_OPS(tegra_dsi_suspend, tegra_dsi_resume, NULL)
-};
-
 static const struct of_device_id tegra_dsi_of_match[] = {
 	{ .compatible = "nvidia,tegra210-dsi", },
 	{ .compatible = "nvidia,tegra132-dsi", },
@@ -1682,7 +1702,6 @@ struct platform_driver tegra_dsi_driver = {
 	.driver = {
 		.name = "tegra-dsi",
 		.of_match_table = tegra_dsi_of_match,
-		.pm = &tegra_dsi_pm_ops,
 	},
 	.probe = tegra_dsi_probe,
 	.remove = tegra_dsi_remove,
diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c
index 1fc4e56c7cc5..48363f744bb9 100644
--- a/drivers/gpu/drm/tegra/gr2d.c
+++ b/drivers/gpu/drm/tegra/gr2d.c
@@ -34,7 +34,7 @@ static inline struct gr2d *to_gr2d(struct tegra_drm_client *client)
 static int gr2d_init(struct host1x_client *client)
 {
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
-	struct drm_device *dev = dev_get_drvdata(client->parent);
+	struct drm_device *dev = dev_get_drvdata(client->host);
 	unsigned long flags = HOST1X_SYNCPT_HAS_BASE;
 	struct gr2d *gr2d = to_gr2d(drm);
 	int err;
@@ -76,7 +76,7 @@ put:
 static int gr2d_exit(struct host1x_client *client)
 {
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
-	struct drm_device *dev = dev_get_drvdata(client->parent);
+	struct drm_device *dev = dev_get_drvdata(client->host);
 	struct tegra_drm *tegra = dev->dev_private;
 	struct gr2d *gr2d = to_gr2d(drm);
 	int err;
diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c
index 24fae0f64032..c0a528be0369 100644
--- a/drivers/gpu/drm/tegra/gr3d.c
+++ b/drivers/gpu/drm/tegra/gr3d.c
@@ -43,7 +43,7 @@ static inline struct gr3d *to_gr3d(struct tegra_drm_client *client)
 static int gr3d_init(struct host1x_client *client)
 {
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
-	struct drm_device *dev = dev_get_drvdata(client->parent);
+	struct drm_device *dev = dev_get_drvdata(client->host);
 	unsigned long flags = HOST1X_SYNCPT_HAS_BASE;
 	struct gr3d *gr3d = to_gr3d(drm);
 	int err;
@@ -85,7 +85,7 @@ put:
 static int gr3d_exit(struct host1x_client *client)
 {
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
-	struct drm_device *dev = dev_get_drvdata(client->parent);
+	struct drm_device *dev = dev_get_drvdata(client->host);
 	struct gr3d *gr3d = to_gr3d(drm);
 	int err;
 
diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c
index 21a629adcb51..6f117628f257 100644
--- a/drivers/gpu/drm/tegra/hdmi.c
+++ b/drivers/gpu/drm/tegra/hdmi.c
@@ -1146,6 +1146,7 @@ static void tegra_hdmi_encoder_disable(struct drm_encoder *encoder)
 	struct tegra_dc *dc = to_tegra_dc(encoder->crtc);
 	struct tegra_hdmi *hdmi = to_hdmi(output);
 	u32 value;
+	int err;
 
 	/*
 	 * The following accesses registers of the display controller, so make
@@ -1171,7 +1172,9 @@ static void tegra_hdmi_encoder_disable(struct drm_encoder *encoder)
 	tegra_hdmi_writel(hdmi, 0, HDMI_NV_PDISP_INT_ENABLE);
 	tegra_hdmi_writel(hdmi, 0, HDMI_NV_PDISP_INT_MASK);
 
-	pm_runtime_put(hdmi->dev);
+	err = host1x_client_suspend(&hdmi->client);
+	if (err < 0)
+		dev_err(hdmi->dev, "failed to suspend: %d\n", err);
 }
 
 static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder)
@@ -1186,7 +1189,11 @@ static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder)
 	u32 value;
 	int err;
 
-	pm_runtime_get_sync(hdmi->dev);
+	err = host1x_client_resume(&hdmi->client);
+	if (err < 0) {
+		dev_err(hdmi->dev, "failed to resume: %d\n", err);
+		return;
+	}
 
 	/*
 	 * Enable and unmask the HDA codec SCRATCH0 register interrupt. This
@@ -1424,8 +1431,8 @@ static const struct drm_encoder_helper_funcs tegra_hdmi_encoder_helper_funcs = {
 
 static int tegra_hdmi_init(struct host1x_client *client)
 {
-	struct drm_device *drm = dev_get_drvdata(client->parent);
 	struct tegra_hdmi *hdmi = host1x_client_to_hdmi(client);
+	struct drm_device *drm = dev_get_drvdata(client->host);
 	int err;
 
 	hdmi->output.dev = client->dev;
@@ -1490,9 +1497,66 @@ static int tegra_hdmi_exit(struct host1x_client *client)
 	return 0;
 }
 
+static int tegra_hdmi_runtime_suspend(struct host1x_client *client)
+{
+	struct tegra_hdmi *hdmi = host1x_client_to_hdmi(client);
+	struct device *dev = client->dev;
+	int err;
+
+	err = reset_control_assert(hdmi->rst);
+	if (err < 0) {
+		dev_err(dev, "failed to assert reset: %d\n", err);
+		return err;
+	}
+
+	usleep_range(1000, 2000);
+
+	clk_disable_unprepare(hdmi->clk);
+	pm_runtime_put_sync(dev);
+
+	return 0;
+}
+
+static int tegra_hdmi_runtime_resume(struct host1x_client *client)
+{
+	struct tegra_hdmi *hdmi = host1x_client_to_hdmi(client);
+	struct device *dev = client->dev;
+	int err;
+
+	err = pm_runtime_get_sync(dev);
+	if (err < 0) {
+		dev_err(dev, "failed to get runtime PM: %d\n", err);
+		return err;
+	}
+
+	err = clk_prepare_enable(hdmi->clk);
+	if (err < 0) {
+		dev_err(dev, "failed to enable clock: %d\n", err);
+		goto put_rpm;
+	}
+
+	usleep_range(1000, 2000);
+
+	err = reset_control_deassert(hdmi->rst);
+	if (err < 0) {
+		dev_err(dev, "failed to deassert reset: %d\n", err);
+		goto disable_clk;
+	}
+
+	return 0;
+
+disable_clk:
+	clk_disable_unprepare(hdmi->clk);
+put_rpm:
+	pm_runtime_put_sync(dev);
+	return err;
+}
+
 static const struct host1x_client_ops hdmi_client_ops = {
 	.init = tegra_hdmi_init,
 	.exit = tegra_hdmi_exit,
+	.suspend = tegra_hdmi_runtime_suspend,
+	.resume = tegra_hdmi_runtime_resume,
 };
 
 static const struct tegra_hdmi_config tegra20_hdmi_config = {
@@ -1700,58 +1764,10 @@ static int tegra_hdmi_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM
-static int tegra_hdmi_suspend(struct device *dev)
-{
-	struct tegra_hdmi *hdmi = dev_get_drvdata(dev);
-	int err;
-
-	err = reset_control_assert(hdmi->rst);
-	if (err < 0) {
-		dev_err(dev, "failed to assert reset: %d\n", err);
-		return err;
-	}
-
-	usleep_range(1000, 2000);
-
-	clk_disable_unprepare(hdmi->clk);
-
-	return 0;
-}
-
-static int tegra_hdmi_resume(struct device *dev)
-{
-	struct tegra_hdmi *hdmi = dev_get_drvdata(dev);
-	int err;
-
-	err = clk_prepare_enable(hdmi->clk);
-	if (err < 0) {
-		dev_err(dev, "failed to enable clock: %d\n", err);
-		return err;
-	}
-
-	usleep_range(1000, 2000);
-
-	err = reset_control_deassert(hdmi->rst);
-	if (err < 0) {
-		dev_err(dev, "failed to deassert reset: %d\n", err);
-		clk_disable_unprepare(hdmi->clk);
-		return err;
-	}
-
-	return 0;
-}
-#endif
-
-static const struct dev_pm_ops tegra_hdmi_pm_ops = {
-	SET_RUNTIME_PM_OPS(tegra_hdmi_suspend, tegra_hdmi_resume, NULL)
-};
-
 struct platform_driver tegra_hdmi_driver = {
 	.driver = {
 		.name = "tegra-hdmi",
 		.of_match_table = tegra_hdmi_of_match,
-		.pm = &tegra_hdmi_pm_ops,
 	},
 	.probe = tegra_hdmi_probe,
 	.remove = tegra_hdmi_remove,
diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c
index 47d985ac7cd7..8183e617bf6b 100644
--- a/drivers/gpu/drm/tegra/hub.c
+++ b/drivers/gpu/drm/tegra/hub.c
@@ -95,17 +95,25 @@ static inline void tegra_plane_writel(struct tegra_plane *plane, u32 value,
 
 static int tegra_windowgroup_enable(struct tegra_windowgroup *wgrp)
 {
+	int err = 0;
+
 	mutex_lock(&wgrp->lock);
 
 	if (wgrp->usecount == 0) {
-		pm_runtime_get_sync(wgrp->parent);
+		err = host1x_client_resume(wgrp->parent);
+		if (err < 0) {
+			dev_err(wgrp->parent->dev, "failed to resume: %d\n", err);
+			goto unlock;
+		}
+
 		reset_control_deassert(wgrp->rst);
 	}
 
 	wgrp->usecount++;
-	mutex_unlock(&wgrp->lock);
 
-	return 0;
+unlock:
+	mutex_unlock(&wgrp->lock);
+	return err;
 }
 
 static void tegra_windowgroup_disable(struct tegra_windowgroup *wgrp)
@@ -121,7 +129,7 @@ static void tegra_windowgroup_disable(struct tegra_windowgroup *wgrp)
 			       wgrp->index);
 		}
 
-		pm_runtime_put(wgrp->parent);
+		host1x_client_suspend(wgrp->parent);
 	}
 
 	wgrp->usecount--;
@@ -379,6 +387,7 @@ static void tegra_shared_plane_atomic_disable(struct drm_plane *plane,
 	struct tegra_plane *p = to_tegra_plane(plane);
 	struct tegra_dc *dc;
 	u32 value;
+	int err;
 
 	/* rien ne va plus */
 	if (!old_state || !old_state->crtc)
@@ -386,6 +395,12 @@ static void tegra_shared_plane_atomic_disable(struct drm_plane *plane,
 
 	dc = to_tegra_dc(old_state->crtc);
 
+	err = host1x_client_resume(&dc->client);
+	if (err < 0) {
+		dev_err(dc->dev, "failed to resume: %d\n", err);
+		return;
+	}
+
 	/*
 	 * XXX Legacy helpers seem to sometimes call ->atomic_disable() even
 	 * on planes that are already disabled. Make sure we fallback to the
@@ -394,15 +409,13 @@ static void tegra_shared_plane_atomic_disable(struct drm_plane *plane,
 	if (WARN_ON(p->dc == NULL))
 		p->dc = dc;
 
-	pm_runtime_get_sync(dc->dev);
-
 	value = tegra_plane_readl(p, DC_WIN_WIN_OPTIONS);
 	value &= ~WIN_ENABLE;
 	tegra_plane_writel(p, value, DC_WIN_WIN_OPTIONS);
 
 	tegra_dc_remove_shared_plane(dc, p);
 
-	pm_runtime_put(dc->dev);
+	host1x_client_suspend(&dc->client);
 }
 
 static void tegra_shared_plane_atomic_update(struct drm_plane *plane,
@@ -415,6 +428,7 @@ static void tegra_shared_plane_atomic_update(struct drm_plane *plane,
 	struct tegra_plane *p = to_tegra_plane(plane);
 	dma_addr_t base;
 	u32 value;
+	int err;
 
 	/* rien ne va plus */
 	if (!plane->state->crtc || !plane->state->fb)
@@ -425,7 +439,11 @@ static void tegra_shared_plane_atomic_update(struct drm_plane *plane,
 		return;
 	}
 
-	pm_runtime_get_sync(dc->dev);
+	err = host1x_client_resume(&dc->client);
+	if (err < 0) {
+		dev_err(dc->dev, "failed to resume: %d\n", err);
+		return;
+	}
 
 	tegra_dc_assign_shared_plane(dc, p);
 
@@ -515,7 +533,7 @@ static void tegra_shared_plane_atomic_update(struct drm_plane *plane,
 	value &= ~CONTROL_CSC_ENABLE;
 	tegra_plane_writel(p, value, DC_WIN_WINDOW_SET_CONTROL);
 
-	pm_runtime_put(dc->dev);
+	host1x_client_suspend(&dc->client);
 }
 
 static const struct drm_plane_helper_funcs tegra_shared_plane_helper_funcs = {
@@ -551,7 +569,7 @@ struct drm_plane *tegra_shared_plane_create(struct drm_device *drm,
 	plane->base.index = index;
 
 	plane->wgrp = &hub->wgrps[wgrp];
-	plane->wgrp->parent = dc->dev;
+	plane->wgrp->parent = &dc->client;
 
 	p = &plane->base.base;
 
@@ -656,8 +674,13 @@ int tegra_display_hub_atomic_check(struct drm_device *drm,
 static void tegra_display_hub_update(struct tegra_dc *dc)
 {
 	u32 value;
+	int err;
 
-	pm_runtime_get_sync(dc->dev);
+	err = host1x_client_resume(&dc->client);
+	if (err < 0) {
+		dev_err(dc->dev, "failed to resume: %d\n", err);
+		return;
+	}
 
 	value = tegra_dc_readl(dc, DC_CMD_IHUB_COMMON_MISC_CTL);
 	value &= ~LATENCY_EVENT;
@@ -672,7 +695,7 @@ static void tegra_display_hub_update(struct tegra_dc *dc)
 	tegra_dc_writel(dc, COMMON_ACTREQ, DC_CMD_STATE_CONTROL);
 	tegra_dc_readl(dc, DC_CMD_STATE_CONTROL);
 
-	pm_runtime_put(dc->dev);
+	host1x_client_suspend(&dc->client);
 }
 
 void tegra_display_hub_atomic_commit(struct drm_device *drm,
@@ -705,7 +728,7 @@ void tegra_display_hub_atomic_commit(struct drm_device *drm,
 static int tegra_display_hub_init(struct host1x_client *client)
 {
 	struct tegra_display_hub *hub = to_tegra_display_hub(client);
-	struct drm_device *drm = dev_get_drvdata(client->parent);
+	struct drm_device *drm = dev_get_drvdata(client->host);
 	struct tegra_drm *tegra = drm->dev_private;
 	struct tegra_display_hub_state *state;
 
@@ -723,7 +746,7 @@ static int tegra_display_hub_init(struct host1x_client *client)
 
 static int tegra_display_hub_exit(struct host1x_client *client)
 {
-	struct drm_device *drm = dev_get_drvdata(client->parent);
+	struct drm_device *drm = dev_get_drvdata(client->host);
 	struct tegra_drm *tegra = drm->dev_private;
 
 	drm_atomic_private_obj_fini(&tegra->hub->base);
@@ -732,9 +755,85 @@ static int tegra_display_hub_exit(struct host1x_client *client)
 	return 0;
 }
 
+static int tegra_display_hub_runtime_suspend(struct host1x_client *client)
+{
+	struct tegra_display_hub *hub = to_tegra_display_hub(client);
+	struct device *dev = client->dev;
+	unsigned int i = hub->num_heads;
+	int err;
+
+	err = reset_control_assert(hub->rst);
+	if (err < 0)
+		return err;
+
+	while (i--)
+		clk_disable_unprepare(hub->clk_heads[i]);
+
+	clk_disable_unprepare(hub->clk_hub);
+	clk_disable_unprepare(hub->clk_dsc);
+	clk_disable_unprepare(hub->clk_disp);
+
+	pm_runtime_put_sync(dev);
+
+	return 0;
+}
+
+static int tegra_display_hub_runtime_resume(struct host1x_client *client)
+{
+	struct tegra_display_hub *hub = to_tegra_display_hub(client);
+	struct device *dev = client->dev;
+	unsigned int i;
+	int err;
+
+	err = pm_runtime_get_sync(dev);
+	if (err < 0) {
+		dev_err(dev, "failed to get runtime PM: %d\n", err);
+		return err;
+	}
+
+	err = clk_prepare_enable(hub->clk_disp);
+	if (err < 0)
+		goto put_rpm;
+
+	err = clk_prepare_enable(hub->clk_dsc);
+	if (err < 0)
+		goto disable_disp;
+
+	err = clk_prepare_enable(hub->clk_hub);
+	if (err < 0)
+		goto disable_dsc;
+
+	for (i = 0; i < hub->num_heads; i++) {
+		err = clk_prepare_enable(hub->clk_heads[i]);
+		if (err < 0)
+			goto disable_heads;
+	}
+
+	err = reset_control_deassert(hub->rst);
+	if (err < 0)
+		goto disable_heads;
+
+	return 0;
+
+disable_heads:
+	while (i--)
+		clk_disable_unprepare(hub->clk_heads[i]);
+
+	clk_disable_unprepare(hub->clk_hub);
+disable_dsc:
+	clk_disable_unprepare(hub->clk_dsc);
+disable_disp:
+	clk_disable_unprepare(hub->clk_disp);
+put_rpm:
+	pm_runtime_put_sync(dev);
+	return err;
+}
+
 static const struct host1x_client_ops tegra_display_hub_ops = {
 	.init = tegra_display_hub_init,
 	.exit = tegra_display_hub_exit,
+	.suspend = tegra_display_hub_runtime_suspend,
+	.resume = tegra_display_hub_runtime_resume,
 };
 
 static int tegra_display_hub_probe(struct platform_device *pdev)
@@ -851,6 +950,7 @@ static int tegra_display_hub_probe(struct platform_device *pdev)
 static int tegra_display_hub_remove(struct platform_device *pdev)
 {
 	struct tegra_display_hub *hub = platform_get_drvdata(pdev);
+	unsigned int i;
 	int err;
 
 	err = host1x_client_unregister(&hub->client);
@@ -859,78 +959,17 @@ static int tegra_display_hub_remove(struct platform_device *pdev)
 			err);
 	}
 
-	pm_runtime_disable(&pdev->dev);
-
-	return err;
-}
-
-static int __maybe_unused tegra_display_hub_suspend(struct device *dev)
-{
-	struct tegra_display_hub *hub = dev_get_drvdata(dev);
-	unsigned int i = hub->num_heads;
-	int err;
-
-	err = reset_control_assert(hub->rst);
-	if (err < 0)
-		return err;
-
-	while (i--)
-		clk_disable_unprepare(hub->clk_heads[i]);
-
-	clk_disable_unprepare(hub->clk_hub);
-	clk_disable_unprepare(hub->clk_dsc);
-	clk_disable_unprepare(hub->clk_disp);
-
-	return 0;
-}
-
-static int __maybe_unused tegra_display_hub_resume(struct device *dev)
-{
-	struct tegra_display_hub *hub = dev_get_drvdata(dev);
-	unsigned int i;
-	int err;
-
-	err = clk_prepare_enable(hub->clk_disp);
-	if (err < 0)
-		return err;
-
-	err = clk_prepare_enable(hub->clk_dsc);
-	if (err < 0)
-		goto disable_disp;
-
-	err = clk_prepare_enable(hub->clk_hub);
-	if (err < 0)
-		goto disable_dsc;
+	for (i = 0; i < hub->soc->num_wgrps; i++) {
+		struct tegra_windowgroup *wgrp = &hub->wgrps[i];
 
-	for (i = 0; i < hub->num_heads; i++) {
-		err = clk_prepare_enable(hub->clk_heads[i]);
-		if (err < 0)
-			goto disable_heads;
+		mutex_destroy(&wgrp->lock);
 	}
 
-	err = reset_control_deassert(hub->rst);
-	if (err < 0)
-		goto disable_heads;
-
-	return 0;
-
-disable_heads:
-	while (i--)
-		clk_disable_unprepare(hub->clk_heads[i]);
+	pm_runtime_disable(&pdev->dev);
 
-	clk_disable_unprepare(hub->clk_hub);
-disable_dsc:
-	clk_disable_unprepare(hub->clk_dsc);
-disable_disp:
-	clk_disable_unprepare(hub->clk_disp);
 	return err;
 }
 
-static const struct dev_pm_ops tegra_display_hub_pm_ops = {
-	SET_RUNTIME_PM_OPS(tegra_display_hub_suspend,
-			   tegra_display_hub_resume, NULL)
-};
-
 static const struct tegra_display_hub_soc tegra186_display_hub = {
 	.num_wgrps = 6,
 	.supports_dsc = true,
@@ -958,7 +997,6 @@ struct platform_driver tegra_display_hub_driver = {
 	.driver = {
 		.name = "tegra-display-hub",
 		.of_match_table = tegra_display_hub_of_match,
-		.pm = &tegra_display_hub_pm_ops,
 	},
 	.probe = tegra_display_hub_probe,
 	.remove = tegra_display_hub_remove,
diff --git a/drivers/gpu/drm/tegra/hub.h b/drivers/gpu/drm/tegra/hub.h
index 767a60d9313c..3efa1be07ff8 100644
--- a/drivers/gpu/drm/tegra/hub.h
+++ b/drivers/gpu/drm/tegra/hub.h
@@ -17,7 +17,7 @@ struct tegra_windowgroup {
 	struct mutex lock;
 
 	unsigned int index;
-	struct device *parent;
+	struct host1x_client *parent;
 	struct reset_control *rst;
 };
 
diff --git a/drivers/gpu/drm/tegra/output.c b/drivers/gpu/drm/tegra/output.c
index 80ddde4adbae..a264259b97a2 100644
--- a/drivers/gpu/drm/tegra/output.c
+++ b/drivers/gpu/drm/tegra/output.c
@@ -250,3 +250,19 @@ void tegra_output_find_possible_crtcs(struct tegra_output *output,
 
 	output->encoder.possible_crtcs = mask;
 }
+
+int tegra_output_suspend(struct tegra_output *output)
+{
+	if (output->hpd_irq)
+		disable_irq(output->hpd_irq);
+
+	return 0;
+}
+
+int tegra_output_resume(struct tegra_output *output)
+{
+	if (output->hpd_irq)
+		enable_irq(output->hpd_irq);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index 1b8087d2dafe..41d24949478e 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -2255,7 +2255,7 @@ static void tegra_sor_hdmi_disable(struct drm_encoder *encoder)
 	if (err < 0)
 		dev_err(sor->dev, "failed to power off I/O pad: %d\n", err);
 
-	pm_runtime_put(sor->dev);
+	host1x_client_suspend(&sor->client);
 }
 
 static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
@@ -2276,7 +2276,11 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
 	mode = &encoder->crtc->state->adjusted_mode;
 	pclk = mode->clock * 1000;
 
-	pm_runtime_get_sync(sor->dev);
+	err = host1x_client_resume(&sor->client);
+	if (err < 0) {
+		dev_err(sor->dev, "failed to resume: %d\n", err);
+		return;
+	}
 
 	/* switch to safe parent clock */
 	err = tegra_sor_set_parent_clock(sor, sor->clk_safe);
@@ -2722,7 +2726,7 @@ static void tegra_sor_dp_disable(struct drm_encoder *encoder)
 	if (output->panel)
 		drm_panel_unprepare(output->panel);
 
-	pm_runtime_put(sor->dev);
+	host1x_client_suspend(&sor->client);
 }
 
 static void tegra_sor_dp_enable(struct drm_encoder *encoder)
@@ -2742,7 +2746,11 @@ static void tegra_sor_dp_enable(struct drm_encoder *encoder)
 	mode = &encoder->crtc->state->adjusted_mode;
 	info = &output->connector.display_info;
 
-	pm_runtime_get_sync(sor->dev);
+	err = host1x_client_resume(&sor->client);
+	if (err < 0) {
+		dev_err(sor->dev, "failed to resume: %d\n", err);
+		return;
+	}
 
 	/* switch to safe parent clock */
 	err = tegra_sor_set_parent_clock(sor, sor->clk_safe);
@@ -3053,7 +3061,7 @@ static const struct tegra_sor_ops tegra_sor_dp_ops = {
 
 static int tegra_sor_init(struct host1x_client *client)
 {
-	struct drm_device *drm = dev_get_drvdata(client->parent);
+	struct drm_device *drm = dev_get_drvdata(client->host);
 	const struct drm_encoder_helper_funcs *helpers = NULL;
 	struct tegra_sor *sor = host1x_client_to_sor(client);
 	int connector = DRM_MODE_CONNECTOR_Unknown;
@@ -3190,9 +3198,80 @@ static int tegra_sor_exit(struct host1x_client *client)
 	return 0;
 }
 
+static int tegra_sor_runtime_suspend(struct host1x_client *client)
+{
+	struct tegra_sor *sor = host1x_client_to_sor(client);
+	struct device *dev = client->dev;
+	int err;
+
+	if (sor->rst) {
+		err = reset_control_assert(sor->rst);
+		if (err < 0) {
+			dev_err(dev, "failed to assert reset: %d\n", err);
+			return err;
+		}
+
+		reset_control_release(sor->rst);
+	}
+
+	usleep_range(1000, 2000);
+
+	clk_disable_unprepare(sor->clk);
+	pm_runtime_put_sync(dev);
+
+	return 0;
+}
+
+static int tegra_sor_runtime_resume(struct host1x_client *client)
+{
+	struct tegra_sor *sor = host1x_client_to_sor(client);
+	struct device *dev = client->dev;
+	int err;
+
+	err = pm_runtime_get_sync(dev);
+	if (err < 0) {
+		dev_err(dev, "failed to get runtime PM: %d\n", err);
+		return err;
+	}
+
+	err = clk_prepare_enable(sor->clk);
+	if (err < 0) {
+		dev_err(dev, "failed to enable clock: %d\n", err);
+		goto put_rpm;
+	}
+
+	usleep_range(1000, 2000);
+
+	if (sor->rst) {
+		err = reset_control_acquire(sor->rst);
+		if (err < 0) {
+			dev_err(dev, "failed to acquire reset: %d\n", err);
+			goto disable_clk;
+		}
+
+		err = reset_control_deassert(sor->rst);
+		if (err < 0) {
+			dev_err(dev, "failed to deassert reset: %d\n", err);
+			goto release_reset;
+		}
+	}
+
+	return 0;
+
+release_reset:
+	reset_control_release(sor->rst);
+disable_clk:
+	clk_disable_unprepare(sor->clk);
+put_rpm:
+	pm_runtime_put_sync(dev);
+	return err;
+}
+
 static const struct host1x_client_ops sor_client_ops = {
 	.init = tegra_sor_init,
 	.exit = tegra_sor_exit,
+	.suspend = tegra_sor_runtime_suspend,
+	.resume = tegra_sor_runtime_resume,
 };
 
 static const u8 tegra124_sor_xbar_cfg[5] = {
@@ -3843,10 +3922,9 @@ static int tegra_sor_probe(struct platform_device *pdev)
 	if (!sor->clk_pad) {
 		char *name;
 
-		err = pm_runtime_get_sync(&pdev->dev);
+		err = host1x_client_resume(&sor->client);
 		if (err < 0) {
-			dev_err(&pdev->dev, "failed to get runtime PM: %d\n",
-				err);
+			dev_err(sor->dev, "failed to resume: %d\n", err);
 			goto remove;
 		}
 
@@ -3857,7 +3935,7 @@ static int tegra_sor_probe(struct platform_device *pdev)
 		}
 
 		sor->clk_pad = tegra_clk_sor_pad_register(sor, name);
-		pm_runtime_put(&pdev->dev);
+		host1x_client_suspend(&sor->client);
 	}
 
 	if (IS_ERR(sor->clk_pad)) {
@@ -3913,54 +3991,21 @@ static int tegra_sor_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static int tegra_sor_runtime_suspend(struct device *dev)
-{
-	struct tegra_sor *sor = dev_get_drvdata(dev);
-	int err;
-
-	if (sor->rst) {
-		err = reset_control_assert(sor->rst);
-		if (err < 0) {
-			dev_err(dev, "failed to assert reset: %d\n", err);
-			return err;
-		}
-
-		reset_control_release(sor->rst);
-	}
-
-	usleep_range(1000, 2000);
-
-	clk_disable_unprepare(sor->clk);
-
-	return 0;
-}
-
-static int tegra_sor_runtime_resume(struct device *dev)
+static int __maybe_unused tegra_sor_suspend(struct device *dev)
 {
 	struct tegra_sor *sor = dev_get_drvdata(dev);
 	int err;
 
-	err = clk_prepare_enable(sor->clk);
+	err = tegra_output_suspend(&sor->output);
 	if (err < 0) {
-		dev_err(dev, "failed to enable clock: %d\n", err);
+		dev_err(dev, "failed to suspend output: %d\n", err);
 		return err;
 	}
 
-	usleep_range(1000, 2000);
-
-	if (sor->rst) {
-		err = reset_control_acquire(sor->rst);
-		if (err < 0) {
-			dev_err(dev, "failed to acquire reset: %d\n", err);
-			clk_disable_unprepare(sor->clk);
-			return err;
-		}
-
-		err = reset_control_deassert(sor->rst);
+	if (sor->hdmi_supply) {
+		err = regulator_disable(sor->hdmi_supply);
 		if (err < 0) {
-			dev_err(dev, "failed to deassert reset: %d\n", err);
-			reset_control_release(sor->rst);
-			clk_disable_unprepare(sor->clk);
+			tegra_output_resume(&sor->output);
 			return err;
 		}
 	}
@@ -3968,37 +4013,31 @@ static int tegra_sor_runtime_resume(struct device *dev)
 	return 0;
 }
 
-static int tegra_sor_suspend(struct device *dev)
+static int __maybe_unused tegra_sor_resume(struct device *dev)
 {
 	struct tegra_sor *sor = dev_get_drvdata(dev);
 	int err;
 
 	if (sor->hdmi_supply) {
-		err = regulator_disable(sor->hdmi_supply);
+		err = regulator_enable(sor->hdmi_supply);
 		if (err < 0)
 			return err;
 	}
 
-	return 0;
-}
+	err = tegra_output_resume(&sor->output);
+	if (err < 0) {
+		dev_err(dev, "failed to resume output: %d\n", err);
 
-static int tegra_sor_resume(struct device *dev)
-{
-	struct tegra_sor *sor = dev_get_drvdata(dev);
-	int err;
+		if (sor->hdmi_supply)
+			regulator_disable(sor->hdmi_supply);
 
-	if (sor->hdmi_supply) {
-		err = regulator_enable(sor->hdmi_supply);
-		if (err < 0)
-			return err;
+		return err;
 	}
 
 	return 0;
 }
 
 static const struct dev_pm_ops tegra_sor_pm_ops = {
-	SET_RUNTIME_PM_OPS(tegra_sor_runtime_suspend, tegra_sor_runtime_resume,
-			   NULL)
 	SET_SYSTEM_SLEEP_PM_OPS(tegra_sor_suspend, tegra_sor_resume)
 };
 
diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index 3526c2892ddb..ade56b860cf9 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -161,7 +161,7 @@ static int vic_boot(struct vic *vic)
 static int vic_init(struct host1x_client *client)
 {
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
-	struct drm_device *dev = dev_get_drvdata(client->parent);
+	struct drm_device *dev = dev_get_drvdata(client->host);
 	struct tegra_drm *tegra = dev->dev_private;
 	struct vic *vic = to_vic(drm);
 	int err;
@@ -190,9 +190,9 @@ static int vic_init(struct host1x_client *client)
 
 	/*
 	 * Inherit the DMA parameters (such as maximum segment size) from the
-	 * parent device.
+	 * parent host1x device.
 	 */
-	client->dev->dma_parms = client->parent->dma_parms;
+	client->dev->dma_parms = client->host->dma_parms;
 
 	return 0;
 
@@ -209,7 +209,7 @@ detach:
 static int vic_exit(struct host1x_client *client)
 {
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
-	struct drm_device *dev = dev_get_drvdata(client->parent);
+	struct drm_device *dev = dev_get_drvdata(client->host);
 	struct tegra_drm *tegra = dev->dev_private;
 	struct vic *vic = to_vic(drm);
 	int err;
diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c
index ac42c84d2d7f..d1c3f5fbfee4 100644
--- a/drivers/gpu/drm/virtio/virtgpu_plane.c
+++ b/drivers/gpu/drm/virtio/virtgpu_plane.c
@@ -260,6 +260,7 @@ static void virtio_gpu_cursor_plane_update(struct drm_plane *plane,
 		if (!objs)
 			return;
 		virtio_gpu_array_add_obj(objs, vgfb->base.obj[0]);
+		virtio_gpu_array_lock_resv(objs);
 		virtio_gpu_cmd_transfer_to_host_2d
 			(vgdev, 0,
 			 plane->state->crtc_w,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
index 4ac55fc2bf97..44d858ce4ce7 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
@@ -209,8 +209,10 @@ int vmw_cmdbuf_res_add(struct vmw_cmdbuf_res_manager *man,
 
 	cres->hash.key = user_key | (res_type << 24);
 	ret = drm_ht_insert_item(&man->resources, &cres->hash);
-	if (unlikely(ret != 0))
+	if (unlikely(ret != 0)) {
+		kfree(cres);
 		goto out_invalid_key;
+	}
 
 	cres->state = VMW_CMDBUF_RES_ADD;
 	cres->res = vmw_resource_reference(res);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index e962048f65d2..827458f49112 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -28,10 +28,10 @@
 #include <linux/console.h>
 #include <linux/dma-mapping.h>
 #include <linux/module.h>
+#include <linux/pci.h>
 
 #include <drm/drm_drv.h>
 #include <drm/drm_ioctl.h>
-#include <drm/drm_pci.h>
 #include <drm/drm_sysfs.h>
 #include <drm/ttm/ttm_bo_driver.h>
 #include <drm/ttm/ttm_module.h>
@@ -150,6 +150,9 @@
 #define DRM_IOCTL_VMW_GB_SURFACE_REF_EXT				\
 	DRM_IOWR(DRM_COMMAND_BASE + DRM_VMW_GB_SURFACE_REF_EXT,		\
 		union drm_vmw_gb_surface_reference_ext_arg)
+#define DRM_IOCTL_VMW_MSG						\
+	DRM_IOWR(DRM_COMMAND_BASE + DRM_VMW_MSG,			\
+		struct drm_vmw_msg_arg)
 
 /**
  * The core DRM version of this macro doesn't account for
@@ -165,9 +168,9 @@
 
 static const struct drm_ioctl_desc vmw_ioctls[] = {
 	VMW_IOCTL_DEF(VMW_GET_PARAM, vmw_getparam_ioctl,
-		      DRM_AUTH | DRM_RENDER_ALLOW),
+		      DRM_RENDER_ALLOW),
 	VMW_IOCTL_DEF(VMW_ALLOC_DMABUF, vmw_bo_alloc_ioctl,
-		      DRM_AUTH | DRM_RENDER_ALLOW),
+		      DRM_RENDER_ALLOW),
 	VMW_IOCTL_DEF(VMW_UNREF_DMABUF, vmw_bo_unref_ioctl,
 		      DRM_RENDER_ALLOW),
 	VMW_IOCTL_DEF(VMW_CURSOR_BYPASS,
@@ -182,16 +185,16 @@ static const struct drm_ioctl_desc vmw_ioctls[] = {
 		      DRM_MASTER),
 
 	VMW_IOCTL_DEF(VMW_CREATE_CONTEXT, vmw_context_define_ioctl,
-		      DRM_AUTH | DRM_RENDER_ALLOW),
+		      DRM_RENDER_ALLOW),
 	VMW_IOCTL_DEF(VMW_UNREF_CONTEXT, vmw_context_destroy_ioctl,
 		      DRM_RENDER_ALLOW),
 	VMW_IOCTL_DEF(VMW_CREATE_SURFACE, vmw_surface_define_ioctl,
-		      DRM_AUTH | DRM_RENDER_ALLOW),
+		      DRM_RENDER_ALLOW),
 	VMW_IOCTL_DEF(VMW_UNREF_SURFACE, vmw_surface_destroy_ioctl,
 		      DRM_RENDER_ALLOW),
 	VMW_IOCTL_DEF(VMW_REF_SURFACE, vmw_surface_reference_ioctl,
-		      DRM_AUTH | DRM_RENDER_ALLOW),
-	VMW_IOCTL_DEF(VMW_EXECBUF, vmw_execbuf_ioctl, DRM_AUTH |
+		      DRM_RENDER_ALLOW),
+	VMW_IOCTL_DEF(VMW_EXECBUF, vmw_execbuf_ioctl,
 		      DRM_RENDER_ALLOW),
 	VMW_IOCTL_DEF(VMW_FENCE_WAIT, vmw_fence_obj_wait_ioctl,
 		      DRM_RENDER_ALLOW),
@@ -201,9 +204,9 @@ static const struct drm_ioctl_desc vmw_ioctls[] = {
 	VMW_IOCTL_DEF(VMW_FENCE_UNREF, vmw_fence_obj_unref_ioctl,
 		      DRM_RENDER_ALLOW),
 	VMW_IOCTL_DEF(VMW_FENCE_EVENT, vmw_fence_event_ioctl,
-		      DRM_AUTH | DRM_RENDER_ALLOW),
+		      DRM_RENDER_ALLOW),
 	VMW_IOCTL_DEF(VMW_GET_3D_CAP, vmw_get_cap_3d_ioctl,
-		      DRM_AUTH | DRM_RENDER_ALLOW),
+		      DRM_RENDER_ALLOW),
 
 	/* these allow direct access to the framebuffers mark as master only */
 	VMW_IOCTL_DEF(VMW_PRESENT, vmw_present_ioctl,
@@ -221,28 +224,31 @@ static const struct drm_ioctl_desc vmw_ioctls[] = {
 		      DRM_RENDER_ALLOW),
 	VMW_IOCTL_DEF(VMW_CREATE_SHADER,
 		      vmw_shader_define_ioctl,
-		      DRM_AUTH | DRM_RENDER_ALLOW),
+		      DRM_RENDER_ALLOW),
 	VMW_IOCTL_DEF(VMW_UNREF_SHADER,
 		      vmw_shader_destroy_ioctl,
 		      DRM_RENDER_ALLOW),
 	VMW_IOCTL_DEF(VMW_GB_SURFACE_CREATE,
 		      vmw_gb_surface_define_ioctl,
-		      DRM_AUTH | DRM_RENDER_ALLOW),
+		      DRM_RENDER_ALLOW),
 	VMW_IOCTL_DEF(VMW_GB_SURFACE_REF,
 		      vmw_gb_surface_reference_ioctl,
-		      DRM_AUTH | DRM_RENDER_ALLOW),
+		      DRM_RENDER_ALLOW),
 	VMW_IOCTL_DEF(VMW_SYNCCPU,
 		      vmw_user_bo_synccpu_ioctl,
 		      DRM_RENDER_ALLOW),
 	VMW_IOCTL_DEF(VMW_CREATE_EXTENDED_CONTEXT,
 		      vmw_extended_context_define_ioctl,
-		      DRM_AUTH | DRM_RENDER_ALLOW),
+		      DRM_RENDER_ALLOW),
 	VMW_IOCTL_DEF(VMW_GB_SURFACE_CREATE_EXT,
 		      vmw_gb_surface_define_ext_ioctl,
-		      DRM_AUTH | DRM_RENDER_ALLOW),
+		      DRM_RENDER_ALLOW),
 	VMW_IOCTL_DEF(VMW_GB_SURFACE_REF_EXT,
 		      vmw_gb_surface_reference_ext_ioctl,
-		      DRM_AUTH | DRM_RENDER_ALLOW),
+		      DRM_RENDER_ALLOW),
+	VMW_IOCTL_DEF(VMW_MSG,
+		      vmw_msg_ioctl,
+		      DRM_RENDER_ALLOW),
 };
 
 static const struct pci_device_id vmw_pci_id_list[] = {
@@ -1211,8 +1217,10 @@ static void vmw_remove(struct pci_dev *pdev)
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
 
+	drm_dev_unregister(dev);
+	vmw_driver_unload(dev);
+	drm_dev_put(dev);
 	pci_disable_device(pdev);
-	drm_put_dev(dev);
 }
 
 static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val,
@@ -1391,8 +1399,6 @@ static const struct file_operations vmwgfx_driver_fops = {
 static struct drm_driver driver = {
 	.driver_features =
 	DRIVER_MODESET | DRIVER_RENDER | DRIVER_ATOMIC,
-	.load = vmw_driver_load,
-	.unload = vmw_driver_unload,
 	.get_vblank_counter = vmw_get_vblank_counter,
 	.enable_vblank = vmw_enable_vblank,
 	.disable_vblank = vmw_disable_vblank,
@@ -1431,7 +1437,39 @@ static struct pci_driver vmw_pci_driver = {
 
 static int vmw_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
-	return drm_get_pci_dev(pdev, ent, &driver);
+	struct drm_device *dev;
+	int ret;
+
+	ret = pci_enable_device(pdev);
+	if (ret)
+		return ret;
+
+	dev = drm_dev_alloc(&driver, &pdev->dev);
+	if (IS_ERR(dev)) {
+		ret = PTR_ERR(dev);
+		goto err_pci_disable_device;
+	}
+
+	dev->pdev = pdev;
+	pci_set_drvdata(pdev, dev);
+
+	ret = vmw_driver_load(dev, ent->driver_data);
+	if (ret)
+		goto err_drm_dev_put;
+
+	ret = drm_dev_register(dev, ent->driver_data);
+	if (ret)
+		goto err_vmw_driver_unload;
+
+	return 0;
+
+err_vmw_driver_unload:
+	vmw_driver_unload(dev);
+err_drm_dev_put:
+	drm_dev_put(dev);
+err_pci_disable_device:
+	pci_disable_device(pdev);
+	return ret;
 }
 
 static int __init vmwgfx_init(void)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index a31e726d6d71..86b69397d166 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -56,9 +56,9 @@
 
 
 #define VMWGFX_DRIVER_NAME "vmwgfx"
-#define VMWGFX_DRIVER_DATE "20190328"
+#define VMWGFX_DRIVER_DATE "20200114"
 #define VMWGFX_DRIVER_MAJOR 2
-#define VMWGFX_DRIVER_MINOR 16
+#define VMWGFX_DRIVER_MINOR 17
 #define VMWGFX_DRIVER_PATCHLEVEL 0
 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
 #define VMWGFX_MAX_RELOCATIONS 2048
@@ -1403,6 +1403,8 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst,
 int vmw_host_get_guestinfo(const char *guest_info_param,
 			   char *buffer, size_t *length);
 int vmw_host_log(const char *log);
+int vmw_msg_ioctl(struct drm_device *dev, void *data,
+		  struct drm_file *file_priv);
 
 /* VMW logging */
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 934ad7c0c342..73489a45decb 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -2377,9 +2377,12 @@ static int vmw_cmd_dx_clear_rendertarget_view(struct vmw_private *dev_priv,
 {
 	VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXClearRenderTargetView) =
 		container_of(header, typeof(*cmd), header);
+	struct vmw_resource *ret;
 
-	return PTR_RET(vmw_view_id_val_add(sw_context, vmw_view_rt,
-					   cmd->body.renderTargetViewId));
+	ret = vmw_view_id_val_add(sw_context, vmw_view_rt,
+				  cmd->body.renderTargetViewId);
+
+	return PTR_ERR_OR_ZERO(ret);
 }
 
 /**
@@ -2396,9 +2399,12 @@ static int vmw_cmd_dx_clear_depthstencil_view(struct vmw_private *dev_priv,
 {
 	VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXClearDepthStencilView) =
 		container_of(header, typeof(*cmd), header);
+	struct vmw_resource *ret;
+
+	ret = vmw_view_id_val_add(sw_context, vmw_view_ds,
+				  cmd->body.depthStencilViewId);
 
-	return PTR_RET(vmw_view_id_val_add(sw_context, vmw_view_ds,
-					   cmd->body.depthStencilViewId));
+	return PTR_ERR_OR_ZERO(ret);
 }
 
 static int vmw_cmd_dx_view_define(struct vmw_private *dev_priv,
@@ -2741,9 +2747,12 @@ static int vmw_cmd_dx_genmips(struct vmw_private *dev_priv,
 {
 	VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXGenMips) =
 		container_of(header, typeof(*cmd), header);
+	struct vmw_resource *ret;
+
+	ret = vmw_view_id_val_add(sw_context, vmw_view_sr,
+				  cmd->body.shaderResourceViewId);
 
-	return PTR_RET(vmw_view_id_val_add(sw_context, vmw_view_sr,
-					   cmd->body.shaderResourceViewId));
+	return PTR_ERR_OR_ZERO(ret);
 }
 
 /**
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
index b6c5e4c2ac3c..e9f448a5ebb3 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
@@ -28,6 +28,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/mem_encrypt.h>
 
 #include <asm/hypervisor.h>
 
@@ -56,6 +57,8 @@
 
 #define HIGH_WORD(X) ((X & 0xFFFF0000) >> 16)
 
+#define MAX_USER_MSG_LENGTH	PAGE_SIZE
+
 static u32 vmw_msg_enabled = 1;
 
 enum rpc_msg_type {
@@ -148,7 +151,8 @@ static unsigned long vmw_port_hb_out(struct rpc_channel *channel,
 	unsigned long si, di, eax, ebx, ecx, edx;
 	unsigned long msg_len = strlen(msg);
 
-	if (hb) {
+	/* HB port can't access encrypted memory. */
+	if (hb && !mem_encrypt_active()) {
 		unsigned long bp = channel->cookie_high;
 
 		si = (uintptr_t) msg;
@@ -202,7 +206,8 @@ static unsigned long vmw_port_hb_in(struct rpc_channel *channel, char *reply,
 {
 	unsigned long si, di, eax, ebx, ecx, edx;
 
-	if (hb) {
+	/* HB port can't access encrypted memory. */
+	if (hb && !mem_encrypt_active()) {
 		unsigned long bp = channel->cookie_low;
 
 		si = channel->cookie_high;
@@ -514,3 +519,84 @@ out_open:
 
 	return -EINVAL;
 }
+
+
+/**
+ * vmw_msg_ioctl: Sends and receives a message to/from host from/to user-space
+ *
+ * Sends a message from user-space to host.
+ * Can also receive a result from host and return that to user-space.
+ *
+ * @dev: Identifies the drm device.
+ * @data: Pointer to the ioctl argument.
+ * @file_priv: Identifies the caller.
+ * Return: Zero on success, negative error code on error.
+ */
+
+int vmw_msg_ioctl(struct drm_device *dev, void *data,
+		  struct drm_file *file_priv)
+{
+	struct drm_vmw_msg_arg *arg =
+		(struct drm_vmw_msg_arg *) data;
+	struct rpc_channel channel;
+	char *msg;
+	int length;
+
+	msg = kmalloc(MAX_USER_MSG_LENGTH, GFP_KERNEL);
+	if (!msg) {
+		DRM_ERROR("Cannot allocate memory for log message.\n");
+		return -ENOMEM;
+	}
+
+	length = strncpy_from_user(msg, (void __user *)((unsigned long)arg->send),
+				   MAX_USER_MSG_LENGTH);
+	if (length < 0 || length >= MAX_USER_MSG_LENGTH) {
+		DRM_ERROR("Userspace message access failure.\n");
+		kfree(msg);
+		return -EINVAL;
+	}
+
+
+	if (vmw_open_channel(&channel, RPCI_PROTOCOL_NUM)) {
+		DRM_ERROR("Failed to open channel.\n");
+		goto out_open;
+	}
+
+	if (vmw_send_msg(&channel, msg)) {
+		DRM_ERROR("Failed to send message to host.\n");
+		goto out_msg;
+	}
+
+	if (!arg->send_only) {
+		char *reply = NULL;
+		size_t reply_len = 0;
+
+		if (vmw_recv_msg(&channel, (void *) &reply, &reply_len)) {
+			DRM_ERROR("Failed to receive message from host.\n");
+			goto out_msg;
+		}
+		if (reply && reply_len > 0) {
+			if (copy_to_user((void __user *)((unsigned long)arg->receive),
+							 reply, reply_len)) {
+				DRM_ERROR("Failed to copy message to userspace.\n");
+				kfree(reply);
+				goto out_msg;
+			}
+			arg->receive_len = (__u32)reply_len;
+		}
+		kfree(reply);
+	}
+
+	vmw_close_channel(&channel);
+	kfree(msg);
+
+	return 0;
+
+out_msg:
+	vmw_close_channel(&channel);
+out_open:
+	kfree(msg);
+
+	return -EINVAL;
+}
+
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
index 32b9131b2bae..3ce630aa4fde 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
@@ -934,16 +934,12 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv,
 	uint32_t handle;
 	struct ttm_base_object *base;
 	int ret;
-	bool require_exist = false;
 
 	if (handle_type == DRM_VMW_HANDLE_PRIME) {
 		ret = ttm_prime_fd_to_handle(tfile, u_handle, &handle);
 		if (unlikely(ret != 0))
 			return ret;
 	} else {
-		if (unlikely(drm_is_render_client(file_priv)))
-			require_exist = true;
-
 		handle = u_handle;
 	}
 
@@ -960,9 +956,18 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv,
 	}
 
 	if (handle_type != DRM_VMW_HANDLE_PRIME) {
+		bool require_exist = false;
+
 		user_srf = container_of(base, struct vmw_user_surface,
 					prime.base);
 
+		/* Error out if we are unauthenticated primary */
+		if (drm_is_primary_client(file_priv) &&
+		    !file_priv->authenticated) {
+			ret = -EACCES;
+			goto out_bad_resource;
+		}
+
 		/*
 		 * Make sure the surface creator has the same
 		 * authenticating master, or is already registered with us.
@@ -971,6 +976,9 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv,
 		    user_srf->master != file_priv->master)
 			require_exist = true;
 
+		if (unlikely(drm_is_render_client(file_priv)))
+			require_exist = true;
+
 		ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL,
 					 require_exist);
 		if (unlikely(ret != 0)) {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
index ce288756531b..aa7e50f63b94 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
@@ -45,6 +45,10 @@ int vmw_mmap(struct file *filp, struct vm_area_struct *vma)
 
 	vma->vm_ops = &vmw_vm_ops;
 
+	/* Use VM_PFNMAP rather than VM_MIXEDMAP if not a COW mapping */
+	if ((vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) != VM_MAYWRITE)
+		vma->vm_flags = (vma->vm_flags & ~VM_MIXEDMAP) | VM_PFNMAP;
+
 	return 0;
 }